xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 4dad4914f7d5232639e64ffbc816078184f1462c)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
34 
35 static cl::opt<bool>
36 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
37                      cl::desc("Enable spills in prologue to vector registers."),
38                      cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   if (STI.isAIXABI())
58     return STI.isPPC64() ? 40 : 20;
59   return STI.isELFv2ABI() ? 24 : 40;
60 }
61 
62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63   // First slot in the general register save area.
64   return STI.isPPC64() ? -8U : -4U;
65 }
66 
67 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
68   if (STI.isAIXABI() || STI.isPPC64())
69     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
70 
71   // 32-bit SVR4 ABI:
72   return 8;
73 }
74 
75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
76   // SVR4 ABI: First slot in the general register save area.
77   return STI.isPPC64()
78              ? -16U
79              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
80 }
81 
82 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
83   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
84 }
85 
86 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
87     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
88                           STI.getPlatformStackAlignment(), 0),
89       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
90       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
91       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
92       LinkageSize(computeLinkageSize(Subtarget)),
93       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
94       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
95 
96 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
97 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
98     unsigned &NumEntries) const {
99   // Early exit if not using the SVR4 ABI.
100   if (!Subtarget.isSVR4ABI()) {
101     NumEntries = 0;
102     return nullptr;
103   }
104 
105 // Floating-point register save area offsets.
106 #define CALLEE_SAVED_FPRS \
107       {PPC::F31, -8},     \
108       {PPC::F30, -16},    \
109       {PPC::F29, -24},    \
110       {PPC::F28, -32},    \
111       {PPC::F27, -40},    \
112       {PPC::F26, -48},    \
113       {PPC::F25, -56},    \
114       {PPC::F24, -64},    \
115       {PPC::F23, -72},    \
116       {PPC::F22, -80},    \
117       {PPC::F21, -88},    \
118       {PPC::F20, -96},    \
119       {PPC::F19, -104},   \
120       {PPC::F18, -112},   \
121       {PPC::F17, -120},   \
122       {PPC::F16, -128},   \
123       {PPC::F15, -136},   \
124       {PPC::F14, -144}
125 
126 // 32-bit general purpose register save area offsets.
127 #define CALLEE_SAVED_GPRS32 \
128       {PPC::R31, -4},       \
129       {PPC::R30, -8},       \
130       {PPC::R29, -12},      \
131       {PPC::R28, -16},      \
132       {PPC::R27, -20},      \
133       {PPC::R26, -24},      \
134       {PPC::R25, -28},      \
135       {PPC::R24, -32},      \
136       {PPC::R23, -36},      \
137       {PPC::R22, -40},      \
138       {PPC::R21, -44},      \
139       {PPC::R20, -48},      \
140       {PPC::R19, -52},      \
141       {PPC::R18, -56},      \
142       {PPC::R17, -60},      \
143       {PPC::R16, -64},      \
144       {PPC::R15, -68},      \
145       {PPC::R14, -72}
146 
147 // 64-bit general purpose register save area offsets.
148 #define CALLEE_SAVED_GPRS64 \
149       {PPC::X31, -8},       \
150       {PPC::X30, -16},      \
151       {PPC::X29, -24},      \
152       {PPC::X28, -32},      \
153       {PPC::X27, -40},      \
154       {PPC::X26, -48},      \
155       {PPC::X25, -56},      \
156       {PPC::X24, -64},      \
157       {PPC::X23, -72},      \
158       {PPC::X22, -80},      \
159       {PPC::X21, -88},      \
160       {PPC::X20, -96},      \
161       {PPC::X19, -104},     \
162       {PPC::X18, -112},     \
163       {PPC::X17, -120},     \
164       {PPC::X16, -128},     \
165       {PPC::X15, -136},     \
166       {PPC::X14, -144}
167 
168 // Vector register save area offsets.
169 #define CALLEE_SAVED_VRS \
170       {PPC::V31, -16},   \
171       {PPC::V30, -32},   \
172       {PPC::V29, -48},   \
173       {PPC::V28, -64},   \
174       {PPC::V27, -80},   \
175       {PPC::V26, -96},   \
176       {PPC::V25, -112},  \
177       {PPC::V24, -128},  \
178       {PPC::V23, -144},  \
179       {PPC::V22, -160},  \
180       {PPC::V21, -176},  \
181       {PPC::V20, -192}
182 
183   // Note that the offsets here overlap, but this is fixed up in
184   // processFunctionBeforeFrameFinalized.
185 
186   static const SpillSlot Offsets[] = {
187       CALLEE_SAVED_FPRS,
188       CALLEE_SAVED_GPRS32,
189 
190       // CR save area offset.  We map each of the nonvolatile CR fields
191       // to the slot for CR2, which is the first of the nonvolatile CR
192       // fields to be assigned, so that we only allocate one save slot.
193       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
194       {PPC::CR2, -4},
195 
196       // VRSAVE save area offset.
197       {PPC::VRSAVE, -4},
198 
199       CALLEE_SAVED_VRS,
200 
201       // SPE register save area (overlaps Vector save area).
202       {PPC::S31, -8},
203       {PPC::S30, -16},
204       {PPC::S29, -24},
205       {PPC::S28, -32},
206       {PPC::S27, -40},
207       {PPC::S26, -48},
208       {PPC::S25, -56},
209       {PPC::S24, -64},
210       {PPC::S23, -72},
211       {PPC::S22, -80},
212       {PPC::S21, -88},
213       {PPC::S20, -96},
214       {PPC::S19, -104},
215       {PPC::S18, -112},
216       {PPC::S17, -120},
217       {PPC::S16, -128},
218       {PPC::S15, -136},
219       {PPC::S14, -144}};
220 
221   static const SpillSlot Offsets64[] = {
222       CALLEE_SAVED_FPRS,
223       CALLEE_SAVED_GPRS64,
224 
225       // VRSAVE save area offset.
226       {PPC::VRSAVE, -4},
227 
228       CALLEE_SAVED_VRS
229   };
230 
231   if (Subtarget.isPPC64()) {
232     NumEntries = array_lengthof(Offsets64);
233 
234     return Offsets64;
235   } else {
236     NumEntries = array_lengthof(Offsets);
237 
238     return Offsets;
239   }
240 }
241 
242 /// RemoveVRSaveCode - We have found that this function does not need any code
243 /// to manipulate the VRSAVE register, even though it uses vector registers.
244 /// This can happen when the only registers used are known to be live in or out
245 /// of the function.  Remove all of the VRSAVE related code from the function.
246 /// FIXME: The removal of the code results in a compile failure at -O0 when the
247 /// function contains a function call, as the GPR containing original VRSAVE
248 /// contents is spilled and reloaded around the call.  Without the prolog code,
249 /// the spill instruction refers to an undefined register.  This code needs
250 /// to account for all uses of that GPR.
251 static void RemoveVRSaveCode(MachineInstr &MI) {
252   MachineBasicBlock *Entry = MI.getParent();
253   MachineFunction *MF = Entry->getParent();
254 
255   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
256   MachineBasicBlock::iterator MBBI = MI;
257   ++MBBI;
258   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
259   MBBI->eraseFromParent();
260 
261   bool RemovedAllMTVRSAVEs = true;
262   // See if we can find and remove the MTVRSAVE instruction from all of the
263   // epilog blocks.
264   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
265     // If last instruction is a return instruction, add an epilogue
266     if (I->isReturnBlock()) {
267       bool FoundIt = false;
268       for (MBBI = I->end(); MBBI != I->begin(); ) {
269         --MBBI;
270         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
271           MBBI->eraseFromParent();  // remove it.
272           FoundIt = true;
273           break;
274         }
275       }
276       RemovedAllMTVRSAVEs &= FoundIt;
277     }
278   }
279 
280   // If we found and removed all MTVRSAVE instructions, remove the read of
281   // VRSAVE as well.
282   if (RemovedAllMTVRSAVEs) {
283     MBBI = MI;
284     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
285     --MBBI;
286     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
287     MBBI->eraseFromParent();
288   }
289 
290   // Finally, nuke the UPDATE_VRSAVE.
291   MI.eraseFromParent();
292 }
293 
294 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
295 // instruction selector.  Based on the vector registers that have been used,
296 // transform this into the appropriate ORI instruction.
297 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
298   MachineFunction *MF = MI.getParent()->getParent();
299   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
300   DebugLoc dl = MI.getDebugLoc();
301 
302   const MachineRegisterInfo &MRI = MF->getRegInfo();
303   unsigned UsedRegMask = 0;
304   for (unsigned i = 0; i != 32; ++i)
305     if (MRI.isPhysRegModified(VRRegNo[i]))
306       UsedRegMask |= 1 << (31-i);
307 
308   // Live in and live out values already must be in the mask, so don't bother
309   // marking them.
310   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
311     unsigned RegNo = TRI->getEncodingValue(LI.first);
312     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
313       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
314   }
315 
316   // Live out registers appear as use operands on return instructions.
317   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
318        UsedRegMask != 0 && BI != BE; ++BI) {
319     const MachineBasicBlock &MBB = *BI;
320     if (!MBB.isReturnBlock())
321       continue;
322     const MachineInstr &Ret = MBB.back();
323     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
324       const MachineOperand &MO = Ret.getOperand(I);
325       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
326         continue;
327       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
328       UsedRegMask &= ~(1 << (31-RegNo));
329     }
330   }
331 
332   // If no registers are used, turn this into a copy.
333   if (UsedRegMask == 0) {
334     // Remove all VRSAVE code.
335     RemoveVRSaveCode(MI);
336     return;
337   }
338 
339   Register SrcReg = MI.getOperand(1).getReg();
340   Register DstReg = MI.getOperand(0).getReg();
341 
342   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
343     if (DstReg != SrcReg)
344       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
345           .addReg(SrcReg)
346           .addImm(UsedRegMask);
347     else
348       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
349           .addReg(SrcReg, RegState::Kill)
350           .addImm(UsedRegMask);
351   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
352     if (DstReg != SrcReg)
353       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
354           .addReg(SrcReg)
355           .addImm(UsedRegMask >> 16);
356     else
357       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
358           .addReg(SrcReg, RegState::Kill)
359           .addImm(UsedRegMask >> 16);
360   } else {
361     if (DstReg != SrcReg)
362       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
363           .addReg(SrcReg)
364           .addImm(UsedRegMask >> 16);
365     else
366       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
367           .addReg(SrcReg, RegState::Kill)
368           .addImm(UsedRegMask >> 16);
369 
370     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
371         .addReg(DstReg, RegState::Kill)
372         .addImm(UsedRegMask & 0xFFFF);
373   }
374 
375   // Remove the old UPDATE_VRSAVE instruction.
376   MI.eraseFromParent();
377 }
378 
379 static bool spillsCR(const MachineFunction &MF) {
380   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
381   return FuncInfo->isCRSpilled();
382 }
383 
384 static bool spillsVRSAVE(const MachineFunction &MF) {
385   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
386   return FuncInfo->isVRSAVESpilled();
387 }
388 
389 static bool hasSpills(const MachineFunction &MF) {
390   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
391   return FuncInfo->hasSpills();
392 }
393 
394 static bool hasNonRISpills(const MachineFunction &MF) {
395   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
396   return FuncInfo->hasNonRISpills();
397 }
398 
399 /// MustSaveLR - Return true if this function requires that we save the LR
400 /// register onto the stack in the prolog and restore it in the epilog of the
401 /// function.
402 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
403   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
404 
405   // We need a save/restore of LR if there is any def of LR (which is
406   // defined by calls, including the PIC setup sequence), or if there is
407   // some use of the LR stack slot (e.g. for builtin_return_address).
408   // (LR comes in 32 and 64 bit versions.)
409   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
410   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
411 }
412 
413 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
414 /// call frame size. Update the MachineFunction object with the stack size.
415 unsigned
416 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
417                                                 bool UseEstimate) const {
418   unsigned NewMaxCallFrameSize = 0;
419   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
420                                             &NewMaxCallFrameSize);
421   MF.getFrameInfo().setStackSize(FrameSize);
422   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
423   return FrameSize;
424 }
425 
426 /// determineFrameLayout - Determine the size of the frame and maximum call
427 /// frame size.
428 unsigned
429 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
430                                        bool UseEstimate,
431                                        unsigned *NewMaxCallFrameSize) const {
432   const MachineFrameInfo &MFI = MF.getFrameInfo();
433   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
434 
435   // Get the number of bytes to allocate from the FrameInfo
436   unsigned FrameSize =
437     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
438 
439   // Get stack alignments. The frame must be aligned to the greatest of these:
440   Align TargetAlign = getStackAlign(); // alignment required per the ABI
441   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
442   Align Alignment = std::max(TargetAlign, MaxAlign);
443 
444   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
445 
446   unsigned LR = RegInfo->getRARegister();
447   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
448   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
449                        !MFI.adjustsStack() &&       // No calls.
450                        !MustSaveLR(MF, LR) &&       // No need to save LR.
451                        !FI->mustSaveTOC() &&        // No need to save TOC.
452                        !RegInfo->hasBasePointer(MF); // No special alignment.
453 
454   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
455   // code if all local vars are reg-allocated.
456   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
457 
458   // Check whether we can skip adjusting the stack pointer (by using red zone)
459   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
460     // No need for frame
461     return 0;
462   }
463 
464   // Get the maximum call frame size of all the calls.
465   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
466 
467   // Maximum call frame needs to be at least big enough for linkage area.
468   unsigned minCallFrameSize = getLinkageSize();
469   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
470 
471   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
472   // that allocations will be aligned.
473   if (MFI.hasVarSizedObjects())
474     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
475 
476   // Update the new max call frame size if the caller passes in a valid pointer.
477   if (NewMaxCallFrameSize)
478     *NewMaxCallFrameSize = maxCallFrameSize;
479 
480   // Include call frame size in total.
481   FrameSize += maxCallFrameSize;
482 
483   // Make sure the frame is aligned.
484   FrameSize = alignTo(FrameSize, Alignment);
485 
486   return FrameSize;
487 }
488 
489 // hasFP - Return true if the specified function actually has a dedicated frame
490 // pointer register.
491 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
492   const MachineFrameInfo &MFI = MF.getFrameInfo();
493   // FIXME: This is pretty much broken by design: hasFP() might be called really
494   // early, before the stack layout was calculated and thus hasFP() might return
495   // true or false here depending on the time of call.
496   return (MFI.getStackSize()) && needsFP(MF);
497 }
498 
499 // needsFP - Return true if the specified function should have a dedicated frame
500 // pointer register.  This is true if the function has variable sized allocas or
501 // if frame pointer elimination is disabled.
502 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
503   const MachineFrameInfo &MFI = MF.getFrameInfo();
504 
505   // Naked functions have no stack frame pushed, so we don't have a frame
506   // pointer.
507   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
508     return false;
509 
510   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
511     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
512     (MF.getTarget().Options.GuaranteedTailCallOpt &&
513      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
514 }
515 
516 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
517   bool is31 = needsFP(MF);
518   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
519   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
520 
521   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
522   bool HasBP = RegInfo->hasBasePointer(MF);
523   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
524   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
525 
526   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
527        BI != BE; ++BI)
528     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
529       --MBBI;
530       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
531         MachineOperand &MO = MBBI->getOperand(I);
532         if (!MO.isReg())
533           continue;
534 
535         switch (MO.getReg()) {
536         case PPC::FP:
537           MO.setReg(FPReg);
538           break;
539         case PPC::FP8:
540           MO.setReg(FP8Reg);
541           break;
542         case PPC::BP:
543           MO.setReg(BPReg);
544           break;
545         case PPC::BP8:
546           MO.setReg(BP8Reg);
547           break;
548 
549         }
550       }
551     }
552 }
553 
554 /*  This function will do the following:
555     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
556       respectively (defaults recommended by the ABI) and return true
557     - If MBB is not an entry block, initialize the register scavenger and look
558       for available registers.
559     - If the defaults (R0/R12) are available, return true
560     - If TwoUniqueRegsRequired is set to true, it looks for two unique
561       registers. Otherwise, look for a single available register.
562       - If the required registers are found, set SR1 and SR2 and return true.
563       - If the required registers are not found, set SR2 or both SR1 and SR2 to
564         PPC::NoRegister and return false.
565 
566     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
567     is not set, this function will attempt to find two different registers, but
568     still return true if only one register is available (and set SR1 == SR2).
569 */
570 bool
571 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
572                                       bool UseAtEnd,
573                                       bool TwoUniqueRegsRequired,
574                                       Register *SR1,
575                                       Register *SR2) const {
576   RegScavenger RS;
577   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
578   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
579 
580   // Set the defaults for the two scratch registers.
581   if (SR1)
582     *SR1 = R0;
583 
584   if (SR2) {
585     assert (SR1 && "Asking for the second scratch register but not the first?");
586     *SR2 = R12;
587   }
588 
589   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
590   if ((UseAtEnd && MBB->isReturnBlock()) ||
591       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
592     return true;
593 
594   RS.enterBasicBlock(*MBB);
595 
596   if (UseAtEnd && !MBB->empty()) {
597     // The scratch register will be used at the end of the block, so must
598     // consider all registers used within the block
599 
600     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
601     // If no terminator, back iterator up to previous instruction.
602     if (MBBI == MBB->end())
603       MBBI = std::prev(MBBI);
604 
605     if (MBBI != MBB->begin())
606       RS.forward(MBBI);
607   }
608 
609   // If the two registers are available, we're all good.
610   // Note that we only return here if both R0 and R12 are available because
611   // although the function may not require two unique registers, it may benefit
612   // from having two so we should try to provide them.
613   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
614     return true;
615 
616   // Get the list of callee-saved registers for the target.
617   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
618   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
619 
620   // Get all the available registers in the block.
621   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
622                                      &PPC::GPRCRegClass);
623 
624   // We shouldn't use callee-saved registers as scratch registers as they may be
625   // available when looking for a candidate block for shrink wrapping but not
626   // available when the actual prologue/epilogue is being emitted because they
627   // were added as live-in to the prologue block by PrologueEpilogueInserter.
628   for (int i = 0; CSRegs[i]; ++i)
629     BV.reset(CSRegs[i]);
630 
631   // Set the first scratch register to the first available one.
632   if (SR1) {
633     int FirstScratchReg = BV.find_first();
634     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
635   }
636 
637   // If there is another one available, set the second scratch register to that.
638   // Otherwise, set it to either PPC::NoRegister if this function requires two
639   // or to whatever SR1 is set to if this function doesn't require two.
640   if (SR2) {
641     int SecondScratchReg = BV.find_next(*SR1);
642     if (SecondScratchReg != -1)
643       *SR2 = SecondScratchReg;
644     else
645       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
646   }
647 
648   // Now that we've done our best to provide both registers, double check
649   // whether we were unable to provide enough.
650   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
651     return false;
652 
653   return true;
654 }
655 
656 // We need a scratch register for spilling LR and for spilling CR. By default,
657 // we use two scratch registers to hide latency. However, if only one scratch
658 // register is available, we can adjust for that by not overlapping the spill
659 // code. However, if we need to realign the stack (i.e. have a base pointer)
660 // and the stack frame is large, we need two scratch registers.
661 bool
662 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
663   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
664   MachineFunction &MF = *(MBB->getParent());
665   bool HasBP = RegInfo->hasBasePointer(MF);
666   unsigned FrameSize = determineFrameLayout(MF);
667   int NegFrameSize = -FrameSize;
668   bool IsLargeFrame = !isInt<16>(NegFrameSize);
669   MachineFrameInfo &MFI = MF.getFrameInfo();
670   Align MaxAlign = MFI.getMaxAlign();
671   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
672 
673   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
674 }
675 
676 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
677   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
678 
679   return findScratchRegister(TmpMBB, false,
680                              twoUniqueScratchRegsRequired(TmpMBB));
681 }
682 
683 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
684   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
685 
686   return findScratchRegister(TmpMBB, true);
687 }
688 
689 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
690   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
691   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
692 
693   // Abort if there is no register info or function info.
694   if (!RegInfo || !FI)
695     return false;
696 
697   // Only move the stack update on ELFv2 ABI and PPC64.
698   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
699     return false;
700 
701   // Check the frame size first and return false if it does not fit the
702   // requirements.
703   // We need a non-zero frame size as well as a frame that will fit in the red
704   // zone. This is because by moving the stack pointer update we are now storing
705   // to the red zone until the stack pointer is updated. If we get an interrupt
706   // inside the prologue but before the stack update we now have a number of
707   // stores to the red zone and those stores must all fit.
708   MachineFrameInfo &MFI = MF.getFrameInfo();
709   unsigned FrameSize = MFI.getStackSize();
710   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
711     return false;
712 
713   // Frame pointers and base pointers complicate matters so don't do anything
714   // if we have them. For example having a frame pointer will sometimes require
715   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
716   // difficult.
717   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
718     return false;
719 
720   // Calls to fast_cc functions use different rules for passing parameters on
721   // the stack from the ABI and using PIC base in the function imposes
722   // similar restrictions to using the base pointer. It is not generally safe
723   // to move the stack pointer update in these situations.
724   if (FI->hasFastCall() || FI->usesPICBase())
725     return false;
726 
727   // Finally we can move the stack update if we do not require register
728   // scavenging. Register scavenging can introduce more spills and so
729   // may make the frame size larger than we have computed.
730   return !RegInfo->requiresFrameIndexScavenging(MF);
731 }
732 
/// Emit the function prologue for \p MF into \p MBB.
///
/// The steps, in the order the code performs them:
///  - on non-SVR4 targets, scan the block for an UPDATE_VRSAVE pseudo;
///  - compute the frame size with determineFrameLayoutAndUpdate();
///  - pick scratch registers with findScratchRegister();
///  - emit the moves that read CR / LR and the stores that save FP, the PIC
///    base register (R30), BP, LR and CR, as each is required;
///  - return early when the frame size is zero;
///  - allocate the frame with a store-with-update of the stack pointer
///    (immediate form for small frames, indexed form otherwise);
///  - save the TOC register when the function info requests it;
///  - on targets without a red zone, save FP / R30 / BP after the allocation;
///  - emit CFI instructions (when needed), the FP := SP copy, and CFI for the
///    callee-saved registers.
///
/// \param MF  Function being lowered. Its MachineFrameInfo is modified when
///            the stack update is moved past the callee saves (the offsets of
///            the negative-index callee-save slots are shifted by the negated
///            frame size).
/// \param MBB Block that receives the prologue. Instructions are inserted
///            before MBB.begin() or before StackUpdateLoc.
733 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
734                                     MachineBasicBlock &MBB) const {
735   MachineBasicBlock::iterator MBBI = MBB.begin();
736   MachineFrameInfo &MFI = MF.getFrameInfo();
737   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
738   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
739 
740   MachineModuleInfo &MMI = MF.getMMI();
741   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // Left default-constructed: every prologue instruction built below uses it.
742   DebugLoc dl;
743   // AIX assembler does not support cfi directives.
744   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
745 
746   // Get processor type.
747   bool isPPC64 = Subtarget.isPPC64();
748   // Get the ABI.
749   bool isSVR4ABI = Subtarget.isSVR4ABI();
750   bool isAIXABI = Subtarget.isAIXABI();
751   bool isELFv2ABI = Subtarget.isELFv2ABI();
752   assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
753 
754   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
755   // process it.
  // Only the first UPDATE_VRSAVE is handled (the loop breaks after it), and on
  // AIX finding one is a fatal error.
  // NOTE(review): the loop counter `i` is incremented but never read.
  // NOTE(review): given the assert above, a non-SVR4 target here is presumably
  // AIX, so HandleVRSaveUpdate may be unreachable -- confirm.
756   if (!isSVR4ABI)
757     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
758       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
759         if (isAIXABI)
760           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
761         HandleVRSaveUpdate(*MBBI, TII);
762         break;
763       }
764     }
765 
766   // Move MBBI back to the beginning of the prologue block.
767   MBBI = MBB.begin();
768 
769   // Work out frame sizes.
  // The unary minus is applied to the unsigned FrameSize and the result is
  // then converted to int.
  // NOTE(review): NegFrameSize is already an int when it reaches isInt<32>, so
  // this check presumably can never fail -- confirm whether a wider type was
  // intended.
770   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
771   int NegFrameSize = -FrameSize;
772   if (!isInt<32>(NegFrameSize))
773     llvm_unreachable("Unhandled stack size!");
774 
775   if (MFI.isFrameAddressTaken())
776     replaceFPWithRealFP(MF);
777 
778   // Check if the link register (LR) must be saved.
779   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
780   bool MustSaveLR = FI->mustSaveLR();
781   bool MustSaveTOC = FI->mustSaveTOC();
782   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
783   bool MustSaveCR = !MustSaveCRs.empty();
784   // Do we have a frame pointer and/or base pointer for this function?
785   bool HasFP = hasFP(MF);
786   bool HasBP = RegInfo->hasBasePointer(MF);
  // Only the 32-bit SVR4 combination is treated as having no red zone.
787   bool HasRedZone = isPPC64 || !isSVR4ABI;
788 
789   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
790   Register BPReg = RegInfo->getBaseRegister(MF);
791   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
792   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
793   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
  // ScratchReg is not initialized here; findScratchRegister() below receives
  // pointers to both ScratchReg and TempReg and presumably assigns them.
794   Register ScratchReg;
795   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
796   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
  // Select the 64-bit or 32-bit opcode for each instruction kind used below.
797   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
798                                                 : PPC::MFLR );
799   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
800                                                  : PPC::STW );
801   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
802                                                      : PPC::STWU );
803   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
804                                                         : PPC::STWUX);
805   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
806                                                           : PPC::LIS );
807   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
808                                                  : PPC::ORI );
809   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
810                                               : PPC::OR );
811   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
812                                                             : PPC::SUBFC);
813   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
814                                                                : PPC::SUBFIC);
815   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
816                                                            : PPC::MFCR);
817   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
818 
819   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
820   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
821   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
822   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
823   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
824          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
825 
826   // Using the same bool variable as below to suppress compiler warnings.
  // SingleScratchReg first holds findScratchRegister()'s result, which is only
  // consumed by the assert; it is then overwritten with its real meaning:
  // whether ScratchReg and TempReg ended up being the same register.
827   bool SingleScratchReg =
828     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
829                         &ScratchReg, &TempReg);
830   assert(SingleScratchReg &&
831          "Required number of registers not available in this block");
832 
833   SingleScratchReg = ScratchReg == TempReg;
834 
835   int LROffset = getReturnSaveOffset();
836 
  // FP/BP save offsets: on SVR4 they are read from the frame object recorded in
  // PPCFunctionInfo, otherwise from the fixed offset helpers.
  // NOTE(review): the inner `MFI` declarations in the three blocks below shadow
  // the function-scope MFI; both are MF.getFrameInfo().
837   int FPOffset = 0;
838   if (HasFP) {
839     if (isSVR4ABI) {
840       MachineFrameInfo &MFI = MF.getFrameInfo();
841       int FPIndex = FI->getFramePointerSaveIndex();
842       assert(FPIndex && "No Frame Pointer Save Slot!");
843       FPOffset = MFI.getObjectOffset(FPIndex);
844     } else {
845       FPOffset = getFramePointerSaveOffset();
846     }
847   }
848 
849   int BPOffset = 0;
850   if (HasBP) {
851     if (isSVR4ABI) {
852       MachineFrameInfo &MFI = MF.getFrameInfo();
853       int BPIndex = FI->getBasePointerSaveIndex();
854       assert(BPIndex && "No Base Pointer Save Slot!");
855       BPOffset = MFI.getObjectOffset(BPIndex);
856     } else {
857       BPOffset = getBasePointerSaveOffset();
858     }
859   }
860 
861   int PBPOffset = 0;
862   if (FI->usesPICBase()) {
863     MachineFrameInfo &MFI = MF.getFrameInfo();
864     int PBPIndex = FI->getPICBasePointerSaveIndex();
865     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
866     PBPOffset = MFI.getObjectOffset(PBPIndex);
867   }
868 
869   // Get stack alignments.
  // The assert is the whole body of the `if`, so this is a check only.
870   Align MaxAlign = MFI.getMaxAlign();
871   if (HasBP && MaxAlign > 1)
872     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
873 
874   // Frames of 32KB & larger require special handling because they cannot be
875   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
876   bool isLargeFrame = !isInt<16>(NegFrameSize);
877 
878   // Check if we can move the stack update instruction (stdu) down the prologue
879   // past the callee saves. Hopefully this will avoid the situation where the
880   // saves are waiting for the update on the store with update to complete.
  // StackUpdateLoc starts at the block beginning and is advanced by one
  // instruction for every qualifying callee save found below.
881   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
882   bool MovingStackUpdateDown = false;
883 
884   // Check if we can move the stack update.
  // NOTE(review): both range-for loops below copy each CalleeSavedInfo by value.
885   if (stackUpdateCanBeMoved(MF)) {
886     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
887     for (CalleeSavedInfo CSI : Info) {
888       int FrIdx = CSI.getFrameIdx();
889       // If the frame index is not negative the callee saved info belongs to a
890       // stack object that is not a fixed stack object. We ignore non-fixed
891       // stack objects because we won't move the stack update pointer past them.
892       if (FrIdx >= 0)
893         continue;
894 
895       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
896         StackUpdateLoc++;
897         MovingStackUpdateDown = true;
898       } else {
899         // We need all of the Frame Indices to meet these conditions.
900         // If they do not, abort the whole operation.
901         StackUpdateLoc = MBBI;
902         MovingStackUpdateDown = false;
903         break;
904       }
905     }
906 
907     // If the operation was not aborted then update the object offset.
    // Each negative-index slot is shifted by NegFrameSize; the callee-save CFI
    // code at the end of this function subtracts NegFrameSize again so the
    // CFI offsets are unaffected.
908     if (MovingStackUpdateDown) {
909       for (CalleeSavedInfo CSI : Info) {
910         int FrIdx = CSI.getFrameIdx();
911         if (FrIdx < 0)
912           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
913       }
914     }
915   }
916 
917   // Where in the prologue we move the CR fields depends on how many scratch
918   // registers we have, and if we need to save the link register or not. This
919   // lambda is to avoid duplicating the logic in 2 places.
  // In both branches the CR value is moved into TempReg.
920   auto BuildMoveFromCR = [&]() {
921     if (isELFv2ABI && MustSaveCRs.size() == 1) {
922     // In the ELFv2 ABI, we are not required to save all CR fields.
923     // If only one CR field is clobbered, it is more efficient to use
924     // mfocrf to selectively save just that field, because mfocrf has shorter
925     // latency compared to mfcr.
926       assert(isPPC64 && "V2 ABI is 64-bit only.");
927       MachineInstrBuilder MIB =
928           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
929       MIB.addReg(MustSaveCRs[0], RegState::Kill);
930     } else {
931       MachineInstrBuilder MIB =
932           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
933       for (unsigned CRfield : MustSaveCRs)
934         MIB.addReg(CRfield, RegState::ImplicitKill);
935     }
936   };
937 
938   // If we need to spill the CR and the LR but we don't have two separate
939   // registers available, we must spill them one at a time
  // (CR is moved and stored first, so the shared register is free again
  // before the move-from-LR below writes ScratchReg.)
940   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
941     BuildMoveFromCR();
942     BuildMI(MBB, MBBI, dl, StoreWordInst)
943         .addReg(TempReg, getKillRegState(true))
944         .addImm(CRSaveOffset)
945         .addReg(SPReg);
946   }
947 
948   if (MustSaveLR)
949     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
950 
  // Complement of the one-at-a-time case above: the CR move is emitted here
  // and its store follows further down.
951   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
952     BuildMoveFromCR();
953 
  // With a red zone, FP / R30 / BP are stored relative to the not-yet-updated
  // SP, before the frame is allocated. Without one, the equivalent stores are
  // emitted after the stack update (see the !HasRedZone block below).
954   if (HasRedZone) {
955     if (HasFP)
956       BuildMI(MBB, MBBI, dl, StoreInst)
957         .addReg(FPReg)
958         .addImm(FPOffset)
959         .addReg(SPReg);
960     if (FI->usesPICBase())
961       BuildMI(MBB, MBBI, dl, StoreInst)
962         .addReg(PPC::R30)
963         .addImm(PBPOffset)
964         .addReg(SPReg);
965     if (HasBP)
966       BuildMI(MBB, MBBI, dl, StoreInst)
967         .addReg(BPReg)
968         .addImm(BPOffset)
969         .addReg(SPReg);
970   }
971 
  // Note the insertion point: the LR store goes before StackUpdateLoc, not
  // before MBBI.
972   if (MustSaveLR)
973     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
974       .addReg(ScratchReg, getKillRegState(true))
975       .addImm(LROffset)
976       .addReg(SPReg);
977 
978   if (MustSaveCR &&
979       !(SingleScratchReg && MustSaveLR)) {
980     assert(HasRedZone && "A red zone is always available on PPC64");
981     BuildMI(MBB, MBBI, dl, StoreWordInst)
982       .addReg(TempReg, getKillRegState(true))
983       .addImm(CRSaveOffset)
984       .addReg(SPReg);
985   }
986 
987   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
988   if (!FrameSize)
989     return;
990 
991   // Adjust stack pointer: r1 += NegFrameSize.
992   // If there is a preferred stack alignment, align R1 now
993 
994   if (HasBP && HasRedZone) {
995     // Save a copy of r1 as the base pointer.
996     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
997       .addReg(SPReg)
998       .addReg(SPReg);
999   }
1000 
1001   // Have we generated a STUX instruction to claim stack frame? If so,
1002   // the negated frame size will be placed in ScratchReg.
1003   bool HasSTUX = false;
1004 
1005   // This condition must be kept in sync with canUseAsPrologue.
  // Three ways to allocate the frame:
  //  1. HasBP with alignment > 1: compute the adjustment in ScratchReg from
  //     the low bits of SP and NegFrameSize, then use the indexed
  //     store-with-update.
  //  2. Small frame: immediate store-with-update, inserted at StackUpdateLoc.
  //  3. Large frame: materialize NegFrameSize in ScratchReg (lis + ori), then
  //     use the indexed store-with-update.
1006   if (HasBP && MaxAlign > 1) {
1007     if (isPPC64)
1008       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1009           .addReg(SPReg)
1010           .addImm(0)
1011           .addImm(64 - Log2(MaxAlign));
1012     else // PPC32...
1013       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1014           .addReg(SPReg)
1015           .addImm(0)
1016           .addImm(32 - Log2(MaxAlign))
1017           .addImm(31);
1018     if (!isLargeFrame) {
1019       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1020         .addReg(ScratchReg, RegState::Kill)
1021         .addImm(NegFrameSize);
1022     } else {
1023       assert(!SingleScratchReg && "Only a single scratch reg available");
1024       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1025         .addImm(NegFrameSize >> 16);
1026       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1027         .addReg(TempReg, RegState::Kill)
1028         .addImm(NegFrameSize & 0xFFFF);
1029       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1030         .addReg(ScratchReg, RegState::Kill)
1031         .addReg(TempReg, RegState::Kill);
1032     }
1033 
1034     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1035       .addReg(SPReg, RegState::Kill)
1036       .addReg(SPReg)
1037       .addReg(ScratchReg);
1038     HasSTUX = true;
1039 
1040   } else if (!isLargeFrame) {
1041     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1042       .addReg(SPReg)
1043       .addImm(NegFrameSize)
1044       .addReg(SPReg);
1045 
1046   } else {
1047     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1048       .addImm(NegFrameSize >> 16);
1049     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1050       .addReg(ScratchReg, RegState::Kill)
1051       .addImm(NegFrameSize & 0xFFFF);
1052     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1053       .addReg(SPReg, RegState::Kill)
1054       .addReg(SPReg)
1055       .addReg(ScratchReg);
1056     HasSTUX = true;
1057   }
1058 
1059   // Save the TOC register after the stack pointer update if a prologue TOC
1060   // save is required for the function.
1061   if (MustSaveTOC) {
1062     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1063     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1064       .addReg(TOCReg, getKillRegState(true))
1065       .addImm(TOCSaveOffset)
1066       .addReg(SPReg);
1067   }
1068 
1069   if (!HasRedZone) {
1070     assert(!isPPC64 && "A red zone is always available on PPC64");
1071     if (HasSTUX) {
1072       // The negated frame size is in ScratchReg, and the SPReg has been
1073       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1074       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1075       // the stack frame (i.e. the old SP), ideally, we would put the old
1076       // SP into a register and use it as the base for the stores. The
1077       // problem is that the only available register may be ScratchReg,
1078       // which could be R0, and R0 cannot be used as a base address.
1079 
1080       // First, set ScratchReg to the old SP. This may need to be modified
1081       // later.
1082       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1083         .addReg(ScratchReg, RegState::Kill)
1084         .addReg(SPReg);
1085 
1086       if (ScratchReg == PPC::R0) {
1087         // R0 cannot be used as a base register, but it can be used as an
1088         // index in a store-indexed.
        // LastOffset tracks the offset already folded into R0, so each ADDIC
        // below only adds the delta to the next save slot.
1089         int LastOffset = 0;
1090         if (HasFP)  {
1091           // R0 += (FPOffset-LastOffset).
1092           // Need addic, since addi treats R0 as 0.
1093           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1094             .addReg(ScratchReg)
1095             .addImm(FPOffset-LastOffset);
1096           LastOffset = FPOffset;
1097           // Store FP into *R0.
1098           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1099             .addReg(FPReg, RegState::Kill)  // Save FP.
1100             .addReg(PPC::ZERO)
1101             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1102         }
1103         if (FI->usesPICBase()) {
1104           // R0 += (PBPOffset-LastOffset).
1105           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1106             .addReg(ScratchReg)
1107             .addImm(PBPOffset-LastOffset);
1108           LastOffset = PBPOffset;
1109           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1110             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1111             .addReg(PPC::ZERO)
1112             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1113         }
1114         if (HasBP) {
1115           // R0 += (BPOffset-LastOffset).
1116           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1117             .addReg(ScratchReg)
1118             .addImm(BPOffset-LastOffset);
1119           LastOffset = BPOffset;
1120           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1121             .addReg(BPReg, RegState::Kill)  // Save BP.
1122             .addReg(PPC::ZERO)
1123             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1124           // BP = R0-LastOffset
1125           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1126             .addReg(ScratchReg, RegState::Kill)
1127             .addImm(-LastOffset);
1128         }
1129       } else {
1130         // ScratchReg is not R0, so use it as the base register. It is
1131         // already set to the old SP, so we can use the offsets directly.
1132 
1133         // Now that the stack frame has been allocated, save all the necessary
1134         // registers using ScratchReg as the base address.
1135         if (HasFP)
1136           BuildMI(MBB, MBBI, dl, StoreInst)
1137             .addReg(FPReg)
1138             .addImm(FPOffset)
1139             .addReg(ScratchReg);
1140         if (FI->usesPICBase())
1141           BuildMI(MBB, MBBI, dl, StoreInst)
1142             .addReg(PPC::R30)
1143             .addImm(PBPOffset)
1144             .addReg(ScratchReg);
1145         if (HasBP) {
1146           BuildMI(MBB, MBBI, dl, StoreInst)
1147             .addReg(BPReg)
1148             .addImm(BPOffset)
1149             .addReg(ScratchReg);
          // BP := old SP (copied out of ScratchReg, which is killed here).
1150           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1151             .addReg(ScratchReg, RegState::Kill)
1152             .addReg(ScratchReg);
1153         }
1154       }
1155     } else {
1156       // The frame size is a known 16-bit constant (fitting in the immediate
1157       // field of STWU). To be here we have to be compiling for PPC32.
1158       // Since the SPReg has been decreased by FrameSize, add it back to each
1159       // offset.
1160       if (HasFP)
1161         BuildMI(MBB, MBBI, dl, StoreInst)
1162           .addReg(FPReg)
1163           .addImm(FrameSize + FPOffset)
1164           .addReg(SPReg);
1165       if (FI->usesPICBase())
1166         BuildMI(MBB, MBBI, dl, StoreInst)
1167           .addReg(PPC::R30)
1168           .addImm(FrameSize + PBPOffset)
1169           .addReg(SPReg);
1170       if (HasBP) {
1171         BuildMI(MBB, MBBI, dl, StoreInst)
1172           .addReg(BPReg)
1173           .addImm(FrameSize + BPOffset)
1174           .addReg(SPReg);
        // BP := SP + FrameSize, i.e. the SP value before the frame was claimed.
1175         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1176           .addReg(SPReg)
1177           .addImm(FrameSize);
1178       }
1179     }
1180   }
1181 
1182   // Add Call Frame Information for the instructions we generated above.
1183   if (needsCFI) {
1184     unsigned CFIIndex;
1185 
1186     if (HasBP) {
1187       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1188       // because if the stack needed aligning then CFA won't be at a fixed
1189       // offset from FP/SP.
1190       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1191       CFIIndex = MF.addFrameInst(
1192           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1193     } else {
1194       // Adjust the definition of CFA to account for the change in SP.
1195       assert(NegFrameSize);
1196       CFIIndex = MF.addFrameInst(
1197           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1198     }
1199     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1200         .addCFIIndex(CFIIndex);
1201 
1202     if (HasFP) {
1203       // Describe where FP was saved, at a fixed offset from CFA.
1204       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1205       CFIIndex = MF.addFrameInst(
1206           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1207       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1208           .addCFIIndex(CFIIndex);
1209     }
1210 
1211     if (FI->usesPICBase()) {
1212       // Describe where R30 (PIC base) was saved, at a fixed offset from CFA.
1213       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1214       CFIIndex = MF.addFrameInst(
1215           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1216       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1217           .addCFIIndex(CFIIndex);
1218     }
1219 
1220     if (HasBP) {
1221       // Describe where BP was saved, at a fixed offset from CFA.
1222       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1223       CFIIndex = MF.addFrameInst(
1224           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1225       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1226           .addCFIIndex(CFIIndex);
1227     }
1228 
1229     if (MustSaveLR) {
1230       // Describe where LR was saved, at a fixed offset from CFA.
1231       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1232       CFIIndex = MF.addFrameInst(
1233           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1234       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1235           .addCFIIndex(CFIIndex);
1236     }
1237   }
1238 
1239   // If there is a frame pointer, copy R1 into R31
1240   if (HasFP) {
1241     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1242       .addReg(SPReg)
1243       .addReg(SPReg);
1244 
1245     if (!HasBP && needsCFI) {
1246       // Change the definition of CFA from SP+offset to FP+offset, because SP
1247       // will change at every alloca.
1248       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1249       unsigned CFIIndex = MF.addFrameInst(
1250           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1251 
1252       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1253           .addCFIIndex(CFIIndex);
1254     }
1255   }
1256 
1257   if (needsCFI) {
1258     // Describe where callee saved registers were saved, at fixed offsets from
1259     // CFA.
1260     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1261     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1262       unsigned Reg = CSI[I].getReg();
      // No CFI record is emitted in this loop for LR, LR8 or RM.
1263       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1264 
1265       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1266       // subregisters of CR2. We just need to emit a move of CR2.
1267       if (PPC::CRBITRCRegClass.contains(Reg))
1268         continue;
1269 
      // Skip the TOC register when the prologue TOC save was emitted above.
1270       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1271         continue;
1272 
1273       // For SVR4, don't emit a move for the CR spill slot if we haven't
1274       // spilled CRs.
1275       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1276           && !MustSaveCR)
1277         continue;
1278 
1279       // For 64-bit SVR4 when we have spilled CRs, the spill location
1280       // is SP+8, not a frame-relative slot.
1281       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1282         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1283         // the whole CR word.  In the ELFv2 ABI, every CR that was
1284         // actually saved gets its own CFI record.
1285         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1286         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1287             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1288         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1289             .addCFIIndex(CFIIndex);
1290         continue;
1291       }
1292 
      // A callee save kept in another register is described with a
      // register-to-register CFI record instead of a stack offset.
1293       if (CSI[I].isSpilledToReg()) {
1294         unsigned SpilledReg = CSI[I].getDstReg();
1295         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1296             nullptr, MRI->getDwarfRegNum(Reg, true),
1297             MRI->getDwarfRegNum(SpilledReg, true)));
1298         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1299           .addCFIIndex(CFIRegister);
1300       } else {
1301         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1302         // We have changed the object offset above but we do not want to change
1303         // the actual offsets in the CFI instruction so we have to undo the
1304         // offset change here.
1305         if (MovingStackUpdateDown)
1306           Offset -= NegFrameSize;
1307 
1308         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1309             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1310         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1311             .addCFIIndex(CFIIndex);
1312       }
1313     }
1314   }
1315 }
1316 
1317 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1318                                     MachineBasicBlock &MBB) const {
1319   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1320   DebugLoc dl;
1321 
1322   if (MBBI != MBB.end())
1323     dl = MBBI->getDebugLoc();
1324 
1325   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1326   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1327 
1328   // Get alignment info so we know how to restore the SP.
1329   const MachineFrameInfo &MFI = MF.getFrameInfo();
1330 
1331   // Get the number of bytes allocated from the FrameInfo.
1332   int FrameSize = MFI.getStackSize();
1333 
1334   // Get processor type.
1335   bool isPPC64 = Subtarget.isPPC64();
1336   // Get the ABI.
1337   bool isSVR4ABI = Subtarget.isSVR4ABI();
1338 
1339   // Check if the link register (LR) has been saved.
1340   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1341   bool MustSaveLR = FI->mustSaveLR();
1342   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1343   bool MustSaveCR = !MustSaveCRs.empty();
1344   // Do we have a frame pointer and/or base pointer for this function?
1345   bool HasFP = hasFP(MF);
1346   bool HasBP = RegInfo->hasBasePointer(MF);
1347   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1348 
1349   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1350   Register BPReg = RegInfo->getBaseRegister(MF);
1351   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1352   Register ScratchReg;
1353   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1354   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1355                                                  : PPC::MTLR );
1356   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1357                                                  : PPC::LWZ );
1358   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1359                                                            : PPC::LIS );
1360   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1361                                               : PPC::OR );
1362   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1363                                                   : PPC::ORI );
1364   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1365                                                    : PPC::ADDI );
1366   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1367                                                 : PPC::ADD4 );
1368   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1369                                                      : PPC::LWZ);
1370   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1371                                                      : PPC::MTOCRF);
1372   int LROffset = getReturnSaveOffset();
1373 
1374   int FPOffset = 0;
1375 
1376   // Using the same bool variable as below to suppress compiler warnings.
1377   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1378                                               &TempReg);
1379   assert(SingleScratchReg &&
1380          "Could not find an available scratch register");
1381 
1382   SingleScratchReg = ScratchReg == TempReg;
1383 
1384   if (HasFP) {
1385     if (isSVR4ABI) {
1386       int FPIndex = FI->getFramePointerSaveIndex();
1387       assert(FPIndex && "No Frame Pointer Save Slot!");
1388       FPOffset = MFI.getObjectOffset(FPIndex);
1389     } else {
1390       FPOffset = getFramePointerSaveOffset();
1391     }
1392   }
1393 
1394   int BPOffset = 0;
1395   if (HasBP) {
1396     if (isSVR4ABI) {
1397       int BPIndex = FI->getBasePointerSaveIndex();
1398       assert(BPIndex && "No Base Pointer Save Slot!");
1399       BPOffset = MFI.getObjectOffset(BPIndex);
1400     } else {
1401       BPOffset = getBasePointerSaveOffset();
1402     }
1403   }
1404 
1405   int PBPOffset = 0;
1406   if (FI->usesPICBase()) {
1407     int PBPIndex = FI->getPICBasePointerSaveIndex();
1408     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1409     PBPOffset = MFI.getObjectOffset(PBPIndex);
1410   }
1411 
1412   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1413 
1414   if (IsReturnBlock) {
1415     unsigned RetOpcode = MBBI->getOpcode();
1416     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1417                       RetOpcode == PPC::TCRETURNdi ||
1418                       RetOpcode == PPC::TCRETURNai ||
1419                       RetOpcode == PPC::TCRETURNri8 ||
1420                       RetOpcode == PPC::TCRETURNdi8 ||
1421                       RetOpcode == PPC::TCRETURNai8;
1422 
1423     if (UsesTCRet) {
1424       int MaxTCRetDelta = FI->getTailCallSPDelta();
1425       MachineOperand &StackAdjust = MBBI->getOperand(1);
1426       assert(StackAdjust.isImm() && "Expecting immediate value.");
1427       // Adjust stack pointer.
1428       int StackAdj = StackAdjust.getImm();
1429       int Delta = StackAdj - MaxTCRetDelta;
1430       assert((Delta >= 0) && "Delta must be positive");
1431       if (MaxTCRetDelta>0)
1432         FrameSize += (StackAdj +Delta);
1433       else
1434         FrameSize += StackAdj;
1435     }
1436   }
1437 
1438   // Frames of 32KB & larger require special handling because they cannot be
1439   // indexed into with a simple LD/LWZ immediate offset operand.
1440   bool isLargeFrame = !isInt<16>(FrameSize);
1441 
1442   // On targets without red zone, the SP needs to be restored last, so that
1443   // all live contents of the stack frame are upwards of the SP. This means
1444   // that we cannot restore SP just now, since there may be more registers
1445   // to restore from the stack frame (e.g. R31). If the frame size is not
1446   // a simple immediate value, we will need a spare register to hold the
1447   // restored SP. If the frame size is known and small, we can simply adjust
1448   // the offsets of the registers to be restored, and still use SP to restore
1449   // them. In such case, the final update of SP will be to add the frame
1450   // size to it.
1451   // To simplify the code, set RBReg to the base register used to restore
1452   // values from the stack, and set SPAdd to the value that needs to be added
1453   // to the SP at the end. The default values are as if red zone was present.
1454   unsigned RBReg = SPReg;
1455   unsigned SPAdd = 0;
1456 
1457   // Check if we can move the stack update instruction up the epilogue
1458   // past the callee saves. This will allow the move to LR instruction
1459   // to be executed before the restores of the callee saves which means
1460   // that the callee saves can hide the latency from the MTLR instruction.
1461   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1462   if (stackUpdateCanBeMoved(MF)) {
1463     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1464     for (CalleeSavedInfo CSI : Info) {
1465       int FrIdx = CSI.getFrameIdx();
1466       // If the frame index is not negative the callee saved info belongs to a
1467       // stack object that is not a fixed stack object. We ignore non-fixed
1468       // stack objects because we won't move the update of the stack pointer
1469       // past them.
1470       if (FrIdx >= 0)
1471         continue;
1472 
1473       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1474         StackUpdateLoc--;
1475       else {
1476         // Abort the operation as we can't update all CSR restores.
1477         StackUpdateLoc = MBBI;
1478         break;
1479       }
1480     }
1481   }
1482 
1483   if (FrameSize) {
1484     // In the prologue, the loaded (or persistent) stack pointer value is
1485     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1486     // zone add this offset back now.
1487 
1488     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1489     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1490     // call which invalidates the stack pointer value in SP(0). So we use the
1491     // value of R31 in this case.
1492     if (FI->hasFastCall()) {
1493       assert(HasFP && "Expecting a valid frame pointer.");
1494       if (!HasRedZone)
1495         RBReg = FPReg;
1496       if (!isLargeFrame) {
1497         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1498           .addReg(FPReg).addImm(FrameSize);
1499       } else {
1500         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1501           .addImm(FrameSize >> 16);
1502         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1503           .addReg(ScratchReg, RegState::Kill)
1504           .addImm(FrameSize & 0xFFFF);
1505         BuildMI(MBB, MBBI, dl, AddInst)
1506           .addReg(RBReg)
1507           .addReg(FPReg)
1508           .addReg(ScratchReg);
1509       }
1510     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1511       if (HasRedZone) {
1512         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1513           .addReg(SPReg)
1514           .addImm(FrameSize);
1515       } else {
1516         // Make sure that adding FrameSize will not overflow the max offset
1517         // size.
1518         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1519                "Local offsets should be negative");
1520         SPAdd = FrameSize;
1521         FPOffset += FrameSize;
1522         BPOffset += FrameSize;
1523         PBPOffset += FrameSize;
1524       }
1525     } else {
1526       // We don't want to use ScratchReg as a base register, because it
1527       // could happen to be R0. Use FP instead, but make sure to preserve it.
1528       if (!HasRedZone) {
1529         // If FP is not saved, copy it to ScratchReg.
1530         if (!HasFP)
1531           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1532             .addReg(FPReg)
1533             .addReg(FPReg);
1534         RBReg = FPReg;
1535       }
1536       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1537         .addImm(0)
1538         .addReg(SPReg);
1539     }
1540   }
1541   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1542   // If there is no red zone, ScratchReg may be needed for holding a useful
1543   // value (although not the base register). Make sure it is not overwritten
1544   // too early.
1545 
1546   // If we need to restore both the LR and the CR and we only have one
1547   // available scratch register, we must do them one at a time.
1548   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1549     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1550     // is live here.
1551     assert(HasRedZone && "Expecting red zone");
1552     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1553       .addImm(CRSaveOffset)
1554       .addReg(SPReg);
1555     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1556       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1557         .addReg(TempReg, getKillRegState(i == e-1));
1558   }
1559 
1560   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1561   // LR is stored in the caller's stack frame. ScratchReg will be needed
1562   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1563   // a base register anyway, because it may happen to be R0.
1564   bool LoadedLR = false;
1565   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1566     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1567       .addImm(LROffset+SPAdd)
1568       .addReg(RBReg);
1569     LoadedLR = true;
1570   }
1571 
1572   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1573     assert(RBReg == SPReg && "Should be using SP as a base register");
1574     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1575       .addImm(CRSaveOffset)
1576       .addReg(RBReg);
1577   }
1578 
1579   if (HasFP) {
1580     // If there is red zone, restore FP directly, since SP has already been
1581     // restored. Otherwise, restore the value of FP into ScratchReg.
1582     if (HasRedZone || RBReg == SPReg)
1583       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1584         .addImm(FPOffset)
1585         .addReg(SPReg);
1586     else
1587       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1588         .addImm(FPOffset)
1589         .addReg(RBReg);
1590   }
1591 
1592   if (FI->usesPICBase())
1593     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1594       .addImm(PBPOffset)
1595       .addReg(RBReg);
1596 
1597   if (HasBP)
1598     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1599       .addImm(BPOffset)
1600       .addReg(RBReg);
1601 
1602   // There is nothing more to be loaded from the stack, so now we can
1603   // restore SP: SP = RBReg + SPAdd.
1604   if (RBReg != SPReg || SPAdd != 0) {
1605     assert(!HasRedZone && "This should not happen with red zone");
1606     // If SPAdd is 0, generate a copy.
1607     if (SPAdd == 0)
1608       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1609         .addReg(RBReg)
1610         .addReg(RBReg);
1611     else
1612       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1613         .addReg(RBReg)
1614         .addImm(SPAdd);
1615 
1616     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1617     if (RBReg == FPReg)
1618       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1619         .addReg(ScratchReg)
1620         .addReg(ScratchReg);
1621 
1622     // Now load the LR from the caller's stack frame.
1623     if (MustSaveLR && !LoadedLR)
1624       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1625         .addImm(LROffset)
1626         .addReg(SPReg);
1627   }
1628 
1629   if (MustSaveCR &&
1630       !(SingleScratchReg && MustSaveLR))
1631     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1632       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1633         .addReg(TempReg, getKillRegState(i == e-1));
1634 
1635   if (MustSaveLR)
1636     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1637 
1638   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1639   // call optimization
1640   if (IsReturnBlock) {
1641     unsigned RetOpcode = MBBI->getOpcode();
1642     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1643         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1644         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1645       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1646       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1647 
1648       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1649         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1650           .addReg(SPReg).addImm(CallerAllocatedAmt);
1651       } else {
1652         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1653           .addImm(CallerAllocatedAmt >> 16);
1654         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1655           .addReg(ScratchReg, RegState::Kill)
1656           .addImm(CallerAllocatedAmt & 0xFFFF);
1657         BuildMI(MBB, MBBI, dl, AddInst)
1658           .addReg(SPReg)
1659           .addReg(FPReg)
1660           .addReg(ScratchReg);
1661       }
1662     } else {
1663       createTailCallBranchInstr(MBB);
1664     }
1665   }
1666 }
1667 
1668 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1669   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1670 
1671   // If we got this far a first terminator should exist.
1672   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1673 
1674   DebugLoc dl = MBBI->getDebugLoc();
1675   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1676 
1677   // Create branch instruction for pseudo tail call return instruction.
1678   // The TCRETURNdi variants are direct calls. Valid targets for those are
1679   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1680   // since we can tail call external functions with PC-Rel (i.e. we don't need
1681   // to worry about different TOC pointers). Some of the external functions will
1682   // be MO_GlobalAddress while others like memcpy for example, are going to
1683   // be MO_ExternalSymbol.
1684   unsigned RetOpcode = MBBI->getOpcode();
1685   if (RetOpcode == PPC::TCRETURNdi) {
1686     MBBI = MBB.getLastNonDebugInstr();
1687     MachineOperand &JumpTarget = MBBI->getOperand(0);
1688     if (JumpTarget.isGlobal())
1689       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1690         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1691     else if (JumpTarget.isSymbol())
1692       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1693         addExternalSymbol(JumpTarget.getSymbolName());
1694     else
1695       llvm_unreachable("Expecting Global or External Symbol");
1696   } else if (RetOpcode == PPC::TCRETURNri) {
1697     MBBI = MBB.getLastNonDebugInstr();
1698     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1699     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1700   } else if (RetOpcode == PPC::TCRETURNai) {
1701     MBBI = MBB.getLastNonDebugInstr();
1702     MachineOperand &JumpTarget = MBBI->getOperand(0);
1703     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1704   } else if (RetOpcode == PPC::TCRETURNdi8) {
1705     MBBI = MBB.getLastNonDebugInstr();
1706     MachineOperand &JumpTarget = MBBI->getOperand(0);
1707     if (JumpTarget.isGlobal())
1708       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1709         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1710     else if (JumpTarget.isSymbol())
1711       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1712         addExternalSymbol(JumpTarget.getSymbolName());
1713     else
1714       llvm_unreachable("Expecting Global or External Symbol");
1715   } else if (RetOpcode == PPC::TCRETURNri8) {
1716     MBBI = MBB.getLastNonDebugInstr();
1717     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1718     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1719   } else if (RetOpcode == PPC::TCRETURNai8) {
1720     MBBI = MBB.getLastNonDebugInstr();
1721     MachineOperand &JumpTarget = MBBI->getOperand(0);
1722     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1723   }
1724 }
1725 
1726 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1727                                             BitVector &SavedRegs,
1728                                             RegScavenger *RS) const {
1729   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1730 
1731   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1732 
1733   //  Save and clear the LR state.
1734   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1735   unsigned LR = RegInfo->getRARegister();
1736   FI->setMustSaveLR(MustSaveLR(MF, LR));
1737   SavedRegs.reset(LR);
1738 
1739   //  Save R31 if necessary
1740   int FPSI = FI->getFramePointerSaveIndex();
1741   const bool isPPC64 = Subtarget.isPPC64();
1742   MachineFrameInfo &MFI = MF.getFrameInfo();
1743 
1744   // If the frame pointer save index hasn't been defined yet.
1745   if (!FPSI && needsFP(MF)) {
1746     // Find out what the fix offset of the frame pointer save area.
1747     int FPOffset = getFramePointerSaveOffset();
1748     // Allocate the frame index for frame pointer save area.
1749     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1750     // Save the result.
1751     FI->setFramePointerSaveIndex(FPSI);
1752   }
1753 
1754   int BPSI = FI->getBasePointerSaveIndex();
1755   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1756     int BPOffset = getBasePointerSaveOffset();
1757     // Allocate the frame index for the base pointer save area.
1758     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1759     // Save the result.
1760     FI->setBasePointerSaveIndex(BPSI);
1761   }
1762 
1763   // Reserve stack space for the PIC Base register (R30).
1764   // Only used in SVR4 32-bit.
1765   if (FI->usesPICBase()) {
1766     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1767     FI->setPICBasePointerSaveIndex(PBPSI);
1768   }
1769 
1770   // Make sure we don't explicitly spill r31, because, for example, we have
1771   // some inline asm which explicitly clobbers it, when we otherwise have a
1772   // frame pointer and are using r31's spill slot for the prologue/epilogue
1773   // code. Same goes for the base pointer and the PIC base register.
1774   if (needsFP(MF))
1775     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1776   if (RegInfo->hasBasePointer(MF))
1777     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1778   if (FI->usesPICBase())
1779     SavedRegs.reset(PPC::R30);
1780 
1781   // Reserve stack space to move the linkage area to in case of a tail call.
1782   int TCSPDelta = 0;
1783   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1784       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1785     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1786   }
1787 
1788   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
1789   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
1790   // object at the offset of the CR-save slot in the linkage area. The actual
1791   // save and restore of the condition register will be created as part of the
1792   // prologue and epilogue insertion, but the FixedStack object is needed to
1793   // keep the CalleSavedInfo valid.
1794   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1795        SavedRegs.test(PPC::CR4))) {
1796     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1797     const int64_t SpillOffset =
1798         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
1799     int FrameIdx =
1800         MFI.CreateFixedObject(SpillSize, SpillOffset,
1801                               /* IsImmutable */ true, /* IsAliased */ false);
1802     FI->setCRSpillFrameIndex(FrameIdx);
1803   }
1804 }
1805 
1806 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1807                                                        RegScavenger *RS) const {
1808   // Early exit if not using the SVR4 ABI.
1809   if (!Subtarget.isSVR4ABI()) {
1810     addScavengingSpillSlot(MF, RS);
1811     return;
1812   }
1813 
1814   // Get callee saved register information.
1815   MachineFrameInfo &MFI = MF.getFrameInfo();
1816   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1817 
1818   // If the function is shrink-wrapped, and if the function has a tail call, the
1819   // tail call might not be in the new RestoreBlock, so real branch instruction
1820   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1821   // RestoreBlock. So we handle this case here.
1822   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1823     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1824     for (MachineBasicBlock &MBB : MF) {
1825       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1826         createTailCallBranchInstr(MBB);
1827     }
1828   }
1829 
1830   // Early exit if no callee saved registers are modified!
1831   if (CSI.empty() && !needsFP(MF)) {
1832     addScavengingSpillSlot(MF, RS);
1833     return;
1834   }
1835 
1836   unsigned MinGPR = PPC::R31;
1837   unsigned MinG8R = PPC::X31;
1838   unsigned MinFPR = PPC::F31;
1839   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1840 
1841   bool HasGPSaveArea = false;
1842   bool HasG8SaveArea = false;
1843   bool HasFPSaveArea = false;
1844   bool HasVRSAVESaveArea = false;
1845   bool HasVRSaveArea = false;
1846 
1847   SmallVector<CalleeSavedInfo, 18> GPRegs;
1848   SmallVector<CalleeSavedInfo, 18> G8Regs;
1849   SmallVector<CalleeSavedInfo, 18> FPRegs;
1850   SmallVector<CalleeSavedInfo, 18> VRegs;
1851 
1852   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1853     unsigned Reg = CSI[i].getReg();
1854     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1855             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1856            "Not expecting to try to spill R2 in a function that must save TOC");
1857     if (PPC::GPRCRegClass.contains(Reg)) {
1858       HasGPSaveArea = true;
1859 
1860       GPRegs.push_back(CSI[i]);
1861 
1862       if (Reg < MinGPR) {
1863         MinGPR = Reg;
1864       }
1865     } else if (PPC::G8RCRegClass.contains(Reg)) {
1866       HasG8SaveArea = true;
1867 
1868       G8Regs.push_back(CSI[i]);
1869 
1870       if (Reg < MinG8R) {
1871         MinG8R = Reg;
1872       }
1873     } else if (PPC::F8RCRegClass.contains(Reg)) {
1874       HasFPSaveArea = true;
1875 
1876       FPRegs.push_back(CSI[i]);
1877 
1878       if (Reg < MinFPR) {
1879         MinFPR = Reg;
1880       }
1881     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1882                PPC::CRRCRegClass.contains(Reg)) {
1883       ; // do nothing, as we already know whether CRs are spilled
1884     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1885       HasVRSAVESaveArea = true;
1886     } else if (PPC::VRRCRegClass.contains(Reg) ||
1887                PPC::SPERCRegClass.contains(Reg)) {
1888       // Altivec and SPE are mutually exclusive, but have the same stack
1889       // alignment requirements, so overload the save area for both cases.
1890       HasVRSaveArea = true;
1891 
1892       VRegs.push_back(CSI[i]);
1893 
1894       if (Reg < MinVR) {
1895         MinVR = Reg;
1896       }
1897     } else {
1898       llvm_unreachable("Unknown RegisterClass!");
1899     }
1900   }
1901 
1902   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1903   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1904 
1905   int64_t LowerBound = 0;
1906 
1907   // Take into account stack space reserved for tail calls.
1908   int TCSPDelta = 0;
1909   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1910       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1911     LowerBound = TCSPDelta;
1912   }
1913 
1914   // The Floating-point register save area is right below the back chain word
1915   // of the previous stack frame.
1916   if (HasFPSaveArea) {
1917     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1918       int FI = FPRegs[i].getFrameIdx();
1919 
1920       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1921     }
1922 
1923     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1924   }
1925 
1926   // Check whether the frame pointer register is allocated. If so, make sure it
1927   // is spilled to the correct offset.
1928   if (needsFP(MF)) {
1929     int FI = PFI->getFramePointerSaveIndex();
1930     assert(FI && "No Frame Pointer Save Slot!");
1931     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1932     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1933     HasGPSaveArea = true;
1934   }
1935 
1936   if (PFI->usesPICBase()) {
1937     int FI = PFI->getPICBasePointerSaveIndex();
1938     assert(FI && "No PIC Base Pointer Save Slot!");
1939     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1940 
1941     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1942     HasGPSaveArea = true;
1943   }
1944 
1945   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1946   if (RegInfo->hasBasePointer(MF)) {
1947     int FI = PFI->getBasePointerSaveIndex();
1948     assert(FI && "No Base Pointer Save Slot!");
1949     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1950 
1951     Register BP = RegInfo->getBaseRegister(MF);
1952     if (PPC::G8RCRegClass.contains(BP)) {
1953       MinG8R = std::min<unsigned>(MinG8R, BP);
1954       HasG8SaveArea = true;
1955     } else if (PPC::GPRCRegClass.contains(BP)) {
1956       MinGPR = std::min<unsigned>(MinGPR, BP);
1957       HasGPSaveArea = true;
1958     }
1959   }
1960 
1961   // General register save area starts right below the Floating-point
1962   // register save area.
1963   if (HasGPSaveArea || HasG8SaveArea) {
1964     // Move general register save area spill slots down, taking into account
1965     // the size of the Floating-point register save area.
1966     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1967       if (!GPRegs[i].isSpilledToReg()) {
1968         int FI = GPRegs[i].getFrameIdx();
1969         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1970       }
1971     }
1972 
1973     // Move general register save area spill slots down, taking into account
1974     // the size of the Floating-point register save area.
1975     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1976       if (!G8Regs[i].isSpilledToReg()) {
1977         int FI = G8Regs[i].getFrameIdx();
1978         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1979       }
1980     }
1981 
1982     unsigned MinReg =
1983       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1984                          TRI->getEncodingValue(MinG8R));
1985 
1986     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
1987     LowerBound -= (31 - MinReg + 1) * GPRegSize;
1988   }
1989 
1990   // For 32-bit only, the CR save area is below the general register
1991   // save area.  For 64-bit SVR4, the CR save area is addressed relative
1992   // to the stack pointer and hence does not need an adjustment here.
1993   // Only CR2 (the first nonvolatile spilled) has an associated frame
1994   // index so that we have a single uniform save area.
1995   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
1996     // Adjust the frame index of the CR spill slot.
1997     for (const auto &CSInfo : CSI) {
1998       if (CSInfo.getReg() == PPC::CR2) {
1999         int FI = CSInfo.getFrameIdx();
2000         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2001         break;
2002       }
2003     }
2004 
2005     LowerBound -= 4; // The CR save area is always 4 bytes long.
2006   }
2007 
2008   if (HasVRSAVESaveArea) {
2009     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2010     //             which have the VRSAVE register class?
2011     // Adjust the frame index of the VRSAVE spill slot.
2012     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2013       unsigned Reg = CSI[i].getReg();
2014 
2015       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2016         int FI = CSI[i].getFrameIdx();
2017 
2018         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2019       }
2020     }
2021 
2022     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2023   }
2024 
2025   // Both Altivec and SPE have the same alignment and padding requirements
2026   // within the stack frame.
2027   if (HasVRSaveArea) {
2028     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2029     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2030     // we are using negative number here (the stack grows downward). We should
2031     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2032     // is the alignment size ( n = 16 here) and y is the size after aligning.
2033     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2034     LowerBound &= ~(15);
2035 
2036     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2037       int FI = VRegs[i].getFrameIdx();
2038 
2039       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2040     }
2041   }
2042 
2043   addScavengingSpillSlot(MF, RS);
2044 }
2045 
2046 void
2047 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2048                                          RegScavenger *RS) const {
2049   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2050   // a large stack, which will require scavenging a register to materialize a
2051   // large offset.
2052 
2053   // We need to have a scavenger spill slot for spills if the frame size is
2054   // large. In case there is no free register for large-offset addressing,
2055   // this slot is used for the necessary emergency spill. Also, we need the
2056   // slot for dynamic stack allocations.
2057 
2058   // The scavenger might be invoked if the frame offset does not fit into
2059   // the 16-bit immediate. We don't know the complete frame size here
2060   // because we've not yet computed callee-saved register spills or the
2061   // needed alignment padding.
2062   unsigned StackSize = determineFrameLayout(MF, true);
2063   MachineFrameInfo &MFI = MF.getFrameInfo();
2064   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2065       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2066     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2067     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2068     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2069     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2070     unsigned Size = TRI.getSpillSize(RC);
2071     unsigned Align = TRI.getSpillAlignment(RC);
2072     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2073 
2074     // Might we have over-aligned allocas?
2075     bool HasAlVars =
2076         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2077 
2078     // These kinds of spills might need two registers.
2079     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2080       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2081 
2082   }
2083 }
2084 
2085 // This function checks if a callee saved gpr can be spilled to a volatile
2086 // vector register. This occurs for leaf functions when the option
2087 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2088 // which were not spilled to vectors, return false so the target independent
2089 // code can handle them by assigning a FrameIdx to a stack slot.
2090 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2091     MachineFunction &MF, const TargetRegisterInfo *TRI,
2092     std::vector<CalleeSavedInfo> &CSI) const {
2093 
2094   if (CSI.empty())
2095     return true; // Early exit if no callee saved registers are modified!
2096 
2097   // Early exit if cannot spill gprs to volatile vector registers.
2098   MachineFrameInfo &MFI = MF.getFrameInfo();
2099   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2100     return false;
2101 
2102   // Build a BitVector of VSRs that can be used for spilling GPRs.
2103   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2104   BitVector BVCalleeSaved(TRI->getNumRegs());
2105   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2106   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2107   for (unsigned i = 0; CSRegs[i]; ++i)
2108     BVCalleeSaved.set(CSRegs[i]);
2109 
2110   for (unsigned Reg : BVAllocatable.set_bits()) {
2111     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2112     // used in the function.
2113     if (BVCalleeSaved[Reg] ||
2114         (!PPC::F8RCRegClass.contains(Reg) &&
2115          !PPC::VFRCRegClass.contains(Reg)) ||
2116         (MF.getRegInfo().isPhysRegUsed(Reg)))
2117       BVAllocatable.reset(Reg);
2118   }
2119 
2120   bool AllSpilledToReg = true;
2121   for (auto &CS : CSI) {
2122     if (BVAllocatable.none())
2123       return false;
2124 
2125     unsigned Reg = CS.getReg();
2126     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2127       AllSpilledToReg = false;
2128       continue;
2129     }
2130 
2131     unsigned VolatileVFReg = BVAllocatable.find_first();
2132     if (VolatileVFReg < BVAllocatable.size()) {
2133       CS.setDstReg(VolatileVFReg);
2134       BVAllocatable.reset(VolatileVFReg);
2135     } else {
2136       AllSpilledToReg = false;
2137     }
2138   }
2139   return AllSpilledToReg;
2140 }
2141 
2142 bool PPCFrameLowering::spillCalleeSavedRegisters(
2143     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2144     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2145 
2146   MachineFunction *MF = MBB.getParent();
2147   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2148   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2149   bool MustSaveTOC = FI->mustSaveTOC();
2150   DebugLoc DL;
2151   bool CRSpilled = false;
2152   MachineInstrBuilder CRMIB;
2153 
2154   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2155     unsigned Reg = CSI[i].getReg();
2156     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2157     if (Reg == PPC::VRSAVE)
2158       continue;
2159 
2160     // CR2 through CR4 are the nonvolatile CR fields.
2161     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2162 
2163     // Add the callee-saved register as live-in; it's killed at the spill.
2164     // Do not do this for callee-saved registers that are live-in to the
2165     // function because they will already be marked live-in and this will be
2166     // adding it for a second time. It is an error to add the same register
2167     // to the set more than once.
2168     const MachineRegisterInfo &MRI = MF->getRegInfo();
2169     bool IsLiveIn = MRI.isLiveIn(Reg);
2170     if (!IsLiveIn)
2171        MBB.addLiveIn(Reg);
2172 
2173     if (CRSpilled && IsCRField) {
2174       CRMIB.addReg(Reg, RegState::ImplicitKill);
2175       continue;
2176     }
2177 
2178     // The actual spill will happen in the prologue.
2179     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2180       continue;
2181 
2182     // Insert the spill to the stack frame.
2183     if (IsCRField) {
2184       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2185       if (!Subtarget.is32BitELFABI()) {
2186         // The actual spill will happen at the start of the prologue.
2187         FuncInfo->addMustSaveCR(Reg);
2188       } else {
2189         CRSpilled = true;
2190         FuncInfo->setSpillsCR();
2191 
2192         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2193         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2194         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2195                   .addReg(Reg, RegState::ImplicitKill);
2196 
2197         MBB.insert(MI, CRMIB);
2198         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2199                                          .addReg(PPC::R12,
2200                                                  getKillRegState(true)),
2201                                          CSI[i].getFrameIdx()));
2202       }
2203     } else {
2204       if (CSI[i].isSpilledToReg()) {
2205         NumPESpillVSR++;
2206         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2207           .addReg(Reg, getKillRegState(true));
2208       } else {
2209         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2210         // Use !IsLiveIn for the kill flag.
2211         // We do not want to kill registers that are live in this function
2212         // before their use because they will become undefined registers.
2213         // Functions without NoUnwind need to preserve the order of elements in
2214         // saved vector registers.
2215         if (Subtarget.needsSwapsForVSXMemOps() &&
2216             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2217           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2218                                        CSI[i].getFrameIdx(), RC, TRI);
2219         else
2220           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2221                                   RC, TRI);
2222       }
2223     }
2224   }
2225   return true;
2226 }
2227 
2228 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2229                        bool CR4Spilled, MachineBasicBlock &MBB,
2230                        MachineBasicBlock::iterator MI,
2231                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2232 
2233   MachineFunction *MF = MBB.getParent();
2234   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2235   DebugLoc DL;
2236   unsigned MoveReg = PPC::R12;
2237 
2238   // 32-bit:  FP-relative
2239   MBB.insert(MI,
2240              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2241                                CSI[CSIIndex].getFrameIdx()));
2242 
2243   unsigned RestoreOp = PPC::MTOCRF;
2244   if (CR2Spilled)
2245     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2246                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2247 
2248   if (CR3Spilled)
2249     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2250                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2251 
2252   if (CR4Spilled)
2253     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2254                .addReg(MoveReg, getKillRegState(true)));
2255 }
2256 
2257 MachineBasicBlock::iterator PPCFrameLowering::
2258 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2259                               MachineBasicBlock::iterator I) const {
2260   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2261   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2262       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2263     // Add (actually subtract) back the amount the callee popped on return.
2264     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2265       bool is64Bit = Subtarget.isPPC64();
2266       CalleeAmt *= -1;
2267       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2268       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2269       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2270       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2271       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2272       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2273       const DebugLoc &dl = I->getDebugLoc();
2274 
2275       if (isInt<16>(CalleeAmt)) {
2276         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2277           .addReg(StackReg, RegState::Kill)
2278           .addImm(CalleeAmt);
2279       } else {
2280         MachineBasicBlock::iterator MBBI = I;
2281         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2282           .addImm(CalleeAmt >> 16);
2283         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2284           .addReg(TmpReg, RegState::Kill)
2285           .addImm(CalleeAmt & 0xFFFF);
2286         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2287           .addReg(StackReg, RegState::Kill)
2288           .addReg(TmpReg);
2289       }
2290     }
2291   }
2292   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2293   return MBB.erase(I);
2294 }
2295 
2296 static bool isCalleeSavedCR(unsigned Reg) {
2297   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2298 }
2299 
2300 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2301     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2302     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2303   MachineFunction *MF = MBB.getParent();
2304   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2305   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2306   bool MustSaveTOC = FI->mustSaveTOC();
2307   bool CR2Spilled = false;
2308   bool CR3Spilled = false;
2309   bool CR4Spilled = false;
2310   unsigned CSIIndex = 0;
2311 
2312   // Initialize insertion-point logic; we will be restoring in reverse
2313   // order of spill.
2314   MachineBasicBlock::iterator I = MI, BeforeI = I;
2315   bool AtStart = I == MBB.begin();
2316 
2317   if (!AtStart)
2318     --BeforeI;
2319 
2320   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2321     unsigned Reg = CSI[i].getReg();
2322 
2323     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2324     if (Reg == PPC::VRSAVE)
2325       continue;
2326 
2327     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2328       continue;
2329 
2330     // Restore of callee saved condition register field is handled during
2331     // epilogue insertion.
2332     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2333       continue;
2334 
2335     if (Reg == PPC::CR2) {
2336       CR2Spilled = true;
2337       // The spill slot is associated only with CR2, which is the
2338       // first nonvolatile spilled.  Save it here.
2339       CSIIndex = i;
2340       continue;
2341     } else if (Reg == PPC::CR3) {
2342       CR3Spilled = true;
2343       continue;
2344     } else if (Reg == PPC::CR4) {
2345       CR4Spilled = true;
2346       continue;
2347     } else {
2348       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2349       // least one CR register, restore all spilled CRs together.
2350       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2351         bool is31 = needsFP(*MF);
2352         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2353                    CSIIndex);
2354         CR2Spilled = CR3Spilled = CR4Spilled = false;
2355       }
2356 
2357       if (CSI[i].isSpilledToReg()) {
2358         DebugLoc DL;
2359         NumPEReloadVSR++;
2360         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2361             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2362       } else {
2363        // Default behavior for non-CR saves.
2364         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2365 
2366         // Functions without NoUnwind need to preserve the order of elements in
2367         // saved vector registers.
2368         if (Subtarget.needsSwapsForVSXMemOps() &&
2369             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2370           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2371                                         TRI);
2372         else
2373           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2374 
2375         assert(I != MBB.begin() &&
2376                "loadRegFromStackSlot didn't insert any code!");
2377       }
2378     }
2379 
2380     // Insert in reverse order.
2381     if (AtStart)
2382       I = MBB.begin();
2383     else {
2384       I = BeforeI;
2385       ++I;
2386     }
2387   }
2388 
2389   // If we haven't yet spilled the CRs, do so now.
2390   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2391     assert(Subtarget.is32BitELFABI() &&
2392            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2393     bool is31 = needsFP(*MF);
2394     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2395   }
2396 
2397   return true;
2398 }
2399 
2400 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2401   return TOCSaveOffset;
2402 }
2403 
2404 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2405   return FramePointerSaveOffset;
2406 }
2407 
2408 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2409   if (Subtarget.isAIXABI())
2410     report_fatal_error("BasePointer is not implemented on AIX yet.");
2411   return BasePointerSaveOffset;
2412 }
2413 
2414 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2415   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2416     return false;
2417   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2418           MF.getSubtarget<PPCSubtarget>().isPPC64());
2419 }
2420