xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 3e62289f42d21e7e1f9a8b1d6f970740b22f5d47)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
34 
35 static cl::opt<bool>
36 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
37                      cl::desc("Enable spills in prologue to vector registers."),
38                      cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   if (STI.isAIXABI())
58     return STI.isPPC64() ? 40 : 20;
59   return STI.isELFv2ABI() ? 24 : 40;
60 }
61 
62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63   // First slot in the general register save area.
64   return STI.isPPC64() ? -8U : -4U;
65 }
66 
67 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
68   if (STI.isAIXABI() || STI.isPPC64())
69     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
70 
71   // 32-bit SVR4 ABI:
72   return 8;
73 }
74 
75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
76   // SVR4 ABI: First slot in the general register save area.
77   return STI.isPPC64()
78              ? -16U
79              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
80 }
81 
82 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
83   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
84 }
85 
86 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
87     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
88                           STI.getPlatformStackAlignment(), 0),
89       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
90       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
91       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
92       LinkageSize(computeLinkageSize(Subtarget)),
93       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
94       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
95 
96 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
97 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
98     unsigned &NumEntries) const {
99 
100 // Floating-point register save area offsets.
101 #define CALLEE_SAVED_FPRS \
102       {PPC::F31, -8},     \
103       {PPC::F30, -16},    \
104       {PPC::F29, -24},    \
105       {PPC::F28, -32},    \
106       {PPC::F27, -40},    \
107       {PPC::F26, -48},    \
108       {PPC::F25, -56},    \
109       {PPC::F24, -64},    \
110       {PPC::F23, -72},    \
111       {PPC::F22, -80},    \
112       {PPC::F21, -88},    \
113       {PPC::F20, -96},    \
114       {PPC::F19, -104},   \
115       {PPC::F18, -112},   \
116       {PPC::F17, -120},   \
117       {PPC::F16, -128},   \
118       {PPC::F15, -136},   \
119       {PPC::F14, -144}
120 
121 // 32-bit general purpose register save area offsets shared by ELF and
122 // AIX. AIX has an extra CSR with r13.
123 #define CALLEE_SAVED_GPRS32 \
124       {PPC::R31, -4},       \
125       {PPC::R30, -8},       \
126       {PPC::R29, -12},      \
127       {PPC::R28, -16},      \
128       {PPC::R27, -20},      \
129       {PPC::R26, -24},      \
130       {PPC::R25, -28},      \
131       {PPC::R24, -32},      \
132       {PPC::R23, -36},      \
133       {PPC::R22, -40},      \
134       {PPC::R21, -44},      \
135       {PPC::R20, -48},      \
136       {PPC::R19, -52},      \
137       {PPC::R18, -56},      \
138       {PPC::R17, -60},      \
139       {PPC::R16, -64},      \
140       {PPC::R15, -68},      \
141       {PPC::R14, -72}
142 
143 // 64-bit general purpose register save area offsets.
144 #define CALLEE_SAVED_GPRS64 \
145       {PPC::X31, -8},       \
146       {PPC::X30, -16},      \
147       {PPC::X29, -24},      \
148       {PPC::X28, -32},      \
149       {PPC::X27, -40},      \
150       {PPC::X26, -48},      \
151       {PPC::X25, -56},      \
152       {PPC::X24, -64},      \
153       {PPC::X23, -72},      \
154       {PPC::X22, -80},      \
155       {PPC::X21, -88},      \
156       {PPC::X20, -96},      \
157       {PPC::X19, -104},     \
158       {PPC::X18, -112},     \
159       {PPC::X17, -120},     \
160       {PPC::X16, -128},     \
161       {PPC::X15, -136},     \
162       {PPC::X14, -144}
163 
164 // Vector register save area offsets.
165 #define CALLEE_SAVED_VRS \
166       {PPC::V31, -16},   \
167       {PPC::V30, -32},   \
168       {PPC::V29, -48},   \
169       {PPC::V28, -64},   \
170       {PPC::V27, -80},   \
171       {PPC::V26, -96},   \
172       {PPC::V25, -112},  \
173       {PPC::V24, -128},  \
174       {PPC::V23, -144},  \
175       {PPC::V22, -160},  \
176       {PPC::V21, -176},  \
177       {PPC::V20, -192}
178 
179   // Note that the offsets here overlap, but this is fixed up in
180   // processFunctionBeforeFrameFinalized.
181 
182   static const SpillSlot ELFOffsets32[] = {
183       CALLEE_SAVED_FPRS,
184       CALLEE_SAVED_GPRS32,
185 
186       // CR save area offset.  We map each of the nonvolatile CR fields
187       // to the slot for CR2, which is the first of the nonvolatile CR
188       // fields to be assigned, so that we only allocate one save slot.
189       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
190       {PPC::CR2, -4},
191 
192       // VRSAVE save area offset.
193       {PPC::VRSAVE, -4},
194 
195       CALLEE_SAVED_VRS,
196 
197       // SPE register save area (overlaps Vector save area).
198       {PPC::S31, -8},
199       {PPC::S30, -16},
200       {PPC::S29, -24},
201       {PPC::S28, -32},
202       {PPC::S27, -40},
203       {PPC::S26, -48},
204       {PPC::S25, -56},
205       {PPC::S24, -64},
206       {PPC::S23, -72},
207       {PPC::S22, -80},
208       {PPC::S21, -88},
209       {PPC::S20, -96},
210       {PPC::S19, -104},
211       {PPC::S18, -112},
212       {PPC::S17, -120},
213       {PPC::S16, -128},
214       {PPC::S15, -136},
215       {PPC::S14, -144}};
216 
217   static const SpillSlot ELFOffsets64[] = {
218       CALLEE_SAVED_FPRS,
219       CALLEE_SAVED_GPRS64,
220 
221       // VRSAVE save area offset.
222       {PPC::VRSAVE, -4},
223       CALLEE_SAVED_VRS
224   };
225 
226   static const SpillSlot AIXOffsets32[] = {
227       CALLEE_SAVED_FPRS,
228       CALLEE_SAVED_GPRS32,
229       // Add AIX's extra CSR.
230       {PPC::R13, -76},
231       // TODO: Update when we add vector support for AIX.
232   };
233 
234   static const SpillSlot AIXOffsets64[] = {
235       CALLEE_SAVED_FPRS,
236       CALLEE_SAVED_GPRS64,
237       // TODO: Update when we add vector support for AIX.
238   };
239 
240   if (Subtarget.is64BitELFABI()) {
241     NumEntries = array_lengthof(ELFOffsets64);
242     return ELFOffsets64;
243   }
244 
245   if (Subtarget.is32BitELFABI()) {
246     NumEntries = array_lengthof(ELFOffsets32);
247     return ELFOffsets32;
248   }
249 
250   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
251 
252   if (Subtarget.isPPC64()) {
253     NumEntries = array_lengthof(AIXOffsets64);
254     return AIXOffsets64;
255   }
256 
257   NumEntries = array_lengthof(AIXOffsets32);
258   return AIXOffsets32;
259 }
260 
261 /// RemoveVRSaveCode - We have found that this function does not need any code
262 /// to manipulate the VRSAVE register, even though it uses vector registers.
263 /// This can happen when the only registers used are known to be live in or out
264 /// of the function.  Remove all of the VRSAVE related code from the function.
265 /// FIXME: The removal of the code results in a compile failure at -O0 when the
266 /// function contains a function call, as the GPR containing original VRSAVE
267 /// contents is spilled and reloaded around the call.  Without the prolog code,
268 /// the spill instruction refers to an undefined register.  This code needs
269 /// to account for all uses of that GPR.
270 static void RemoveVRSaveCode(MachineInstr &MI) {
271   MachineBasicBlock *Entry = MI.getParent();
272   MachineFunction *MF = Entry->getParent();
273 
274   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
275   MachineBasicBlock::iterator MBBI = MI;
276   ++MBBI;
277   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
278   MBBI->eraseFromParent();
279 
280   bool RemovedAllMTVRSAVEs = true;
281   // See if we can find and remove the MTVRSAVE instruction from all of the
282   // epilog blocks.
283   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
284     // If last instruction is a return instruction, add an epilogue
285     if (I->isReturnBlock()) {
286       bool FoundIt = false;
287       for (MBBI = I->end(); MBBI != I->begin(); ) {
288         --MBBI;
289         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
290           MBBI->eraseFromParent();  // remove it.
291           FoundIt = true;
292           break;
293         }
294       }
295       RemovedAllMTVRSAVEs &= FoundIt;
296     }
297   }
298 
299   // If we found and removed all MTVRSAVE instructions, remove the read of
300   // VRSAVE as well.
301   if (RemovedAllMTVRSAVEs) {
302     MBBI = MI;
303     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
304     --MBBI;
305     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
306     MBBI->eraseFromParent();
307   }
308 
309   // Finally, nuke the UPDATE_VRSAVE.
310   MI.eraseFromParent();
311 }
312 
313 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
314 // instruction selector.  Based on the vector registers that have been used,
315 // transform this into the appropriate ORI instruction.
316 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
317   MachineFunction *MF = MI.getParent()->getParent();
318   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
319   DebugLoc dl = MI.getDebugLoc();
320 
321   const MachineRegisterInfo &MRI = MF->getRegInfo();
322   unsigned UsedRegMask = 0;
323   for (unsigned i = 0; i != 32; ++i)
324     if (MRI.isPhysRegModified(VRRegNo[i]))
325       UsedRegMask |= 1 << (31-i);
326 
327   // Live in and live out values already must be in the mask, so don't bother
328   // marking them.
329   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
330     unsigned RegNo = TRI->getEncodingValue(LI.first);
331     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
332       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
333   }
334 
335   // Live out registers appear as use operands on return instructions.
336   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
337        UsedRegMask != 0 && BI != BE; ++BI) {
338     const MachineBasicBlock &MBB = *BI;
339     if (!MBB.isReturnBlock())
340       continue;
341     const MachineInstr &Ret = MBB.back();
342     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
343       const MachineOperand &MO = Ret.getOperand(I);
344       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
345         continue;
346       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
347       UsedRegMask &= ~(1 << (31-RegNo));
348     }
349   }
350 
351   // If no registers are used, turn this into a copy.
352   if (UsedRegMask == 0) {
353     // Remove all VRSAVE code.
354     RemoveVRSaveCode(MI);
355     return;
356   }
357 
358   Register SrcReg = MI.getOperand(1).getReg();
359   Register DstReg = MI.getOperand(0).getReg();
360 
361   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
362     if (DstReg != SrcReg)
363       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
364           .addReg(SrcReg)
365           .addImm(UsedRegMask);
366     else
367       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
368           .addReg(SrcReg, RegState::Kill)
369           .addImm(UsedRegMask);
370   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
371     if (DstReg != SrcReg)
372       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
373           .addReg(SrcReg)
374           .addImm(UsedRegMask >> 16);
375     else
376       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
377           .addReg(SrcReg, RegState::Kill)
378           .addImm(UsedRegMask >> 16);
379   } else {
380     if (DstReg != SrcReg)
381       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
382           .addReg(SrcReg)
383           .addImm(UsedRegMask >> 16);
384     else
385       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
386           .addReg(SrcReg, RegState::Kill)
387           .addImm(UsedRegMask >> 16);
388 
389     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
390         .addReg(DstReg, RegState::Kill)
391         .addImm(UsedRegMask & 0xFFFF);
392   }
393 
394   // Remove the old UPDATE_VRSAVE instruction.
395   MI.eraseFromParent();
396 }
397 
398 static bool spillsCR(const MachineFunction &MF) {
399   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
400   return FuncInfo->isCRSpilled();
401 }
402 
403 static bool spillsVRSAVE(const MachineFunction &MF) {
404   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
405   return FuncInfo->isVRSAVESpilled();
406 }
407 
408 static bool hasSpills(const MachineFunction &MF) {
409   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
410   return FuncInfo->hasSpills();
411 }
412 
413 static bool hasNonRISpills(const MachineFunction &MF) {
414   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
415   return FuncInfo->hasNonRISpills();
416 }
417 
418 /// MustSaveLR - Return true if this function requires that we save the LR
419 /// register onto the stack in the prolog and restore it in the epilog of the
420 /// function.
421 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
422   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
423 
424   // We need a save/restore of LR if there is any def of LR (which is
425   // defined by calls, including the PIC setup sequence), or if there is
426   // some use of the LR stack slot (e.g. for builtin_return_address).
427   // (LR comes in 32 and 64 bit versions.)
428   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
429   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
430 }
431 
432 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
433 /// call frame size. Update the MachineFunction object with the stack size.
434 unsigned
435 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
436                                                 bool UseEstimate) const {
437   unsigned NewMaxCallFrameSize = 0;
438   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
439                                             &NewMaxCallFrameSize);
440   MF.getFrameInfo().setStackSize(FrameSize);
441   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
442   return FrameSize;
443 }
444 
445 /// determineFrameLayout - Determine the size of the frame and maximum call
446 /// frame size.
447 unsigned
448 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
449                                        bool UseEstimate,
450                                        unsigned *NewMaxCallFrameSize) const {
451   const MachineFrameInfo &MFI = MF.getFrameInfo();
452   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
453 
454   // Get the number of bytes to allocate from the FrameInfo
455   unsigned FrameSize =
456     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
457 
458   // Get stack alignments. The frame must be aligned to the greatest of these:
459   Align TargetAlign = getStackAlign(); // alignment required per the ABI
460   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
461   Align Alignment = std::max(TargetAlign, MaxAlign);
462 
463   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
464 
465   unsigned LR = RegInfo->getRARegister();
466   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
467   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
468                        !MFI.adjustsStack() &&       // No calls.
469                        !MustSaveLR(MF, LR) &&       // No need to save LR.
470                        !FI->mustSaveTOC() &&        // No need to save TOC.
471                        !RegInfo->hasBasePointer(MF); // No special alignment.
472 
473   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
474   // code if all local vars are reg-allocated.
475   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
476 
477   // Check whether we can skip adjusting the stack pointer (by using red zone)
478   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
479     // No need for frame
480     return 0;
481   }
482 
483   // Get the maximum call frame size of all the calls.
484   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
485 
486   // Maximum call frame needs to be at least big enough for linkage area.
487   unsigned minCallFrameSize = getLinkageSize();
488   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
489 
490   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
491   // that allocations will be aligned.
492   if (MFI.hasVarSizedObjects())
493     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
494 
495   // Update the new max call frame size if the caller passes in a valid pointer.
496   if (NewMaxCallFrameSize)
497     *NewMaxCallFrameSize = maxCallFrameSize;
498 
499   // Include call frame size in total.
500   FrameSize += maxCallFrameSize;
501 
502   // Make sure the frame is aligned.
503   FrameSize = alignTo(FrameSize, Alignment);
504 
505   return FrameSize;
506 }
507 
508 // hasFP - Return true if the specified function actually has a dedicated frame
509 // pointer register.
510 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
511   const MachineFrameInfo &MFI = MF.getFrameInfo();
512   // FIXME: This is pretty much broken by design: hasFP() might be called really
513   // early, before the stack layout was calculated and thus hasFP() might return
514   // true or false here depending on the time of call.
515   return (MFI.getStackSize()) && needsFP(MF);
516 }
517 
518 // needsFP - Return true if the specified function should have a dedicated frame
519 // pointer register.  This is true if the function has variable sized allocas or
520 // if frame pointer elimination is disabled.
521 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
522   const MachineFrameInfo &MFI = MF.getFrameInfo();
523 
524   // Naked functions have no stack frame pushed, so we don't have a frame
525   // pointer.
526   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
527     return false;
528 
529   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
530     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
531     (MF.getTarget().Options.GuaranteedTailCallOpt &&
532      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
533 }
534 
535 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
536   bool is31 = needsFP(MF);
537   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
538   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
539 
540   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
541   bool HasBP = RegInfo->hasBasePointer(MF);
542   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
543   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
544 
545   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
546        BI != BE; ++BI)
547     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
548       --MBBI;
549       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
550         MachineOperand &MO = MBBI->getOperand(I);
551         if (!MO.isReg())
552           continue;
553 
554         switch (MO.getReg()) {
555         case PPC::FP:
556           MO.setReg(FPReg);
557           break;
558         case PPC::FP8:
559           MO.setReg(FP8Reg);
560           break;
561         case PPC::BP:
562           MO.setReg(BPReg);
563           break;
564         case PPC::BP8:
565           MO.setReg(BP8Reg);
566           break;
567 
568         }
569       }
570     }
571 }
572 
573 /*  This function will do the following:
574     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
575       respectively (defaults recommended by the ABI) and return true
576     - If MBB is not an entry block, initialize the register scavenger and look
577       for available registers.
578     - If the defaults (R0/R12) are available, return true
579     - If TwoUniqueRegsRequired is set to true, it looks for two unique
580       registers. Otherwise, look for a single available register.
581       - If the required registers are found, set SR1 and SR2 and return true.
582       - If the required registers are not found, set SR2 or both SR1 and SR2 to
583         PPC::NoRegister and return false.
584 
585     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
586     is not set, this function will attempt to find two different registers, but
587     still return true if only one register is available (and set SR1 == SR2).
588 */
589 bool
590 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
591                                       bool UseAtEnd,
592                                       bool TwoUniqueRegsRequired,
593                                       Register *SR1,
594                                       Register *SR2) const {
595   RegScavenger RS;
596   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
597   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
598 
599   // Set the defaults for the two scratch registers.
600   if (SR1)
601     *SR1 = R0;
602 
603   if (SR2) {
604     assert (SR1 && "Asking for the second scratch register but not the first?");
605     *SR2 = R12;
606   }
607 
608   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
609   if ((UseAtEnd && MBB->isReturnBlock()) ||
610       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
611     return true;
612 
613   RS.enterBasicBlock(*MBB);
614 
615   if (UseAtEnd && !MBB->empty()) {
616     // The scratch register will be used at the end of the block, so must
617     // consider all registers used within the block
618 
619     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
620     // If no terminator, back iterator up to previous instruction.
621     if (MBBI == MBB->end())
622       MBBI = std::prev(MBBI);
623 
624     if (MBBI != MBB->begin())
625       RS.forward(MBBI);
626   }
627 
628   // If the two registers are available, we're all good.
629   // Note that we only return here if both R0 and R12 are available because
630   // although the function may not require two unique registers, it may benefit
631   // from having two so we should try to provide them.
632   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
633     return true;
634 
635   // Get the list of callee-saved registers for the target.
636   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
637   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
638 
639   // Get all the available registers in the block.
640   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
641                                      &PPC::GPRCRegClass);
642 
643   // We shouldn't use callee-saved registers as scratch registers as they may be
644   // available when looking for a candidate block for shrink wrapping but not
645   // available when the actual prologue/epilogue is being emitted because they
646   // were added as live-in to the prologue block by PrologueEpilogueInserter.
647   for (int i = 0; CSRegs[i]; ++i)
648     BV.reset(CSRegs[i]);
649 
650   // Set the first scratch register to the first available one.
651   if (SR1) {
652     int FirstScratchReg = BV.find_first();
653     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
654   }
655 
656   // If there is another one available, set the second scratch register to that.
657   // Otherwise, set it to either PPC::NoRegister if this function requires two
658   // or to whatever SR1 is set to if this function doesn't require two.
659   if (SR2) {
660     int SecondScratchReg = BV.find_next(*SR1);
661     if (SecondScratchReg != -1)
662       *SR2 = SecondScratchReg;
663     else
664       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
665   }
666 
667   // Now that we've done our best to provide both registers, double check
668   // whether we were unable to provide enough.
669   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
670     return false;
671 
672   return true;
673 }
674 
675 // We need a scratch register for spilling LR and for spilling CR. By default,
676 // we use two scratch registers to hide latency. However, if only one scratch
677 // register is available, we can adjust for that by not overlapping the spill
678 // code. However, if we need to realign the stack (i.e. have a base pointer)
679 // and the stack frame is large, we need two scratch registers.
680 bool
681 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
682   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
683   MachineFunction &MF = *(MBB->getParent());
684   bool HasBP = RegInfo->hasBasePointer(MF);
685   unsigned FrameSize = determineFrameLayout(MF);
686   int NegFrameSize = -FrameSize;
687   bool IsLargeFrame = !isInt<16>(NegFrameSize);
688   MachineFrameInfo &MFI = MF.getFrameInfo();
689   Align MaxAlign = MFI.getMaxAlign();
690   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
691 
692   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
693 }
694 
695 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
696   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
697 
698   return findScratchRegister(TmpMBB, false,
699                              twoUniqueScratchRegsRequired(TmpMBB));
700 }
701 
702 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
703   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
704 
705   return findScratchRegister(TmpMBB, true);
706 }
707 
708 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
709   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
710   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
711 
712   // Abort if there is no register info or function info.
713   if (!RegInfo || !FI)
714     return false;
715 
716   // Only move the stack update on ELFv2 ABI and PPC64.
717   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
718     return false;
719 
720   // Check the frame size first and return false if it does not fit the
721   // requirements.
722   // We need a non-zero frame size as well as a frame that will fit in the red
723   // zone. This is because by moving the stack pointer update we are now storing
724   // to the red zone until the stack pointer is updated. If we get an interrupt
725   // inside the prologue but before the stack update we now have a number of
726   // stores to the red zone and those stores must all fit.
727   MachineFrameInfo &MFI = MF.getFrameInfo();
728   unsigned FrameSize = MFI.getStackSize();
729   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
730     return false;
731 
732   // Frame pointers and base pointers complicate matters so don't do anything
733   // if we have them. For example having a frame pointer will sometimes require
734   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
735   // difficult.
736   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
737     return false;
738 
739   // Calls to fast_cc functions use different rules for passing parameters on
740   // the stack from the ABI and using PIC base in the function imposes
741   // similar restrictions to using the base pointer. It is not generally safe
742   // to move the stack pointer update in these situations.
743   if (FI->hasFastCall() || FI->usesPICBase())
744     return false;
745 
746   // Finally we can move the stack update if we do not require register
747   // scavenging. Register scavenging can introduce more spills and so
748   // may make the frame size larger than we have computed.
749   return !RegInfo->requiresFrameIndexScavenging(MF);
750 }
751 
/// Emit the function prologue for \p MF into \p MBB.
///
/// In the order visible below, this routine:
///   1. processes a pending UPDATE_VRSAVE pseudo (non-SVR4 ABIs only),
///   2. computes the frame size and the save-slot offsets,
///   3. decides whether the stack-pointer update may be sunk past the
///      callee-saved register stores,
///   4. saves LR / CR / FP / PIC base / BP as required,
///   5. allocates the frame by updating the stack pointer (three strategies:
///      realigned, small immediate, large immediate),
///   6. saves the TOC register when a prologue TOC save is requested,
///   7. emits CFI describing all of the above when needsCFI is true.
///
/// Instructions are inserted either at MBBI (the start of the block) or at
/// StackUpdateLoc (MBBI advanced past the callee-saved spills when the stack
/// update is being moved down), so the relative order of the BuildMI calls
/// below is significant.
752 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
753                                     MachineBasicBlock &MBB) const {
754   MachineBasicBlock::iterator MBBI = MBB.begin();
755   MachineFrameInfo &MFI = MF.getFrameInfo();
756   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
757   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
758 
759   MachineModuleInfo &MMI = MF.getMMI();
760   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
761   DebugLoc dl;
762   // AIX assembler does not support cfi directives.
763   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
764 
765   // Get processor type.
766   bool isPPC64 = Subtarget.isPPC64();
767   // Get the ABI.
768   bool isSVR4ABI = Subtarget.isSVR4ABI();
769   bool isAIXABI = Subtarget.isAIXABI();
770   bool isELFv2ABI = Subtarget.isELFv2ABI();
771   assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
772 
773   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
774   // process it.
    // Only the first UPDATE_VRSAVE found is handled; on AIX finding one is a
    // fatal error.
    // NOTE(review): the loop counter `i` is incremented but never read.
775   if (!isSVR4ABI)
776     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
777       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
778         if (isAIXABI)
779           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
780         HandleVRSaveUpdate(*MBBI, TII);
781         break;
782       }
783     }
784 
785   // Move MBBI back to the beginning of the prologue block.
786   MBBI = MBB.begin();
787 
788   // Work out frame sizes.
    // NOTE(review): FrameSize is unsigned and NegFrameSize is a plain int, so
    // the isInt<32> check below looks like it can never fail where int is
    // 32 bits wide -- confirm whether wider types were intended.
789   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
790   int NegFrameSize = -FrameSize;
791   if (!isInt<32>(NegFrameSize))
792     llvm_unreachable("Unhandled stack size!");
793 
794   if (MFI.isFrameAddressTaken())
795     replaceFPWithRealFP(MF);
796 
797   // Check if the link register (LR) must be saved.
798   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
799   bool MustSaveLR = FI->mustSaveLR();
800   bool MustSaveTOC = FI->mustSaveTOC();
801   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
802   bool MustSaveCR = !MustSaveCRs.empty();
803   // Do we have a frame pointer and/or base pointer for this function?
804   bool HasFP = hasFP(MF);
805   bool HasBP = RegInfo->hasBasePointer(MF);
    // A red zone is assumed for every configuration except 32-bit SVR4.
806   bool HasRedZone = isPPC64 || !isSVR4ABI;
807 
808   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
809   Register BPReg = RegInfo->getBaseRegister(MF);
810   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
811   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
812   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
813   Register ScratchReg;
814   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
815   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
    // Select the 64-bit or 32-bit flavour of every opcode used below once, up
    // front, so the emission code is width-agnostic.
816   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
817                                                 : PPC::MFLR );
818   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
819                                                  : PPC::STW );
820   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
821                                                      : PPC::STWU );
822   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
823                                                         : PPC::STWUX);
824   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
825                                                           : PPC::LIS );
826   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
827                                                  : PPC::ORI );
828   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
829                                               : PPC::OR );
830   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
831                                                             : PPC::SUBFC);
832   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
833                                                                : PPC::SUBFIC);
834   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
835                                                            : PPC::MFCR);
836   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
837 
838   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
839   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
840   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
841   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
842   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
843          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
844 
845   // SingleScratchReg first receives the result of findScratchRegister, which
    // is consumed only by the assert; it is then reused just below with its
    // real meaning (both scratch registers are the same register). Sharing the
    // variable suppresses compiler warnings.
846   bool SingleScratchReg =
847     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
848                         &ScratchReg, &TempReg);
849   assert(SingleScratchReg &&
850          "Required number of registers not available in this block");
851 
852   SingleScratchReg = ScratchReg == TempReg;
853 
854   int LROffset = getReturnSaveOffset();
855 
    // Offsets of the FP / BP / PIC-base save slots. On SVR4 they are read from
    // the frame objects recorded in PPCFunctionInfo; otherwise the fixed
    // per-ABI offsets are used. An offset stays 0 when the register is unused.
856   int FPOffset = 0;
857   if (HasFP) {
858     if (isSVR4ABI) {
859       MachineFrameInfo &MFI = MF.getFrameInfo();
860       int FPIndex = FI->getFramePointerSaveIndex();
861       assert(FPIndex && "No Frame Pointer Save Slot!");
862       FPOffset = MFI.getObjectOffset(FPIndex);
863     } else {
864       FPOffset = getFramePointerSaveOffset();
865     }
866   }
867 
868   int BPOffset = 0;
869   if (HasBP) {
870     if (isSVR4ABI) {
871       MachineFrameInfo &MFI = MF.getFrameInfo();
872       int BPIndex = FI->getBasePointerSaveIndex();
873       assert(BPIndex && "No Base Pointer Save Slot!");
874       BPOffset = MFI.getObjectOffset(BPIndex);
875     } else {
876       BPOffset = getBasePointerSaveOffset();
877     }
878   }
879 
880   int PBPOffset = 0;
881   if (FI->usesPICBase()) {
882     MachineFrameInfo &MFI = MF.getFrameInfo();
883     int PBPIndex = FI->getPICBasePointerSaveIndex();
884     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
885     PBPOffset = MFI.getObjectOffset(PBPIndex);
886   }
887 
888   // Get stack alignments.
889   Align MaxAlign = MFI.getMaxAlign();
890   if (HasBP && MaxAlign > 1)
891     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
892 
893   // Frames of 32KB & larger require special handling because they cannot be
894   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
895   bool isLargeFrame = !isInt<16>(NegFrameSize);
896 
897   // Check if we can move the stack update instruction (stdu) down the prologue
898   // past the callee saves. Hopefully this will avoid the situation where the
899   // saves are waiting for the update on the store with update to complete.
900   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
901   bool MovingStackUpdateDown = false;
902 
903   // Check if we can move the stack update.
    // StackUpdateLoc is advanced by one instruction per qualifying fixed-object
    // callee save; any non-qualifying entry resets it to MBBI and aborts.
    // NOTE(review): CalleeSavedInfo is copied by value on each iteration here
    // and in the offset-update loop below; a const reference would avoid that.
    // NOTE(review): entries for which isSpilledToReg() is true (handled in the
    // CFI loop at the end of this function) are not special-cased here --
    // confirm that getFrameIdx() is meaningful for them.
904   if (stackUpdateCanBeMoved(MF)) {
905     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
906     for (CalleeSavedInfo CSI : Info) {
907       int FrIdx = CSI.getFrameIdx();
908       // If the frame index is not negative the callee saved info belongs to a
909       // stack object that is not a fixed stack object. We ignore non-fixed
910       // stack objects because we won't move the stack update pointer past them.
911       if (FrIdx >= 0)
912         continue;
913 
914       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
915         StackUpdateLoc++;
916         MovingStackUpdateDown = true;
917       } else {
918         // We need all of the Frame Indices to meet these conditions.
919         // If they do not, abort the whole operation.
920         StackUpdateLoc = MBBI;
921         MovingStackUpdateDown = false;
922         break;
923       }
924     }
925 
926     // If the operation was not aborted then update the object offset.
      // Each fixed object's offset is shifted by NegFrameSize here; the CFI
      // loop at the end of the function subtracts NegFrameSize again so the
      // emitted CFI offsets are unchanged.
927     if (MovingStackUpdateDown) {
928       for (CalleeSavedInfo CSI : Info) {
929         int FrIdx = CSI.getFrameIdx();
930         if (FrIdx < 0)
931           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
932       }
933     }
934   }
935 
936   // Where in the prologue we move the CR fields depends on how many scratch
937   // registers we have, and if we need to save the link register or not. This
938   // lambda is to avoid duplicating the logic in 2 places.
    // In both branches the CR value is moved into TempReg.
939   auto BuildMoveFromCR = [&]() {
940     if (isELFv2ABI && MustSaveCRs.size() == 1) {
941       // In the ELFv2 ABI, we are not required to save all CR fields.
942       // If only one CR field is clobbered, it is more efficient to use
943       // mfocrf to selectively save just that field, because mfocrf has
944       // shorter latency compared to mfcr.
945       assert(isPPC64 && "V2 ABI is 64-bit only.");
946       MachineInstrBuilder MIB =
947           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
948       MIB.addReg(MustSaveCRs[0], RegState::Kill);
949     } else {
950       MachineInstrBuilder MIB =
951           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
952       for (unsigned CRfield : MustSaveCRs)
953         MIB.addReg(CRfield, RegState::ImplicitKill);
954     }
955   };
956 
957   // If we need to spill the CR and the LR but we don't have two separate
958   // registers available, we must spill them one at a time
    // (CR is moved and stored first, before ScratchReg is loaded with LR.)
959   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
960     BuildMoveFromCR();
961     BuildMI(MBB, MBBI, dl, StoreWordInst)
962         .addReg(TempReg, getKillRegState(true))
963         .addImm(CRSaveOffset)
964         .addReg(SPReg);
965   }
966 
967   if (MustSaveLR)
968     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
969 
    // With two distinct scratch registers (or no LR save) the CR move is
    // emitted here and its store follows further below.
970   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
971     BuildMoveFromCR();
972 
    // With a red zone, FP / PIC base / BP are stored relative to the incoming
    // SP before the frame is allocated. Without one they are stored after the
    // stack update (see the !HasRedZone section below).
973   if (HasRedZone) {
974     if (HasFP)
975       BuildMI(MBB, MBBI, dl, StoreInst)
976         .addReg(FPReg)
977         .addImm(FPOffset)
978         .addReg(SPReg);
979     if (FI->usesPICBase())
980       BuildMI(MBB, MBBI, dl, StoreInst)
981         .addReg(PPC::R30)
982         .addImm(PBPOffset)
983         .addReg(SPReg);
984     if (HasBP)
985       BuildMI(MBB, MBBI, dl, StoreInst)
986         .addReg(BPReg)
987         .addImm(BPOffset)
988         .addReg(SPReg);
989   }
990 
    // The LR store is placed at StackUpdateLoc, which equals MBBI unless the
    // stack update is being moved down past the callee saves.
991   if (MustSaveLR)
992     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
993       .addReg(ScratchReg, getKillRegState(true))
994       .addImm(LROffset)
995       .addReg(SPReg);
996 
997   if (MustSaveCR &&
998       !(SingleScratchReg && MustSaveLR)) {
999     assert(HasRedZone && "A red zone is always available on PPC64");
1000     BuildMI(MBB, MBBI, dl, StoreWordInst)
1001       .addReg(TempReg, getKillRegState(true))
1002       .addImm(CRSaveOffset)
1003       .addReg(SPReg);
1004   }
1005 
1006   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1007   if (!FrameSize)
1008     return;
1009 
1010   // Adjust stack pointer: r1 += NegFrameSize.
1011   // If there is a preferred stack alignment, align R1 now
1012 
1013   if (HasBP && HasRedZone) {
1014     // Save a copy of r1 as the base pointer.
1015     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1016       .addReg(SPReg)
1017       .addReg(SPReg);
1018   }
1019 
1020   // Have we generated a STUX instruction to claim stack frame? If so,
1021   // the negated frame size will be placed in ScratchReg.
1022   bool HasSTUX = false;
1023 
1024   // This condition must be kept in sync with canUseAsPrologue.
     // Strategy 1 (base pointer in use and MaxAlign > 1): the SP adjustment is
     // computed into ScratchReg and applied with an indexed store-with-update.
     // NOTE(review): the rotate-and-mask presumably extracts the low
     // Log2(MaxAlign) bits of SP, which are then subtracted from NegFrameSize
     // -- confirm against the ISA definitions of these instructions.
1025   if (HasBP && MaxAlign > 1) {
1026     if (isPPC64)
1027       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1028           .addReg(SPReg)
1029           .addImm(0)
1030           .addImm(64 - Log2(MaxAlign));
1031     else // PPC32...
1032       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1033           .addReg(SPReg)
1034           .addImm(0)
1035           .addImm(32 - Log2(MaxAlign))
1036           .addImm(31);
1037     if (!isLargeFrame) {
1038       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1039         .addReg(ScratchReg, RegState::Kill)
1040         .addImm(NegFrameSize);
1041     } else {
         // NegFrameSize does not fit in a 16-bit immediate: build it in TempReg
         // from its high and low halves, which needs a second scratch register.
1042       assert(!SingleScratchReg && "Only a single scratch reg available");
1043       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1044         .addImm(NegFrameSize >> 16);
1045       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1046         .addReg(TempReg, RegState::Kill)
1047         .addImm(NegFrameSize & 0xFFFF);
1048       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1049         .addReg(ScratchReg, RegState::Kill)
1050         .addReg(TempReg, RegState::Kill);
1051     }
1052 
1053     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1054       .addReg(SPReg, RegState::Kill)
1055       .addReg(SPReg)
1056       .addReg(ScratchReg);
1057     HasSTUX = true;
1058 
1059   } else if (!isLargeFrame) {
       // Strategy 2 (small frame, no realignment): a single store-with-update
       // with an immediate displacement, inserted at StackUpdateLoc.
1060     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1061       .addReg(SPReg)
1062       .addImm(NegFrameSize)
1063       .addReg(SPReg);
1064 
1065   } else {
       // Strategy 3 (large frame, no realignment): build NegFrameSize in
       // ScratchReg from its two 16-bit halves, then use the indexed
       // store-with-update.
1066     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1067       .addImm(NegFrameSize >> 16);
1068     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1069       .addReg(ScratchReg, RegState::Kill)
1070       .addImm(NegFrameSize & 0xFFFF);
1071     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1072       .addReg(SPReg, RegState::Kill)
1073       .addReg(SPReg)
1074       .addReg(ScratchReg);
1075     HasSTUX = true;
1076   }
1077 
1078   // Save the TOC register after the stack pointer update if a prologue TOC
1079   // save is required for the function.
1080   if (MustSaveTOC) {
1081     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1082     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1083       .addReg(TOCReg, getKillRegState(true))
1084       .addImm(TOCSaveOffset)
1085       .addReg(SPReg);
1086   }
1087 
     // No red zone: FP / PIC base / BP could not be stored before the frame was
     // allocated, so store them now. The three sub-cases differ in how the old
     // SP (which the save offsets are relative to) is recovered.
1088   if (!HasRedZone) {
1089     assert(!isPPC64 && "A red zone is always available on PPC64");
1090     if (HasSTUX) {
1091       // The negated frame size is in ScratchReg, and the SPReg has been
1092       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1093       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1094       // the stack frame (i.e. the old SP), ideally, we would put the old
1095       // SP into a register and use it as the base for the stores. The
1096       // problem is that the only available register may be ScratchReg,
1097       // which could be R0, and R0 cannot be used as a base address.
1098 
1099       // First, set ScratchReg to the old SP. This may need to be modified
1100       // later.
1101       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1102         .addReg(ScratchReg, RegState::Kill)
1103         .addReg(SPReg);
1104 
1105       if (ScratchReg == PPC::R0) {
1106         // R0 cannot be used as a base register, but it can be used as an
1107         // index in a store-indexed.
           // LastOffset tracks the offset already added to R0, so each step
           // only adds the difference to the next save slot's offset.
1108         int LastOffset = 0;
1109         if (HasFP)  {
1110           // R0 += (FPOffset-LastOffset).
1111           // Need addic, since addi treats R0 as 0.
1112           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1113             .addReg(ScratchReg)
1114             .addImm(FPOffset-LastOffset);
1115           LastOffset = FPOffset;
1116           // Store FP into *R0.
1117           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1118             .addReg(FPReg, RegState::Kill)  // Save FP.
1119             .addReg(PPC::ZERO)
1120             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1121         }
1122         if (FI->usesPICBase()) {
1123           // R0 += (PBPOffset-LastOffset).
1124           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1125             .addReg(ScratchReg)
1126             .addImm(PBPOffset-LastOffset);
1127           LastOffset = PBPOffset;
1128           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1129             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1130             .addReg(PPC::ZERO)
1131             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1132         }
1133         if (HasBP) {
1134           // R0 += (BPOffset-LastOffset).
1135           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1136             .addReg(ScratchReg)
1137             .addImm(BPOffset-LastOffset);
1138           LastOffset = BPOffset;
1139           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1140             .addReg(BPReg, RegState::Kill)  // Save BP.
1141             .addReg(PPC::ZERO)
1142             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1143           // BP = R0-LastOffset
1144           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1145             .addReg(ScratchReg, RegState::Kill)
1146             .addImm(-LastOffset);
1147         }
1148       } else {
1149         // ScratchReg is not R0, so use it as the base register. It is
1150         // already set to the old SP, so we can use the offsets directly.
1151 
1152         // Now that the stack frame has been allocated, save all the necessary
1153         // registers using ScratchReg as the base address.
1154         if (HasFP)
1155           BuildMI(MBB, MBBI, dl, StoreInst)
1156             .addReg(FPReg)
1157             .addImm(FPOffset)
1158             .addReg(ScratchReg);
1159         if (FI->usesPICBase())
1160           BuildMI(MBB, MBBI, dl, StoreInst)
1161             .addReg(PPC::R30)
1162             .addImm(PBPOffset)
1163             .addReg(ScratchReg);
1164         if (HasBP) {
1165           BuildMI(MBB, MBBI, dl, StoreInst)
1166             .addReg(BPReg)
1167             .addImm(BPOffset)
1168             .addReg(ScratchReg);
             // After its old value has been stored, BP takes the old SP value
             // held in ScratchReg.
1169           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1170             .addReg(ScratchReg, RegState::Kill)
1171             .addReg(ScratchReg);
1172         }
1173       }
1174     } else {
1175       // The frame size is a known 16-bit constant (fitting in the immediate
1176       // field of STWU). To be here we have to be compiling for PPC32.
1177       // Since the SPReg has been decreased by FrameSize, add it back to each
1178       // offset.
1179       if (HasFP)
1180         BuildMI(MBB, MBBI, dl, StoreInst)
1181           .addReg(FPReg)
1182           .addImm(FrameSize + FPOffset)
1183           .addReg(SPReg);
1184       if (FI->usesPICBase())
1185         BuildMI(MBB, MBBI, dl, StoreInst)
1186           .addReg(PPC::R30)
1187           .addImm(FrameSize + PBPOffset)
1188           .addReg(SPReg);
1189       if (HasBP) {
1190         BuildMI(MBB, MBBI, dl, StoreInst)
1191           .addReg(BPReg)
1192           .addImm(FrameSize + BPOffset)
1193           .addReg(SPReg);
           // BP = SP + FrameSize, i.e. the SP value before the stack update.
1194         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1195           .addReg(SPReg)
1196           .addImm(FrameSize);
1197       }
1198     }
1199   }
1200 
1201   // Add Call Frame Information for the instructions we generated above.
1202   if (needsCFI) {
1203     unsigned CFIIndex;
1204 
1205     if (HasBP) {
1206       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1207       // because if the stack needed aligning then CFA won't be at a fixed
1208       // offset from FP/SP.
1209       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1210       CFIIndex = MF.addFrameInst(
1211           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1212     } else {
1213       // Adjust the definition of CFA to account for the change in SP.
1214       assert(NegFrameSize);
1215       CFIIndex = MF.addFrameInst(
1216           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1217     }
1218     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1219         .addCFIIndex(CFIIndex);
1220 
1221     if (HasFP) {
1222       // Describe where FP was saved, at a fixed offset from CFA.
1223       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1224       CFIIndex = MF.addFrameInst(
1225           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1226       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1227           .addCFIIndex(CFIIndex);
1228     }
1229 
1230     if (FI->usesPICBase()) {
1231       // Describe where the PIC base pointer (R30) was saved, at a fixed
         // offset from CFA.
1232       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1233       CFIIndex = MF.addFrameInst(
1234           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1235       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1236           .addCFIIndex(CFIIndex);
1237     }
1238 
1239     if (HasBP) {
1240       // Describe where BP was saved, at a fixed offset from CFA.
1241       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1242       CFIIndex = MF.addFrameInst(
1243           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1244       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1245           .addCFIIndex(CFIIndex);
1246     }
1247 
1248     if (MustSaveLR) {
1249       // Describe where LR was saved, at a fixed offset from CFA.
1250       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1251       CFIIndex = MF.addFrameInst(
1252           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1253       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1254           .addCFIIndex(CFIIndex);
1255     }
1256   }
1257 
1258   // If there is a frame pointer, copy R1 into R31
1259   if (HasFP) {
1260     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1261       .addReg(SPReg)
1262       .addReg(SPReg);
1263 
1264     if (!HasBP && needsCFI) {
1265       // Change the definition of CFA from SP+offset to FP+offset, because SP
1266       // will change at every alloca.
1267       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1268       unsigned CFIIndex = MF.addFrameInst(
1269           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1270 
1271       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1272           .addCFIIndex(CFIIndex);
1273     }
1274   }
1275 
1276   if (needsCFI) {
1277     // Describe where callee saved registers were saved, at fixed offsets from
1278     // CFA.
1279     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1280     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1281       unsigned Reg = CSI[I].getReg();
         // No record is emitted here for LR/LR8 (LR was described above when
         // MustSaveLR is set) or for RM.
1282       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1283 
1284       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1285       // subregisters of CR2. We just need to emit a move of CR2.
1286       if (PPC::CRBITRCRegClass.contains(Reg))
1287         continue;
1288 
         // No record is emitted for the TOC register when it was saved by the
         // prologue TOC save above.
1289       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1290         continue;
1291 
1292       // For SVR4, don't emit a move for the CR spill slot if we haven't
1293       // spilled CRs.
1294       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1295           && !MustSaveCR)
1296         continue;
1297 
1298       // For 64-bit SVR4 when we have spilled CRs, the spill location
1299       // is SP+8, not a frame-relative slot.
1300       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1301         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1302         // the whole CR word.  In the ELFv2 ABI, every CR that was
1303         // actually saved gets its own CFI record.
1304         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1305         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1306             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1307         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1308             .addCFIIndex(CFIIndex);
1309         continue;
1310       }
1311 
1312       if (CSI[I].isSpilledToReg()) {
           // The callee-saved register lives in another register rather than
           // in a stack slot; record a register-to-register CFI rule.
1313         unsigned SpilledReg = CSI[I].getDstReg();
1314         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1315             nullptr, MRI->getDwarfRegNum(Reg, true),
1316             MRI->getDwarfRegNum(SpilledReg, true)));
1317         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1318           .addCFIIndex(CFIRegister);
1319       } else {
1320         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1321         // We have changed the object offset above but we do not want to change
1322         // the actual offsets in the CFI instruction so we have to undo the
1323         // offset change here.
1324         if (MovingStackUpdateDown)
1325           Offset -= NegFrameSize;
1326 
1327         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1328             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1329         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1330             .addCFIIndex(CFIIndex);
1331       }
1332     }
1333   }
1334 }
1335 
1336 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1337                                     MachineBasicBlock &MBB) const {
1338   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1339   DebugLoc dl;
1340 
1341   if (MBBI != MBB.end())
1342     dl = MBBI->getDebugLoc();
1343 
1344   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1345   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1346 
1347   // Get alignment info so we know how to restore the SP.
1348   const MachineFrameInfo &MFI = MF.getFrameInfo();
1349 
1350   // Get the number of bytes allocated from the FrameInfo.
1351   int FrameSize = MFI.getStackSize();
1352 
1353   // Get processor type.
1354   bool isPPC64 = Subtarget.isPPC64();
1355   // Get the ABI.
1356   bool isSVR4ABI = Subtarget.isSVR4ABI();
1357 
1358   // Check if the link register (LR) has been saved.
1359   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1360   bool MustSaveLR = FI->mustSaveLR();
1361   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1362   bool MustSaveCR = !MustSaveCRs.empty();
1363   // Do we have a frame pointer and/or base pointer for this function?
1364   bool HasFP = hasFP(MF);
1365   bool HasBP = RegInfo->hasBasePointer(MF);
1366   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1367 
1368   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1369   Register BPReg = RegInfo->getBaseRegister(MF);
1370   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1371   Register ScratchReg;
1372   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1373   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1374                                                  : PPC::MTLR );
1375   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1376                                                  : PPC::LWZ );
1377   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1378                                                            : PPC::LIS );
1379   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1380                                               : PPC::OR );
1381   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1382                                                   : PPC::ORI );
1383   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1384                                                    : PPC::ADDI );
1385   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1386                                                 : PPC::ADD4 );
1387   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1388                                                      : PPC::LWZ);
1389   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1390                                                      : PPC::MTOCRF);
1391   int LROffset = getReturnSaveOffset();
1392 
1393   int FPOffset = 0;
1394 
1395   // Using the same bool variable as below to suppress compiler warnings.
1396   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1397                                               &TempReg);
1398   assert(SingleScratchReg &&
1399          "Could not find an available scratch register");
1400 
1401   SingleScratchReg = ScratchReg == TempReg;
1402 
1403   if (HasFP) {
1404     if (isSVR4ABI) {
1405       int FPIndex = FI->getFramePointerSaveIndex();
1406       assert(FPIndex && "No Frame Pointer Save Slot!");
1407       FPOffset = MFI.getObjectOffset(FPIndex);
1408     } else {
1409       FPOffset = getFramePointerSaveOffset();
1410     }
1411   }
1412 
1413   int BPOffset = 0;
1414   if (HasBP) {
1415     if (isSVR4ABI) {
1416       int BPIndex = FI->getBasePointerSaveIndex();
1417       assert(BPIndex && "No Base Pointer Save Slot!");
1418       BPOffset = MFI.getObjectOffset(BPIndex);
1419     } else {
1420       BPOffset = getBasePointerSaveOffset();
1421     }
1422   }
1423 
1424   int PBPOffset = 0;
1425   if (FI->usesPICBase()) {
1426     int PBPIndex = FI->getPICBasePointerSaveIndex();
1427     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1428     PBPOffset = MFI.getObjectOffset(PBPIndex);
1429   }
1430 
1431   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1432 
1433   if (IsReturnBlock) {
1434     unsigned RetOpcode = MBBI->getOpcode();
1435     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1436                       RetOpcode == PPC::TCRETURNdi ||
1437                       RetOpcode == PPC::TCRETURNai ||
1438                       RetOpcode == PPC::TCRETURNri8 ||
1439                       RetOpcode == PPC::TCRETURNdi8 ||
1440                       RetOpcode == PPC::TCRETURNai8;
1441 
1442     if (UsesTCRet) {
1443       int MaxTCRetDelta = FI->getTailCallSPDelta();
1444       MachineOperand &StackAdjust = MBBI->getOperand(1);
1445       assert(StackAdjust.isImm() && "Expecting immediate value.");
1446       // Adjust stack pointer.
1447       int StackAdj = StackAdjust.getImm();
1448       int Delta = StackAdj - MaxTCRetDelta;
1449       assert((Delta >= 0) && "Delta must be positive");
1450       if (MaxTCRetDelta>0)
1451         FrameSize += (StackAdj +Delta);
1452       else
1453         FrameSize += StackAdj;
1454     }
1455   }
1456 
1457   // Frames of 32KB & larger require special handling because they cannot be
1458   // indexed into with a simple LD/LWZ immediate offset operand.
1459   bool isLargeFrame = !isInt<16>(FrameSize);
1460 
1461   // On targets without red zone, the SP needs to be restored last, so that
1462   // all live contents of the stack frame are upwards of the SP. This means
1463   // that we cannot restore SP just now, since there may be more registers
1464   // to restore from the stack frame (e.g. R31). If the frame size is not
1465   // a simple immediate value, we will need a spare register to hold the
1466   // restored SP. If the frame size is known and small, we can simply adjust
1467   // the offsets of the registers to be restored, and still use SP to restore
1468   // them. In such case, the final update of SP will be to add the frame
1469   // size to it.
1470   // To simplify the code, set RBReg to the base register used to restore
1471   // values from the stack, and set SPAdd to the value that needs to be added
1472   // to the SP at the end. The default values are as if red zone was present.
1473   unsigned RBReg = SPReg;
1474   unsigned SPAdd = 0;
1475 
1476   // Check if we can move the stack update instruction up the epilogue
1477   // past the callee saves. This will allow the move to LR instruction
1478   // to be executed before the restores of the callee saves which means
1479   // that the callee saves can hide the latency from the MTLR instrcution.
1480   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1481   if (stackUpdateCanBeMoved(MF)) {
1482     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1483     for (CalleeSavedInfo CSI : Info) {
1484       int FrIdx = CSI.getFrameIdx();
1485       // If the frame index is not negative the callee saved info belongs to a
1486       // stack object that is not a fixed stack object. We ignore non-fixed
1487       // stack objects because we won't move the update of the stack pointer
1488       // past them.
1489       if (FrIdx >= 0)
1490         continue;
1491 
1492       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1493         StackUpdateLoc--;
1494       else {
1495         // Abort the operation as we can't update all CSR restores.
1496         StackUpdateLoc = MBBI;
1497         break;
1498       }
1499     }
1500   }
1501 
1502   if (FrameSize) {
1503     // In the prologue, the loaded (or persistent) stack pointer value is
1504     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1505     // zone add this offset back now.
1506 
1507     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1508     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1509     // call which invalidates the stack pointer value in SP(0). So we use the
1510     // value of R31 in this case.
1511     if (FI->hasFastCall()) {
1512       assert(HasFP && "Expecting a valid frame pointer.");
1513       if (!HasRedZone)
1514         RBReg = FPReg;
1515       if (!isLargeFrame) {
1516         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1517           .addReg(FPReg).addImm(FrameSize);
1518       } else {
1519         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1520           .addImm(FrameSize >> 16);
1521         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1522           .addReg(ScratchReg, RegState::Kill)
1523           .addImm(FrameSize & 0xFFFF);
1524         BuildMI(MBB, MBBI, dl, AddInst)
1525           .addReg(RBReg)
1526           .addReg(FPReg)
1527           .addReg(ScratchReg);
1528       }
1529     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1530       if (HasRedZone) {
1531         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1532           .addReg(SPReg)
1533           .addImm(FrameSize);
1534       } else {
1535         // Make sure that adding FrameSize will not overflow the max offset
1536         // size.
1537         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1538                "Local offsets should be negative");
1539         SPAdd = FrameSize;
1540         FPOffset += FrameSize;
1541         BPOffset += FrameSize;
1542         PBPOffset += FrameSize;
1543       }
1544     } else {
1545       // We don't want to use ScratchReg as a base register, because it
1546       // could happen to be R0. Use FP instead, but make sure to preserve it.
1547       if (!HasRedZone) {
1548         // If FP is not saved, copy it to ScratchReg.
1549         if (!HasFP)
1550           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1551             .addReg(FPReg)
1552             .addReg(FPReg);
1553         RBReg = FPReg;
1554       }
1555       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1556         .addImm(0)
1557         .addReg(SPReg);
1558     }
1559   }
1560   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1561   // If there is no red zone, ScratchReg may be needed for holding a useful
1562   // value (although not the base register). Make sure it is not overwritten
1563   // too early.
1564 
1565   // If we need to restore both the LR and the CR and we only have one
1566   // available scratch register, we must do them one at a time.
1567   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1568     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1569     // is live here.
1570     assert(HasRedZone && "Expecting red zone");
1571     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1572       .addImm(CRSaveOffset)
1573       .addReg(SPReg);
1574     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1575       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1576         .addReg(TempReg, getKillRegState(i == e-1));
1577   }
1578 
1579   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1580   // LR is stored in the caller's stack frame. ScratchReg will be needed
1581   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1582   // a base register anyway, because it may happen to be R0.
1583   bool LoadedLR = false;
1584   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1585     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1586       .addImm(LROffset+SPAdd)
1587       .addReg(RBReg);
1588     LoadedLR = true;
1589   }
1590 
1591   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1592     assert(RBReg == SPReg && "Should be using SP as a base register");
1593     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1594       .addImm(CRSaveOffset)
1595       .addReg(RBReg);
1596   }
1597 
1598   if (HasFP) {
1599     // If there is red zone, restore FP directly, since SP has already been
1600     // restored. Otherwise, restore the value of FP into ScratchReg.
1601     if (HasRedZone || RBReg == SPReg)
1602       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1603         .addImm(FPOffset)
1604         .addReg(SPReg);
1605     else
1606       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1607         .addImm(FPOffset)
1608         .addReg(RBReg);
1609   }
1610 
1611   if (FI->usesPICBase())
1612     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1613       .addImm(PBPOffset)
1614       .addReg(RBReg);
1615 
1616   if (HasBP)
1617     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1618       .addImm(BPOffset)
1619       .addReg(RBReg);
1620 
1621   // There is nothing more to be loaded from the stack, so now we can
1622   // restore SP: SP = RBReg + SPAdd.
1623   if (RBReg != SPReg || SPAdd != 0) {
1624     assert(!HasRedZone && "This should not happen with red zone");
1625     // If SPAdd is 0, generate a copy.
1626     if (SPAdd == 0)
1627       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1628         .addReg(RBReg)
1629         .addReg(RBReg);
1630     else
1631       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1632         .addReg(RBReg)
1633         .addImm(SPAdd);
1634 
1635     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1636     if (RBReg == FPReg)
1637       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1638         .addReg(ScratchReg)
1639         .addReg(ScratchReg);
1640 
1641     // Now load the LR from the caller's stack frame.
1642     if (MustSaveLR && !LoadedLR)
1643       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1644         .addImm(LROffset)
1645         .addReg(SPReg);
1646   }
1647 
1648   if (MustSaveCR &&
1649       !(SingleScratchReg && MustSaveLR))
1650     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1651       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1652         .addReg(TempReg, getKillRegState(i == e-1));
1653 
1654   if (MustSaveLR)
1655     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1656 
1657   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1658   // call optimization
1659   if (IsReturnBlock) {
1660     unsigned RetOpcode = MBBI->getOpcode();
1661     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1662         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1663         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1664       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1665       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1666 
1667       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1668         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1669           .addReg(SPReg).addImm(CallerAllocatedAmt);
1670       } else {
1671         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1672           .addImm(CallerAllocatedAmt >> 16);
1673         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1674           .addReg(ScratchReg, RegState::Kill)
1675           .addImm(CallerAllocatedAmt & 0xFFFF);
1676         BuildMI(MBB, MBBI, dl, AddInst)
1677           .addReg(SPReg)
1678           .addReg(FPReg)
1679           .addReg(ScratchReg);
1680       }
1681     } else {
1682       createTailCallBranchInstr(MBB);
1683     }
1684   }
1685 }
1686 
1687 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1688   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1689 
1690   // If we got this far a first terminator should exist.
1691   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1692 
1693   DebugLoc dl = MBBI->getDebugLoc();
1694   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1695 
1696   // Create branch instruction for pseudo tail call return instruction.
1697   // The TCRETURNdi variants are direct calls. Valid targets for those are
1698   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1699   // since we can tail call external functions with PC-Rel (i.e. we don't need
1700   // to worry about different TOC pointers). Some of the external functions will
1701   // be MO_GlobalAddress while others like memcpy for example, are going to
1702   // be MO_ExternalSymbol.
1703   unsigned RetOpcode = MBBI->getOpcode();
1704   if (RetOpcode == PPC::TCRETURNdi) {
1705     MBBI = MBB.getLastNonDebugInstr();
1706     MachineOperand &JumpTarget = MBBI->getOperand(0);
1707     if (JumpTarget.isGlobal())
1708       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1709         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1710     else if (JumpTarget.isSymbol())
1711       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1712         addExternalSymbol(JumpTarget.getSymbolName());
1713     else
1714       llvm_unreachable("Expecting Global or External Symbol");
1715   } else if (RetOpcode == PPC::TCRETURNri) {
1716     MBBI = MBB.getLastNonDebugInstr();
1717     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1718     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1719   } else if (RetOpcode == PPC::TCRETURNai) {
1720     MBBI = MBB.getLastNonDebugInstr();
1721     MachineOperand &JumpTarget = MBBI->getOperand(0);
1722     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1723   } else if (RetOpcode == PPC::TCRETURNdi8) {
1724     MBBI = MBB.getLastNonDebugInstr();
1725     MachineOperand &JumpTarget = MBBI->getOperand(0);
1726     if (JumpTarget.isGlobal())
1727       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1728         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1729     else if (JumpTarget.isSymbol())
1730       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1731         addExternalSymbol(JumpTarget.getSymbolName());
1732     else
1733       llvm_unreachable("Expecting Global or External Symbol");
1734   } else if (RetOpcode == PPC::TCRETURNri8) {
1735     MBBI = MBB.getLastNonDebugInstr();
1736     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1737     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1738   } else if (RetOpcode == PPC::TCRETURNai8) {
1739     MBBI = MBB.getLastNonDebugInstr();
1740     MachineOperand &JumpTarget = MBBI->getOperand(0);
1741     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1742   }
1743 }
1744 
1745 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1746                                             BitVector &SavedRegs,
1747                                             RegScavenger *RS) const {
1748   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1749 
1750   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1751 
1752   //  Save and clear the LR state.
1753   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1754   unsigned LR = RegInfo->getRARegister();
1755   FI->setMustSaveLR(MustSaveLR(MF, LR));
1756   SavedRegs.reset(LR);
1757 
1758   //  Save R31 if necessary
1759   int FPSI = FI->getFramePointerSaveIndex();
1760   const bool isPPC64 = Subtarget.isPPC64();
1761   MachineFrameInfo &MFI = MF.getFrameInfo();
1762 
1763   // If the frame pointer save index hasn't been defined yet.
1764   if (!FPSI && needsFP(MF)) {
1765     // Find out what the fix offset of the frame pointer save area.
1766     int FPOffset = getFramePointerSaveOffset();
1767     // Allocate the frame index for frame pointer save area.
1768     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1769     // Save the result.
1770     FI->setFramePointerSaveIndex(FPSI);
1771   }
1772 
1773   int BPSI = FI->getBasePointerSaveIndex();
1774   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1775     int BPOffset = getBasePointerSaveOffset();
1776     // Allocate the frame index for the base pointer save area.
1777     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1778     // Save the result.
1779     FI->setBasePointerSaveIndex(BPSI);
1780   }
1781 
1782   // Reserve stack space for the PIC Base register (R30).
1783   // Only used in SVR4 32-bit.
1784   if (FI->usesPICBase()) {
1785     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1786     FI->setPICBasePointerSaveIndex(PBPSI);
1787   }
1788 
1789   // Make sure we don't explicitly spill r31, because, for example, we have
1790   // some inline asm which explicitly clobbers it, when we otherwise have a
1791   // frame pointer and are using r31's spill slot for the prologue/epilogue
1792   // code. Same goes for the base pointer and the PIC base register.
1793   if (needsFP(MF))
1794     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1795   if (RegInfo->hasBasePointer(MF))
1796     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1797   if (FI->usesPICBase())
1798     SavedRegs.reset(PPC::R30);
1799 
1800   // Reserve stack space to move the linkage area to in case of a tail call.
1801   int TCSPDelta = 0;
1802   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1803       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1804     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1805   }
1806 
1807   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
1808   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
1809   // object at the offset of the CR-save slot in the linkage area. The actual
1810   // save and restore of the condition register will be created as part of the
1811   // prologue and epilogue insertion, but the FixedStack object is needed to
1812   // keep the CalleSavedInfo valid.
1813   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1814        SavedRegs.test(PPC::CR4))) {
1815     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1816     const int64_t SpillOffset =
1817         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
1818     int FrameIdx =
1819         MFI.CreateFixedObject(SpillSize, SpillOffset,
1820                               /* IsImmutable */ true, /* IsAliased */ false);
1821     FI->setCRSpillFrameIndex(FrameIdx);
1822   }
1823 }
1824 
1825 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1826                                                        RegScavenger *RS) const {
1827   // Get callee saved register information.
1828   MachineFrameInfo &MFI = MF.getFrameInfo();
1829   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1830 
1831   // If the function is shrink-wrapped, and if the function has a tail call, the
1832   // tail call might not be in the new RestoreBlock, so real branch instruction
1833   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1834   // RestoreBlock. So we handle this case here.
1835   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1836     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1837     for (MachineBasicBlock &MBB : MF) {
1838       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1839         createTailCallBranchInstr(MBB);
1840     }
1841   }
1842 
1843   // Early exit if no callee saved registers are modified!
1844   if (CSI.empty() && !needsFP(MF)) {
1845     addScavengingSpillSlot(MF, RS);
1846     return;
1847   }
1848 
1849   unsigned MinGPR = PPC::R31;
1850   unsigned MinG8R = PPC::X31;
1851   unsigned MinFPR = PPC::F31;
1852   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1853 
1854   bool HasGPSaveArea = false;
1855   bool HasG8SaveArea = false;
1856   bool HasFPSaveArea = false;
1857   bool HasVRSAVESaveArea = false;
1858   bool HasVRSaveArea = false;
1859 
1860   SmallVector<CalleeSavedInfo, 18> GPRegs;
1861   SmallVector<CalleeSavedInfo, 18> G8Regs;
1862   SmallVector<CalleeSavedInfo, 18> FPRegs;
1863   SmallVector<CalleeSavedInfo, 18> VRegs;
1864 
1865   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1866     unsigned Reg = CSI[i].getReg();
1867     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1868             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1869            "Not expecting to try to spill R2 in a function that must save TOC");
1870     if (PPC::GPRCRegClass.contains(Reg)) {
1871       HasGPSaveArea = true;
1872 
1873       GPRegs.push_back(CSI[i]);
1874 
1875       if (Reg < MinGPR) {
1876         MinGPR = Reg;
1877       }
1878     } else if (PPC::G8RCRegClass.contains(Reg)) {
1879       HasG8SaveArea = true;
1880 
1881       G8Regs.push_back(CSI[i]);
1882 
1883       if (Reg < MinG8R) {
1884         MinG8R = Reg;
1885       }
1886     } else if (PPC::F8RCRegClass.contains(Reg)) {
1887       HasFPSaveArea = true;
1888 
1889       FPRegs.push_back(CSI[i]);
1890 
1891       if (Reg < MinFPR) {
1892         MinFPR = Reg;
1893       }
1894     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1895                PPC::CRRCRegClass.contains(Reg)) {
1896       ; // do nothing, as we already know whether CRs are spilled
1897     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1898       HasVRSAVESaveArea = true;
1899     } else if (PPC::VRRCRegClass.contains(Reg) ||
1900                PPC::SPERCRegClass.contains(Reg)) {
1901       // Altivec and SPE are mutually exclusive, but have the same stack
1902       // alignment requirements, so overload the save area for both cases.
1903       HasVRSaveArea = true;
1904 
1905       VRegs.push_back(CSI[i]);
1906 
1907       if (Reg < MinVR) {
1908         MinVR = Reg;
1909       }
1910     } else {
1911       llvm_unreachable("Unknown RegisterClass!");
1912     }
1913   }
1914 
1915   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1916   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1917 
1918   int64_t LowerBound = 0;
1919 
1920   // Take into account stack space reserved for tail calls.
1921   int TCSPDelta = 0;
1922   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1923       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1924     LowerBound = TCSPDelta;
1925   }
1926 
1927   // The Floating-point register save area is right below the back chain word
1928   // of the previous stack frame.
1929   if (HasFPSaveArea) {
1930     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1931       int FI = FPRegs[i].getFrameIdx();
1932 
1933       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1934     }
1935 
1936     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1937   }
1938 
1939   // Check whether the frame pointer register is allocated. If so, make sure it
1940   // is spilled to the correct offset.
1941   if (needsFP(MF)) {
1942     int FI = PFI->getFramePointerSaveIndex();
1943     assert(FI && "No Frame Pointer Save Slot!");
1944     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1945     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1946     HasGPSaveArea = true;
1947   }
1948 
1949   if (PFI->usesPICBase()) {
1950     int FI = PFI->getPICBasePointerSaveIndex();
1951     assert(FI && "No PIC Base Pointer Save Slot!");
1952     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1953 
1954     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1955     HasGPSaveArea = true;
1956   }
1957 
1958   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1959   if (RegInfo->hasBasePointer(MF)) {
1960     int FI = PFI->getBasePointerSaveIndex();
1961     assert(FI && "No Base Pointer Save Slot!");
1962     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1963 
1964     Register BP = RegInfo->getBaseRegister(MF);
1965     if (PPC::G8RCRegClass.contains(BP)) {
1966       MinG8R = std::min<unsigned>(MinG8R, BP);
1967       HasG8SaveArea = true;
1968     } else if (PPC::GPRCRegClass.contains(BP)) {
1969       MinGPR = std::min<unsigned>(MinGPR, BP);
1970       HasGPSaveArea = true;
1971     }
1972   }
1973 
1974   // General register save area starts right below the Floating-point
1975   // register save area.
1976   if (HasGPSaveArea || HasG8SaveArea) {
1977     // Move general register save area spill slots down, taking into account
1978     // the size of the Floating-point register save area.
1979     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1980       if (!GPRegs[i].isSpilledToReg()) {
1981         int FI = GPRegs[i].getFrameIdx();
1982         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1983       }
1984     }
1985 
1986     // Move general register save area spill slots down, taking into account
1987     // the size of the Floating-point register save area.
1988     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1989       if (!G8Regs[i].isSpilledToReg()) {
1990         int FI = G8Regs[i].getFrameIdx();
1991         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1992       }
1993     }
1994 
1995     unsigned MinReg =
1996       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1997                          TRI->getEncodingValue(MinG8R));
1998 
1999     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2000     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2001   }
2002 
2003   // For 32-bit only, the CR save area is below the general register
2004   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2005   // to the stack pointer and hence does not need an adjustment here.
2006   // Only CR2 (the first nonvolatile spilled) has an associated frame
2007   // index so that we have a single uniform save area.
2008   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2009     // Adjust the frame index of the CR spill slot.
2010     for (const auto &CSInfo : CSI) {
2011       if (CSInfo.getReg() == PPC::CR2) {
2012         int FI = CSInfo.getFrameIdx();
2013         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2014         break;
2015       }
2016     }
2017 
2018     LowerBound -= 4; // The CR save area is always 4 bytes long.
2019   }
2020 
2021   if (HasVRSAVESaveArea) {
2022     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2023     //             which have the VRSAVE register class?
2024     // Adjust the frame index of the VRSAVE spill slot.
2025     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2026       unsigned Reg = CSI[i].getReg();
2027 
2028       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2029         int FI = CSI[i].getFrameIdx();
2030 
2031         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2032       }
2033     }
2034 
2035     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2036   }
2037 
2038   // Both Altivec and SPE have the same alignment and padding requirements
2039   // within the stack frame.
2040   if (HasVRSaveArea) {
2041     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2042     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2043     // we are using negative number here (the stack grows downward). We should
2044     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2045     // is the alignment size ( n = 16 here) and y is the size after aligning.
2046     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2047     LowerBound &= ~(15);
2048 
2049     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2050       int FI = VRegs[i].getFrameIdx();
2051 
2052       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2053     }
2054   }
2055 
2056   addScavengingSpillSlot(MF, RS);
2057 }
2058 
2059 void
2060 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2061                                          RegScavenger *RS) const {
2062   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2063   // a large stack, which will require scavenging a register to materialize a
2064   // large offset.
2065 
2066   // We need to have a scavenger spill slot for spills if the frame size is
2067   // large. In case there is no free register for large-offset addressing,
2068   // this slot is used for the necessary emergency spill. Also, we need the
2069   // slot for dynamic stack allocations.
2070 
2071   // The scavenger might be invoked if the frame offset does not fit into
2072   // the 16-bit immediate. We don't know the complete frame size here
2073   // because we've not yet computed callee-saved register spills or the
2074   // needed alignment padding.
2075   unsigned StackSize = determineFrameLayout(MF, true);
2076   MachineFrameInfo &MFI = MF.getFrameInfo();
2077   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2078       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2079     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2080     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2081     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2082     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2083     unsigned Size = TRI.getSpillSize(RC);
2084     unsigned Align = TRI.getSpillAlignment(RC);
2085     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2086 
2087     // Might we have over-aligned allocas?
2088     bool HasAlVars =
2089         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2090 
2091     // These kinds of spills might need two registers.
2092     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2093       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2094 
2095   }
2096 }
2097 
2098 // This function checks if a callee saved gpr can be spilled to a volatile
2099 // vector register. This occurs for leaf functions when the option
2100 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2101 // which were not spilled to vectors, return false so the target independent
2102 // code can handle them by assigning a FrameIdx to a stack slot.
2103 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2104     MachineFunction &MF, const TargetRegisterInfo *TRI,
2105     std::vector<CalleeSavedInfo> &CSI) const {
2106 
2107   if (CSI.empty())
2108     return true; // Early exit if no callee saved registers are modified!
2109 
2110   // Early exit if cannot spill gprs to volatile vector registers.
2111   MachineFrameInfo &MFI = MF.getFrameInfo();
2112   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2113     return false;
2114 
2115   // Build a BitVector of VSRs that can be used for spilling GPRs.
2116   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2117   BitVector BVCalleeSaved(TRI->getNumRegs());
2118   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2119   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2120   for (unsigned i = 0; CSRegs[i]; ++i)
2121     BVCalleeSaved.set(CSRegs[i]);
2122 
2123   for (unsigned Reg : BVAllocatable.set_bits()) {
2124     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2125     // used in the function.
2126     if (BVCalleeSaved[Reg] ||
2127         (!PPC::F8RCRegClass.contains(Reg) &&
2128          !PPC::VFRCRegClass.contains(Reg)) ||
2129         (MF.getRegInfo().isPhysRegUsed(Reg)))
2130       BVAllocatable.reset(Reg);
2131   }
2132 
2133   bool AllSpilledToReg = true;
2134   for (auto &CS : CSI) {
2135     if (BVAllocatable.none())
2136       return false;
2137 
2138     unsigned Reg = CS.getReg();
2139     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2140       AllSpilledToReg = false;
2141       continue;
2142     }
2143 
2144     unsigned VolatileVFReg = BVAllocatable.find_first();
2145     if (VolatileVFReg < BVAllocatable.size()) {
2146       CS.setDstReg(VolatileVFReg);
2147       BVAllocatable.reset(VolatileVFReg);
2148     } else {
2149       AllSpilledToReg = false;
2150     }
2151   }
2152   return AllSpilledToReg;
2153 }
2154 
2155 bool PPCFrameLowering::spillCalleeSavedRegisters(
2156     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2157     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2158 
2159   MachineFunction *MF = MBB.getParent();
2160   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2161   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2162   bool MustSaveTOC = FI->mustSaveTOC();
2163   DebugLoc DL;
2164   bool CRSpilled = false;
2165   MachineInstrBuilder CRMIB;
2166 
2167   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2168     unsigned Reg = CSI[i].getReg();
2169     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2170     if (Reg == PPC::VRSAVE)
2171       continue;
2172 
2173     // CR2 through CR4 are the nonvolatile CR fields.
2174     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2175 
2176     // Add the callee-saved register as live-in; it's killed at the spill.
2177     // Do not do this for callee-saved registers that are live-in to the
2178     // function because they will already be marked live-in and this will be
2179     // adding it for a second time. It is an error to add the same register
2180     // to the set more than once.
2181     const MachineRegisterInfo &MRI = MF->getRegInfo();
2182     bool IsLiveIn = MRI.isLiveIn(Reg);
2183     if (!IsLiveIn)
2184        MBB.addLiveIn(Reg);
2185 
2186     if (CRSpilled && IsCRField) {
2187       CRMIB.addReg(Reg, RegState::ImplicitKill);
2188       continue;
2189     }
2190 
2191     // The actual spill will happen in the prologue.
2192     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2193       continue;
2194 
2195     // Insert the spill to the stack frame.
2196     if (IsCRField) {
2197       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2198       if (!Subtarget.is32BitELFABI()) {
2199         // The actual spill will happen at the start of the prologue.
2200         FuncInfo->addMustSaveCR(Reg);
2201       } else {
2202         CRSpilled = true;
2203         FuncInfo->setSpillsCR();
2204 
2205         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2206         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2207         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2208                   .addReg(Reg, RegState::ImplicitKill);
2209 
2210         MBB.insert(MI, CRMIB);
2211         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2212                                          .addReg(PPC::R12,
2213                                                  getKillRegState(true)),
2214                                          CSI[i].getFrameIdx()));
2215       }
2216     } else {
2217       if (CSI[i].isSpilledToReg()) {
2218         NumPESpillVSR++;
2219         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2220           .addReg(Reg, getKillRegState(true));
2221       } else {
2222         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2223         // Use !IsLiveIn for the kill flag.
2224         // We do not want to kill registers that are live in this function
2225         // before their use because they will become undefined registers.
2226         // Functions without NoUnwind need to preserve the order of elements in
2227         // saved vector registers.
2228         if (Subtarget.needsSwapsForVSXMemOps() &&
2229             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2230           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2231                                        CSI[i].getFrameIdx(), RC, TRI);
2232         else
2233           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2234                                   RC, TRI);
2235       }
2236     }
2237   }
2238   return true;
2239 }
2240 
2241 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2242                        bool CR4Spilled, MachineBasicBlock &MBB,
2243                        MachineBasicBlock::iterator MI,
2244                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2245 
2246   MachineFunction *MF = MBB.getParent();
2247   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2248   DebugLoc DL;
2249   unsigned MoveReg = PPC::R12;
2250 
2251   // 32-bit:  FP-relative
2252   MBB.insert(MI,
2253              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2254                                CSI[CSIIndex].getFrameIdx()));
2255 
2256   unsigned RestoreOp = PPC::MTOCRF;
2257   if (CR2Spilled)
2258     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2259                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2260 
2261   if (CR3Spilled)
2262     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2263                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2264 
2265   if (CR4Spilled)
2266     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2267                .addReg(MoveReg, getKillRegState(true)));
2268 }
2269 
2270 MachineBasicBlock::iterator PPCFrameLowering::
2271 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2272                               MachineBasicBlock::iterator I) const {
2273   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2274   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2275       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2276     // Add (actually subtract) back the amount the callee popped on return.
2277     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2278       bool is64Bit = Subtarget.isPPC64();
2279       CalleeAmt *= -1;
2280       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2281       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2282       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2283       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2284       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2285       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2286       const DebugLoc &dl = I->getDebugLoc();
2287 
2288       if (isInt<16>(CalleeAmt)) {
2289         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2290           .addReg(StackReg, RegState::Kill)
2291           .addImm(CalleeAmt);
2292       } else {
2293         MachineBasicBlock::iterator MBBI = I;
2294         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2295           .addImm(CalleeAmt >> 16);
2296         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2297           .addReg(TmpReg, RegState::Kill)
2298           .addImm(CalleeAmt & 0xFFFF);
2299         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2300           .addReg(StackReg, RegState::Kill)
2301           .addReg(TmpReg);
2302       }
2303     }
2304   }
2305   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2306   return MBB.erase(I);
2307 }
2308 
2309 static bool isCalleeSavedCR(unsigned Reg) {
2310   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2311 }
2312 
/// Emit the code that reloads the callee-saved registers described by \p CSI,
/// inserting it before \p MI in \p MBB.
///
/// Entries are handled as follows:
///  - VRSAVE is skipped.
///  - X2/R2 is skipped when the function info reports mustSaveTOC().
///  - CR2/CR3/CR4 are skipped unless the subtarget uses the 32-bit ELF ABI;
///    on that ABI they are recorded and restored together via restoreCRs().
///  - An entry that was spilled to a register is moved back with MFVSRD.
///  - Every other entry is reloaded from its frame index through \p TII.
///
/// \returns true unconditionally.
bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  // Which callee-saved CR fields have been seen but not yet restored.
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  // Index into CSI of the entry whose frame slot holds the saved CR word.
  unsigned CSIIndex = 0;

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  // BeforeI remembers the instruction just ahead of the original insertion
  // point (when there is one) so that I can be re-anchored immediately after
  // it once an iteration has inserted code; each new restore therefore lands
  // in front of the restores emitted by earlier iterations.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
    if (Reg == PPC::VRSAVE)
      continue;

    // The TOC register is not reloaded here when mustSaveTOC() is set.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    // From here on a CR field can only be seen on the 32-bit ELF ABI. CR
    // fields are only recorded; the actual restore is deferred so that all
    // of them are handled by a single restoreCRs() call.
    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled.  Save it here.
      // NOTE(review): if CR3/CR4 show up without CR2, CSIIndex keeps its
      // previous value (initially 0) -- presumably the spill side guarantees
      // a CR2 entry; confirm against the code that builds CSI.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // On 32-bit ELF when we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(*MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
                   CSIIndex);
        // The pending CR fields have been handled; clear the flags so they
        // are not restored a second time.
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        // The value was kept in another register instead of a stack slot:
        // move it back from CSI[i].getDstReg() and count the reload.
        DebugLoc DL;
        NumPEReloadVSR++;
        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
            .addReg(CSI[i].getDstReg(), getKillRegState(true));
      } else {
        // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order.
    // Re-anchor I at the first instruction emitted so far (right after
    // BeforeI, or at the block start when there was nothing before MI).
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we haven't yet restored the CRs, do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    assert(Subtarget.is32BitELFABI() &&
           "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
    bool is31 = needsFP(*MF);
    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
  }

  return true;
}
2412 
/// Return the value of the TOCSaveOffset member.
// NOTE(review): presumably the offset of the TOC save slot; the member is
// initialized outside this part of the file -- confirm there.
unsigned PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}
2416 
/// Return the value of the FramePointerSaveOffset member.
// NOTE(review): presumably the offset of the frame-pointer save slot; the
// member is initialized outside this part of the file -- confirm there.
unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}
2420 
2421 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2422   if (Subtarget.isAIXABI())
2423     report_fatal_error("BasePointer is not implemented on AIX yet.");
2424   return BasePointerSaveOffset;
2425 }
2426 
2427 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2428   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2429     return false;
2430   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2431           MF.getSubtarget<PPCSubtarget>().isPPC64());
2432 }
2433