xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 26bf877ec5ce07eaaf2ebf19e78f26fa59a8e41a)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
// Tag used by the statistics/debug machinery for this file, followed by the
// two counters declared under it. The counters are not touched in the part of
// the file shown here; presumably the prologue/epilogue emission code bumps
// them when it spills to / reloads from vector registers.
#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");

// Hidden command-line switch "-ppc-enable-pe-vector-spills"; it defaults to
// false (cl::init(false)) and is not listed in the normal help output
// (cl::Hidden).
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   if (STI.isAIXABI())
58     return STI.isPPC64() ? 40 : 20;
59   return STI.isELFv2ABI() ? 24 : 40;
60 }
61 
62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63   // SVR4 ABI: First slot in the general register save area.
64   return STI.isPPC64() ? -8U : -4U;
65 }
66 
67 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
68   if (STI.isAIXABI() || STI.isPPC64())
69     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
70 
71   // 32-bit SVR4 ABI:
72   return 8;
73 }
74 
75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
76   // SVR4 ABI: First slot in the general register save area.
77   return STI.isPPC64()
78              ? -16U
79              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
80 }
81 
/// Compute the value stored in CRSaveOffset.
///
/// \returns the constant 8, independent of the subtarget.
static unsigned computeCRSaveOffset() {
  // The condition register save offset needs to be updated for AIX PPC32.
  const unsigned CRSlotOffset = 8;
  return CRSlotOffset;
}
86 
/// Build the frame-lowering object for subtarget \p STI.
///
/// The base class is told that the stack grows down and is given the
/// platform stack alignment reported by the subtarget (the trailing 0 is the
/// third TargetFrameLowering argument; presumably the local-area offset -
/// confirm against the base class). All ABI-dependent save-slot offsets and
/// the linkage size are computed once here by the file-local compute*
/// helpers and cached in members.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset()) {}
96 
97 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
98 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
99     unsigned &NumEntries) const {
100   // Early exit if not using the SVR4 ABI.
101   if (!Subtarget.isSVR4ABI()) {
102     NumEntries = 0;
103     return nullptr;
104   }
105 
106   // Note that the offsets here overlap, but this is fixed up in
107   // processFunctionBeforeFrameFinalized.
108 
109   static const SpillSlot Offsets[] = {
110       // Floating-point register save area offsets.
111       {PPC::F31, -8},
112       {PPC::F30, -16},
113       {PPC::F29, -24},
114       {PPC::F28, -32},
115       {PPC::F27, -40},
116       {PPC::F26, -48},
117       {PPC::F25, -56},
118       {PPC::F24, -64},
119       {PPC::F23, -72},
120       {PPC::F22, -80},
121       {PPC::F21, -88},
122       {PPC::F20, -96},
123       {PPC::F19, -104},
124       {PPC::F18, -112},
125       {PPC::F17, -120},
126       {PPC::F16, -128},
127       {PPC::F15, -136},
128       {PPC::F14, -144},
129 
130       // General register save area offsets.
131       {PPC::R31, -4},
132       {PPC::R30, -8},
133       {PPC::R29, -12},
134       {PPC::R28, -16},
135       {PPC::R27, -20},
136       {PPC::R26, -24},
137       {PPC::R25, -28},
138       {PPC::R24, -32},
139       {PPC::R23, -36},
140       {PPC::R22, -40},
141       {PPC::R21, -44},
142       {PPC::R20, -48},
143       {PPC::R19, -52},
144       {PPC::R18, -56},
145       {PPC::R17, -60},
146       {PPC::R16, -64},
147       {PPC::R15, -68},
148       {PPC::R14, -72},
149 
150       // CR save area offset.  We map each of the nonvolatile CR fields
151       // to the slot for CR2, which is the first of the nonvolatile CR
152       // fields to be assigned, so that we only allocate one save slot.
153       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
154       {PPC::CR2, -4},
155 
156       // VRSAVE save area offset.
157       {PPC::VRSAVE, -4},
158 
159       // Vector register save area
160       {PPC::V31, -16},
161       {PPC::V30, -32},
162       {PPC::V29, -48},
163       {PPC::V28, -64},
164       {PPC::V27, -80},
165       {PPC::V26, -96},
166       {PPC::V25, -112},
167       {PPC::V24, -128},
168       {PPC::V23, -144},
169       {PPC::V22, -160},
170       {PPC::V21, -176},
171       {PPC::V20, -192},
172 
173       // SPE register save area (overlaps Vector save area).
174       {PPC::S31, -8},
175       {PPC::S30, -16},
176       {PPC::S29, -24},
177       {PPC::S28, -32},
178       {PPC::S27, -40},
179       {PPC::S26, -48},
180       {PPC::S25, -56},
181       {PPC::S24, -64},
182       {PPC::S23, -72},
183       {PPC::S22, -80},
184       {PPC::S21, -88},
185       {PPC::S20, -96},
186       {PPC::S19, -104},
187       {PPC::S18, -112},
188       {PPC::S17, -120},
189       {PPC::S16, -128},
190       {PPC::S15, -136},
191       {PPC::S14, -144}};
192 
193   static const SpillSlot Offsets64[] = {
194       // Floating-point register save area offsets.
195       {PPC::F31, -8},
196       {PPC::F30, -16},
197       {PPC::F29, -24},
198       {PPC::F28, -32},
199       {PPC::F27, -40},
200       {PPC::F26, -48},
201       {PPC::F25, -56},
202       {PPC::F24, -64},
203       {PPC::F23, -72},
204       {PPC::F22, -80},
205       {PPC::F21, -88},
206       {PPC::F20, -96},
207       {PPC::F19, -104},
208       {PPC::F18, -112},
209       {PPC::F17, -120},
210       {PPC::F16, -128},
211       {PPC::F15, -136},
212       {PPC::F14, -144},
213 
214       // General register save area offsets.
215       {PPC::X31, -8},
216       {PPC::X30, -16},
217       {PPC::X29, -24},
218       {PPC::X28, -32},
219       {PPC::X27, -40},
220       {PPC::X26, -48},
221       {PPC::X25, -56},
222       {PPC::X24, -64},
223       {PPC::X23, -72},
224       {PPC::X22, -80},
225       {PPC::X21, -88},
226       {PPC::X20, -96},
227       {PPC::X19, -104},
228       {PPC::X18, -112},
229       {PPC::X17, -120},
230       {PPC::X16, -128},
231       {PPC::X15, -136},
232       {PPC::X14, -144},
233 
234       // VRSAVE save area offset.
235       {PPC::VRSAVE, -4},
236 
237       // Vector register save area
238       {PPC::V31, -16},
239       {PPC::V30, -32},
240       {PPC::V29, -48},
241       {PPC::V28, -64},
242       {PPC::V27, -80},
243       {PPC::V26, -96},
244       {PPC::V25, -112},
245       {PPC::V24, -128},
246       {PPC::V23, -144},
247       {PPC::V22, -160},
248       {PPC::V21, -176},
249       {PPC::V20, -192}};
250 
251   if (Subtarget.isPPC64()) {
252     NumEntries = array_lengthof(Offsets64);
253 
254     return Offsets64;
255   } else {
256     NumEntries = array_lengthof(Offsets);
257 
258     return Offsets;
259   }
260 }
261 
262 /// RemoveVRSaveCode - We have found that this function does not need any code
263 /// to manipulate the VRSAVE register, even though it uses vector registers.
264 /// This can happen when the only registers used are known to be live in or out
265 /// of the function.  Remove all of the VRSAVE related code from the function.
266 /// FIXME: The removal of the code results in a compile failure at -O0 when the
267 /// function contains a function call, as the GPR containing original VRSAVE
268 /// contents is spilled and reloaded around the call.  Without the prolog code,
269 /// the spill instruction refers to an undefined register.  This code needs
270 /// to account for all uses of that GPR.
271 static void RemoveVRSaveCode(MachineInstr &MI) {
272   MachineBasicBlock *Entry = MI.getParent();
273   MachineFunction *MF = Entry->getParent();
274 
275   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
276   MachineBasicBlock::iterator MBBI = MI;
277   ++MBBI;
278   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
279   MBBI->eraseFromParent();
280 
281   bool RemovedAllMTVRSAVEs = true;
282   // See if we can find and remove the MTVRSAVE instruction from all of the
283   // epilog blocks.
284   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
285     // If last instruction is a return instruction, add an epilogue
286     if (I->isReturnBlock()) {
287       bool FoundIt = false;
288       for (MBBI = I->end(); MBBI != I->begin(); ) {
289         --MBBI;
290         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
291           MBBI->eraseFromParent();  // remove it.
292           FoundIt = true;
293           break;
294         }
295       }
296       RemovedAllMTVRSAVEs &= FoundIt;
297     }
298   }
299 
300   // If we found and removed all MTVRSAVE instructions, remove the read of
301   // VRSAVE as well.
302   if (RemovedAllMTVRSAVEs) {
303     MBBI = MI;
304     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
305     --MBBI;
306     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
307     MBBI->eraseFromParent();
308   }
309 
310   // Finally, nuke the UPDATE_VRSAVE.
311   MI.eraseFromParent();
312 }
313 
314 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
315 // instruction selector.  Based on the vector registers that have been used,
316 // transform this into the appropriate ORI instruction.
317 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
318   MachineFunction *MF = MI.getParent()->getParent();
319   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
320   DebugLoc dl = MI.getDebugLoc();
321 
322   const MachineRegisterInfo &MRI = MF->getRegInfo();
323   unsigned UsedRegMask = 0;
324   for (unsigned i = 0; i != 32; ++i)
325     if (MRI.isPhysRegModified(VRRegNo[i]))
326       UsedRegMask |= 1 << (31-i);
327 
328   // Live in and live out values already must be in the mask, so don't bother
329   // marking them.
330   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
331     unsigned RegNo = TRI->getEncodingValue(LI.first);
332     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
333       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
334   }
335 
336   // Live out registers appear as use operands on return instructions.
337   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
338        UsedRegMask != 0 && BI != BE; ++BI) {
339     const MachineBasicBlock &MBB = *BI;
340     if (!MBB.isReturnBlock())
341       continue;
342     const MachineInstr &Ret = MBB.back();
343     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
344       const MachineOperand &MO = Ret.getOperand(I);
345       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
346         continue;
347       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
348       UsedRegMask &= ~(1 << (31-RegNo));
349     }
350   }
351 
352   // If no registers are used, turn this into a copy.
353   if (UsedRegMask == 0) {
354     // Remove all VRSAVE code.
355     RemoveVRSaveCode(MI);
356     return;
357   }
358 
359   Register SrcReg = MI.getOperand(1).getReg();
360   Register DstReg = MI.getOperand(0).getReg();
361 
362   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
363     if (DstReg != SrcReg)
364       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
365           .addReg(SrcReg)
366           .addImm(UsedRegMask);
367     else
368       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
369           .addReg(SrcReg, RegState::Kill)
370           .addImm(UsedRegMask);
371   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
372     if (DstReg != SrcReg)
373       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
374           .addReg(SrcReg)
375           .addImm(UsedRegMask >> 16);
376     else
377       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
378           .addReg(SrcReg, RegState::Kill)
379           .addImm(UsedRegMask >> 16);
380   } else {
381     if (DstReg != SrcReg)
382       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
383           .addReg(SrcReg)
384           .addImm(UsedRegMask >> 16);
385     else
386       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
387           .addReg(SrcReg, RegState::Kill)
388           .addImm(UsedRegMask >> 16);
389 
390     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
391         .addReg(DstReg, RegState::Kill)
392         .addImm(UsedRegMask & 0xFFFF);
393   }
394 
395   // Remove the old UPDATE_VRSAVE instruction.
396   MI.eraseFromParent();
397 }
398 
399 static bool spillsCR(const MachineFunction &MF) {
400   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
401   return FuncInfo->isCRSpilled();
402 }
403 
404 static bool spillsVRSAVE(const MachineFunction &MF) {
405   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
406   return FuncInfo->isVRSAVESpilled();
407 }
408 
409 static bool hasSpills(const MachineFunction &MF) {
410   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
411   return FuncInfo->hasSpills();
412 }
413 
414 static bool hasNonRISpills(const MachineFunction &MF) {
415   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
416   return FuncInfo->hasNonRISpills();
417 }
418 
419 /// MustSaveLR - Return true if this function requires that we save the LR
420 /// register onto the stack in the prolog and restore it in the epilog of the
421 /// function.
422 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
423   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
424 
425   // We need a save/restore of LR if there is any def of LR (which is
426   // defined by calls, including the PIC setup sequence), or if there is
427   // some use of the LR stack slot (e.g. for builtin_return_address).
428   // (LR comes in 32 and 64 bit versions.)
429   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
430   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
431 }
432 
433 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
434 /// call frame size. Update the MachineFunction object with the stack size.
435 unsigned
436 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
437                                                 bool UseEstimate) const {
438   unsigned NewMaxCallFrameSize = 0;
439   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
440                                             &NewMaxCallFrameSize);
441   MF.getFrameInfo().setStackSize(FrameSize);
442   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
443   return FrameSize;
444 }
445 
446 /// determineFrameLayout - Determine the size of the frame and maximum call
447 /// frame size.
448 unsigned
449 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
450                                        bool UseEstimate,
451                                        unsigned *NewMaxCallFrameSize) const {
452   const MachineFrameInfo &MFI = MF.getFrameInfo();
453   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
454 
455   // Get the number of bytes to allocate from the FrameInfo
456   unsigned FrameSize =
457     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
458 
459   // Get stack alignments. The frame must be aligned to the greatest of these:
460   unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
461   unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
462   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
463 
464   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
465 
466   unsigned LR = RegInfo->getRARegister();
467   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
468   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
469                        !MFI.adjustsStack() &&       // No calls.
470                        !MustSaveLR(MF, LR) &&       // No need to save LR.
471                        !FI->mustSaveTOC() &&        // No need to save TOC.
472                        !RegInfo->hasBasePointer(MF); // No special alignment.
473 
474   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
475   // code if all local vars are reg-allocated.
476   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
477 
478   // Check whether we can skip adjusting the stack pointer (by using red zone)
479   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
480     // No need for frame
481     return 0;
482   }
483 
484   // Get the maximum call frame size of all the calls.
485   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
486 
487   // Maximum call frame needs to be at least big enough for linkage area.
488   unsigned minCallFrameSize = getLinkageSize();
489   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
490 
491   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
492   // that allocations will be aligned.
493   if (MFI.hasVarSizedObjects())
494     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
495 
496   // Update the new max call frame size if the caller passes in a valid pointer.
497   if (NewMaxCallFrameSize)
498     *NewMaxCallFrameSize = maxCallFrameSize;
499 
500   // Include call frame size in total.
501   FrameSize += maxCallFrameSize;
502 
503   // Make sure the frame is aligned.
504   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
505 
506   return FrameSize;
507 }
508 
509 // hasFP - Return true if the specified function actually has a dedicated frame
510 // pointer register.
511 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
512   const MachineFrameInfo &MFI = MF.getFrameInfo();
513   // FIXME: This is pretty much broken by design: hasFP() might be called really
514   // early, before the stack layout was calculated and thus hasFP() might return
515   // true or false here depending on the time of call.
516   return (MFI.getStackSize()) && needsFP(MF);
517 }
518 
519 // needsFP - Return true if the specified function should have a dedicated frame
520 // pointer register.  This is true if the function has variable sized allocas or
521 // if frame pointer elimination is disabled.
522 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
523   const MachineFrameInfo &MFI = MF.getFrameInfo();
524 
525   // Naked functions have no stack frame pushed, so we don't have a frame
526   // pointer.
527   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
528     return false;
529 
530   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
531     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
532     (MF.getTarget().Options.GuaranteedTailCallOpt &&
533      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
534 }
535 
536 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
537   bool is31 = needsFP(MF);
538   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
539   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
540 
541   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
542   bool HasBP = RegInfo->hasBasePointer(MF);
543   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
544   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
545 
546   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
547        BI != BE; ++BI)
548     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
549       --MBBI;
550       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
551         MachineOperand &MO = MBBI->getOperand(I);
552         if (!MO.isReg())
553           continue;
554 
555         switch (MO.getReg()) {
556         case PPC::FP:
557           MO.setReg(FPReg);
558           break;
559         case PPC::FP8:
560           MO.setReg(FP8Reg);
561           break;
562         case PPC::BP:
563           MO.setReg(BPReg);
564           break;
565         case PPC::BP8:
566           MO.setReg(BP8Reg);
567           break;
568 
569         }
570       }
571     }
572 }
573 
574 /*  This function will do the following:
575     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
576       respectively (defaults recommended by the ABI) and return true
577     - If MBB is not an entry block, initialize the register scavenger and look
578       for available registers.
579     - If the defaults (R0/R12) are available, return true
580     - If TwoUniqueRegsRequired is set to true, it looks for two unique
581       registers. Otherwise, look for a single available register.
582       - If the required registers are found, set SR1 and SR2 and return true.
583       - If the required registers are not found, set SR2 or both SR1 and SR2 to
584         PPC::NoRegister and return false.
585 
586     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
587     is not set, this function will attempt to find two different registers, but
588     still return true if only one register is available (and set SR1 == SR2).
589 */
590 bool
591 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
592                                       bool UseAtEnd,
593                                       bool TwoUniqueRegsRequired,
594                                       unsigned *SR1,
595                                       unsigned *SR2) const {
596   RegScavenger RS;
597   unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
598   unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
599 
600   // Set the defaults for the two scratch registers.
601   if (SR1)
602     *SR1 = R0;
603 
604   if (SR2) {
605     assert (SR1 && "Asking for the second scratch register but not the first?");
606     *SR2 = R12;
607   }
608 
609   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
610   if ((UseAtEnd && MBB->isReturnBlock()) ||
611       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
612     return true;
613 
614   RS.enterBasicBlock(*MBB);
615 
616   if (UseAtEnd && !MBB->empty()) {
617     // The scratch register will be used at the end of the block, so must
618     // consider all registers used within the block
619 
620     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
621     // If no terminator, back iterator up to previous instruction.
622     if (MBBI == MBB->end())
623       MBBI = std::prev(MBBI);
624 
625     if (MBBI != MBB->begin())
626       RS.forward(MBBI);
627   }
628 
629   // If the two registers are available, we're all good.
630   // Note that we only return here if both R0 and R12 are available because
631   // although the function may not require two unique registers, it may benefit
632   // from having two so we should try to provide them.
633   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
634     return true;
635 
636   // Get the list of callee-saved registers for the target.
637   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
638   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
639 
640   // Get all the available registers in the block.
641   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
642                                      &PPC::GPRCRegClass);
643 
644   // We shouldn't use callee-saved registers as scratch registers as they may be
645   // available when looking for a candidate block for shrink wrapping but not
646   // available when the actual prologue/epilogue is being emitted because they
647   // were added as live-in to the prologue block by PrologueEpilogueInserter.
648   for (int i = 0; CSRegs[i]; ++i)
649     BV.reset(CSRegs[i]);
650 
651   // Set the first scratch register to the first available one.
652   if (SR1) {
653     int FirstScratchReg = BV.find_first();
654     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
655   }
656 
657   // If there is another one available, set the second scratch register to that.
658   // Otherwise, set it to either PPC::NoRegister if this function requires two
659   // or to whatever SR1 is set to if this function doesn't require two.
660   if (SR2) {
661     int SecondScratchReg = BV.find_next(*SR1);
662     if (SecondScratchReg != -1)
663       *SR2 = SecondScratchReg;
664     else
665       *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
666   }
667 
668   // Now that we've done our best to provide both registers, double check
669   // whether we were unable to provide enough.
670   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
671     return false;
672 
673   return true;
674 }
675 
676 // We need a scratch register for spilling LR and for spilling CR. By default,
677 // we use two scratch registers to hide latency. However, if only one scratch
678 // register is available, we can adjust for that by not overlapping the spill
679 // code. However, if we need to realign the stack (i.e. have a base pointer)
680 // and the stack frame is large, we need two scratch registers.
681 bool
682 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
683   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
684   MachineFunction &MF = *(MBB->getParent());
685   bool HasBP = RegInfo->hasBasePointer(MF);
686   unsigned FrameSize = determineFrameLayout(MF);
687   int NegFrameSize = -FrameSize;
688   bool IsLargeFrame = !isInt<16>(NegFrameSize);
689   MachineFrameInfo &MFI = MF.getFrameInfo();
690   unsigned MaxAlign = MFI.getMaxAlignment();
691   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
692 
693   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
694 }
695 
696 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
697   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
698 
699   return findScratchRegister(TmpMBB, false,
700                              twoUniqueScratchRegsRequired(TmpMBB));
701 }
702 
703 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
704   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
705 
706   return findScratchRegister(TmpMBB, true);
707 }
708 
709 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
710   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
711   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
712 
713   // Abort if there is no register info or function info.
714   if (!RegInfo || !FI)
715     return false;
716 
717   // Only move the stack update on ELFv2 ABI and PPC64.
718   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
719     return false;
720 
721   // Check the frame size first and return false if it does not fit the
722   // requirements.
723   // We need a non-zero frame size as well as a frame that will fit in the red
724   // zone. This is because by moving the stack pointer update we are now storing
725   // to the red zone until the stack pointer is updated. If we get an interrupt
726   // inside the prologue but before the stack update we now have a number of
727   // stores to the red zone and those stores must all fit.
728   MachineFrameInfo &MFI = MF.getFrameInfo();
729   unsigned FrameSize = MFI.getStackSize();
730   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
731     return false;
732 
733   // Frame pointers and base pointers complicate matters so don't do anything
734   // if we have them. For example having a frame pointer will sometimes require
735   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
736   // difficult.
737   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
738     return false;
739 
740   // Calls to fast_cc functions use different rules for passing parameters on
741   // the stack from the ABI and using PIC base in the function imposes
742   // similar restrictions to using the base pointer. It is not generally safe
743   // to move the stack pointer update in these situations.
744   if (FI->hasFastCall() || FI->usesPICBase())
745     return false;
746 
747   // Finally we can move the stack update if we do not require register
748   // scavenging. Register scavenging can introduce more spills and so
749   // may make the frame size larger than we have computed.
750   return !RegInfo->requiresFrameIndexScavenging(MF);
751 }
752 
753 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
754                                     MachineBasicBlock &MBB) const {
755   MachineBasicBlock::iterator MBBI = MBB.begin();
756   MachineFrameInfo &MFI = MF.getFrameInfo();
757   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
758   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
759 
760   MachineModuleInfo &MMI = MF.getMMI();
761   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
762   DebugLoc dl;
763   bool needsCFI = MF.needsFrameMoves();
764 
765   // Get processor type.
766   bool isPPC64 = Subtarget.isPPC64();
767   // Get the ABI.
768   bool isSVR4ABI = Subtarget.isSVR4ABI();
769   bool isAIXABI = Subtarget.isAIXABI();
770   bool isELFv2ABI = Subtarget.isELFv2ABI();
771   assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
772 
773   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
774   // process it.
775   if (!isSVR4ABI)
776     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
777       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
778         if (isAIXABI)
779           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
780         HandleVRSaveUpdate(*MBBI, TII);
781         break;
782       }
783     }
784 
785   // Move MBBI back to the beginning of the prologue block.
786   MBBI = MBB.begin();
787 
788   // Work out frame sizes.
789   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
790   int NegFrameSize = -FrameSize;
791   if (!isInt<32>(NegFrameSize))
792     llvm_unreachable("Unhandled stack size!");
793 
794   if (MFI.isFrameAddressTaken())
795     replaceFPWithRealFP(MF);
796 
797   // Check if the link register (LR) must be saved.
798   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
799   bool MustSaveLR = FI->mustSaveLR();
800   bool MustSaveTOC = FI->mustSaveTOC();
801   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
802   bool MustSaveCR = !MustSaveCRs.empty();
803   // Do we have a frame pointer and/or base pointer for this function?
804   bool HasFP = hasFP(MF);
805   bool HasBP = RegInfo->hasBasePointer(MF);
806   bool HasRedZone = isPPC64 || !isSVR4ABI;
807 
808   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
809   Register BPReg = RegInfo->getBaseRegister(MF);
810   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
811   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
812   unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
813   unsigned ScratchReg  = 0;
814   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
815   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
816   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
817                                                 : PPC::MFLR );
818   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
819                                                  : PPC::STW );
820   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
821                                                      : PPC::STWU );
822   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
823                                                         : PPC::STWUX);
824   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
825                                                           : PPC::LIS );
826   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
827                                                  : PPC::ORI );
828   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
829                                               : PPC::OR );
830   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
831                                                             : PPC::SUBFC);
832   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
833                                                                : PPC::SUBFIC);
834 
835   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
836   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
837   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
838   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
839   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
840          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
841 
842   // Using the same bool variable as below to suppress compiler warnings.
843   bool SingleScratchReg =
844     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
845                         &ScratchReg, &TempReg);
846   assert(SingleScratchReg &&
847          "Required number of registers not available in this block");
848 
849   SingleScratchReg = ScratchReg == TempReg;
850 
851   int LROffset = getReturnSaveOffset();
852 
853   int FPOffset = 0;
854   if (HasFP) {
855     if (isSVR4ABI) {
856       MachineFrameInfo &MFI = MF.getFrameInfo();
857       int FPIndex = FI->getFramePointerSaveIndex();
858       assert(FPIndex && "No Frame Pointer Save Slot!");
859       FPOffset = MFI.getObjectOffset(FPIndex);
860     } else {
861       FPOffset = getFramePointerSaveOffset();
862     }
863   }
864 
865   int BPOffset = 0;
866   if (HasBP) {
867     if (isSVR4ABI) {
868       MachineFrameInfo &MFI = MF.getFrameInfo();
869       int BPIndex = FI->getBasePointerSaveIndex();
870       assert(BPIndex && "No Base Pointer Save Slot!");
871       BPOffset = MFI.getObjectOffset(BPIndex);
872     } else {
873       BPOffset = getBasePointerSaveOffset();
874     }
875   }
876 
877   int PBPOffset = 0;
878   if (FI->usesPICBase()) {
879     MachineFrameInfo &MFI = MF.getFrameInfo();
880     int PBPIndex = FI->getPICBasePointerSaveIndex();
881     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
882     PBPOffset = MFI.getObjectOffset(PBPIndex);
883   }
884 
885   // Get stack alignments.
886   unsigned MaxAlign = MFI.getMaxAlignment();
887   if (HasBP && MaxAlign > 1)
888     assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
889            "Invalid alignment!");
890 
891   // Frames of 32KB & larger require special handling because they cannot be
892   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
893   bool isLargeFrame = !isInt<16>(NegFrameSize);
894 
895   assert((isPPC64 || !MustSaveCR) &&
896          "Prologue CR saving supported only in 64-bit mode");
897 
898   if (MustSaveCR && isAIXABI)
899     report_fatal_error("Prologue CR saving is unimplemented on AIX.");
900 
901   // Check if we can move the stack update instruction (stdu) down the prologue
902   // past the callee saves. Hopefully this will avoid the situation where the
903   // saves are waiting for the update on the store with update to complete.
904   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
905   bool MovingStackUpdateDown = false;
906 
907   // Check if we can move the stack update.
908   if (stackUpdateCanBeMoved(MF)) {
909     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
910     for (CalleeSavedInfo CSI : Info) {
911       int FrIdx = CSI.getFrameIdx();
912       // If the frame index is not negative the callee saved info belongs to a
913       // stack object that is not a fixed stack object. We ignore non-fixed
914       // stack objects because we won't move the stack update pointer past them.
915       if (FrIdx >= 0)
916         continue;
917 
918       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
919         StackUpdateLoc++;
920         MovingStackUpdateDown = true;
921       } else {
922         // We need all of the Frame Indices to meet these conditions.
923         // If they do not, abort the whole operation.
924         StackUpdateLoc = MBBI;
925         MovingStackUpdateDown = false;
926         break;
927       }
928     }
929 
930     // If the operation was not aborted then update the object offset.
931     if (MovingStackUpdateDown) {
932       for (CalleeSavedInfo CSI : Info) {
933         int FrIdx = CSI.getFrameIdx();
934         if (FrIdx < 0)
935           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
936       }
937     }
938   }
939 
940   // If we need to spill the CR and the LR but we don't have two separate
941   // registers available, we must spill them one at a time
942   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
943     // In the ELFv2 ABI, we are not required to save all CR fields.
944     // If only one or two CR fields are clobbered, it is more efficient to use
945     // mfocrf to selectively save just those fields, because mfocrf has short
946     // latency compares to mfcr.
947     unsigned MfcrOpcode = PPC::MFCR8;
948     unsigned CrState = RegState::ImplicitKill;
949     if (isELFv2ABI && MustSaveCRs.size() == 1) {
950       MfcrOpcode = PPC::MFOCRF8;
951       CrState = RegState::Kill;
952     }
953     MachineInstrBuilder MIB =
954       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
955     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
956       MIB.addReg(MustSaveCRs[i], CrState);
957     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
958       .addReg(TempReg, getKillRegState(true))
959       .addImm(getCRSaveOffset())
960       .addReg(SPReg);
961   }
962 
963   if (MustSaveLR)
964     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
965 
966   if (MustSaveCR &&
967       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
968     // In the ELFv2 ABI, we are not required to save all CR fields.
969     // If only one or two CR fields are clobbered, it is more efficient to use
970     // mfocrf to selectively save just those fields, because mfocrf has short
971     // latency compares to mfcr.
972     unsigned MfcrOpcode = PPC::MFCR8;
973     unsigned CrState = RegState::ImplicitKill;
974     if (isELFv2ABI && MustSaveCRs.size() == 1) {
975       MfcrOpcode = PPC::MFOCRF8;
976       CrState = RegState::Kill;
977     }
978     MachineInstrBuilder MIB =
979       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
980     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
981       MIB.addReg(MustSaveCRs[i], CrState);
982   }
983 
984   if (HasRedZone) {
985     if (HasFP)
986       BuildMI(MBB, MBBI, dl, StoreInst)
987         .addReg(FPReg)
988         .addImm(FPOffset)
989         .addReg(SPReg);
990     if (FI->usesPICBase())
991       BuildMI(MBB, MBBI, dl, StoreInst)
992         .addReg(PPC::R30)
993         .addImm(PBPOffset)
994         .addReg(SPReg);
995     if (HasBP)
996       BuildMI(MBB, MBBI, dl, StoreInst)
997         .addReg(BPReg)
998         .addImm(BPOffset)
999         .addReg(SPReg);
1000   }
1001 
1002   if (MustSaveLR)
1003     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1004       .addReg(ScratchReg, getKillRegState(true))
1005       .addImm(LROffset)
1006       .addReg(SPReg);
1007 
1008   if (MustSaveCR &&
1009       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
1010     assert(HasRedZone && "A red zone is always available on PPC64");
1011     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
1012       .addReg(TempReg, getKillRegState(true))
1013       .addImm(getCRSaveOffset())
1014       .addReg(SPReg);
1015   }
1016 
1017   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1018   if (!FrameSize)
1019     return;
1020 
1021   // Adjust stack pointer: r1 += NegFrameSize.
1022   // If there is a preferred stack alignment, align R1 now
1023 
1024   if (HasBP && HasRedZone) {
1025     // Save a copy of r1 as the base pointer.
1026     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1027       .addReg(SPReg)
1028       .addReg(SPReg);
1029   }
1030 
1031   // Have we generated a STUX instruction to claim stack frame? If so,
1032   // the negated frame size will be placed in ScratchReg.
1033   bool HasSTUX = false;
1034 
1035   // This condition must be kept in sync with canUseAsPrologue.
1036   if (HasBP && MaxAlign > 1) {
1037     if (isPPC64)
1038       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1039         .addReg(SPReg)
1040         .addImm(0)
1041         .addImm(64 - Log2_32(MaxAlign));
1042     else // PPC32...
1043       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1044         .addReg(SPReg)
1045         .addImm(0)
1046         .addImm(32 - Log2_32(MaxAlign))
1047         .addImm(31);
1048     if (!isLargeFrame) {
1049       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1050         .addReg(ScratchReg, RegState::Kill)
1051         .addImm(NegFrameSize);
1052     } else {
1053       assert(!SingleScratchReg && "Only a single scratch reg available");
1054       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1055         .addImm(NegFrameSize >> 16);
1056       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1057         .addReg(TempReg, RegState::Kill)
1058         .addImm(NegFrameSize & 0xFFFF);
1059       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1060         .addReg(ScratchReg, RegState::Kill)
1061         .addReg(TempReg, RegState::Kill);
1062     }
1063 
1064     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1065       .addReg(SPReg, RegState::Kill)
1066       .addReg(SPReg)
1067       .addReg(ScratchReg);
1068     HasSTUX = true;
1069 
1070   } else if (!isLargeFrame) {
1071     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1072       .addReg(SPReg)
1073       .addImm(NegFrameSize)
1074       .addReg(SPReg);
1075 
1076   } else {
1077     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1078       .addImm(NegFrameSize >> 16);
1079     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1080       .addReg(ScratchReg, RegState::Kill)
1081       .addImm(NegFrameSize & 0xFFFF);
1082     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1083       .addReg(SPReg, RegState::Kill)
1084       .addReg(SPReg)
1085       .addReg(ScratchReg);
1086     HasSTUX = true;
1087   }
1088 
1089   // Save the TOC register after the stack pointer update if a prologue TOC
1090   // save is required for the function.
1091   if (MustSaveTOC) {
1092     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1093     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1094       .addReg(TOCReg, getKillRegState(true))
1095       .addImm(TOCSaveOffset)
1096       .addReg(SPReg);
1097   }
1098 
1099   if (!HasRedZone) {
1100     assert(!isPPC64 && "A red zone is always available on PPC64");
1101     if (HasSTUX) {
1102       // The negated frame size is in ScratchReg, and the SPReg has been
1103       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1104       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1105       // the stack frame (i.e. the old SP), ideally, we would put the old
1106       // SP into a register and use it as the base for the stores. The
1107       // problem is that the only available register may be ScratchReg,
1108       // which could be R0, and R0 cannot be used as a base address.
1109 
1110       // First, set ScratchReg to the old SP. This may need to be modified
1111       // later.
1112       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1113         .addReg(ScratchReg, RegState::Kill)
1114         .addReg(SPReg);
1115 
1116       if (ScratchReg == PPC::R0) {
1117         // R0 cannot be used as a base register, but it can be used as an
1118         // index in a store-indexed.
1119         int LastOffset = 0;
1120         if (HasFP)  {
1121           // R0 += (FPOffset-LastOffset).
1122           // Need addic, since addi treats R0 as 0.
1123           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1124             .addReg(ScratchReg)
1125             .addImm(FPOffset-LastOffset);
1126           LastOffset = FPOffset;
1127           // Store FP into *R0.
1128           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1129             .addReg(FPReg, RegState::Kill)  // Save FP.
1130             .addReg(PPC::ZERO)
1131             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1132         }
1133         if (FI->usesPICBase()) {
1134           // R0 += (PBPOffset-LastOffset).
1135           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1136             .addReg(ScratchReg)
1137             .addImm(PBPOffset-LastOffset);
1138           LastOffset = PBPOffset;
1139           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1140             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1141             .addReg(PPC::ZERO)
1142             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1143         }
1144         if (HasBP) {
1145           // R0 += (BPOffset-LastOffset).
1146           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1147             .addReg(ScratchReg)
1148             .addImm(BPOffset-LastOffset);
1149           LastOffset = BPOffset;
1150           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1151             .addReg(BPReg, RegState::Kill)  // Save BP.
1152             .addReg(PPC::ZERO)
1153             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1154           // BP = R0-LastOffset
1155           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1156             .addReg(ScratchReg, RegState::Kill)
1157             .addImm(-LastOffset);
1158         }
1159       } else {
1160         // ScratchReg is not R0, so use it as the base register. It is
1161         // already set to the old SP, so we can use the offsets directly.
1162 
1163         // Now that the stack frame has been allocated, save all the necessary
1164         // registers using ScratchReg as the base address.
1165         if (HasFP)
1166           BuildMI(MBB, MBBI, dl, StoreInst)
1167             .addReg(FPReg)
1168             .addImm(FPOffset)
1169             .addReg(ScratchReg);
1170         if (FI->usesPICBase())
1171           BuildMI(MBB, MBBI, dl, StoreInst)
1172             .addReg(PPC::R30)
1173             .addImm(PBPOffset)
1174             .addReg(ScratchReg);
1175         if (HasBP) {
1176           BuildMI(MBB, MBBI, dl, StoreInst)
1177             .addReg(BPReg)
1178             .addImm(BPOffset)
1179             .addReg(ScratchReg);
1180           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1181             .addReg(ScratchReg, RegState::Kill)
1182             .addReg(ScratchReg);
1183         }
1184       }
1185     } else {
1186       // The frame size is a known 16-bit constant (fitting in the immediate
1187       // field of STWU). To be here we have to be compiling for PPC32.
1188       // Since the SPReg has been decreased by FrameSize, add it back to each
1189       // offset.
1190       if (HasFP)
1191         BuildMI(MBB, MBBI, dl, StoreInst)
1192           .addReg(FPReg)
1193           .addImm(FrameSize + FPOffset)
1194           .addReg(SPReg);
1195       if (FI->usesPICBase())
1196         BuildMI(MBB, MBBI, dl, StoreInst)
1197           .addReg(PPC::R30)
1198           .addImm(FrameSize + PBPOffset)
1199           .addReg(SPReg);
1200       if (HasBP) {
1201         BuildMI(MBB, MBBI, dl, StoreInst)
1202           .addReg(BPReg)
1203           .addImm(FrameSize + BPOffset)
1204           .addReg(SPReg);
1205         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1206           .addReg(SPReg)
1207           .addImm(FrameSize);
1208       }
1209     }
1210   }
1211 
1212   // Add Call Frame Information for the instructions we generated above.
1213   if (needsCFI) {
1214     unsigned CFIIndex;
1215 
1216     if (HasBP) {
1217       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1218       // because if the stack needed aligning then CFA won't be at a fixed
1219       // offset from FP/SP.
1220       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1221       CFIIndex = MF.addFrameInst(
1222           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1223     } else {
1224       // Adjust the definition of CFA to account for the change in SP.
1225       assert(NegFrameSize);
1226       CFIIndex = MF.addFrameInst(
1227           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1228     }
1229     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1230         .addCFIIndex(CFIIndex);
1231 
1232     if (HasFP) {
1233       // Describe where FP was saved, at a fixed offset from CFA.
1234       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1235       CFIIndex = MF.addFrameInst(
1236           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1237       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1238           .addCFIIndex(CFIIndex);
1239     }
1240 
1241     if (FI->usesPICBase()) {
1242       // Describe where FP was saved, at a fixed offset from CFA.
1243       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1244       CFIIndex = MF.addFrameInst(
1245           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1246       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1247           .addCFIIndex(CFIIndex);
1248     }
1249 
1250     if (HasBP) {
1251       // Describe where BP was saved, at a fixed offset from CFA.
1252       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1253       CFIIndex = MF.addFrameInst(
1254           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1255       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1256           .addCFIIndex(CFIIndex);
1257     }
1258 
1259     if (MustSaveLR) {
1260       // Describe where LR was saved, at a fixed offset from CFA.
1261       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1262       CFIIndex = MF.addFrameInst(
1263           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1264       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1265           .addCFIIndex(CFIIndex);
1266     }
1267   }
1268 
1269   // If there is a frame pointer, copy R1 into R31
1270   if (HasFP) {
1271     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1272       .addReg(SPReg)
1273       .addReg(SPReg);
1274 
1275     if (!HasBP && needsCFI) {
1276       // Change the definition of CFA from SP+offset to FP+offset, because SP
1277       // will change at every alloca.
1278       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1279       unsigned CFIIndex = MF.addFrameInst(
1280           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1281 
1282       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1283           .addCFIIndex(CFIIndex);
1284     }
1285   }
1286 
1287   if (needsCFI) {
1288     // Describe where callee saved registers were saved, at fixed offsets from
1289     // CFA.
1290     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1291     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1292       unsigned Reg = CSI[I].getReg();
1293       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1294 
1295       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1296       // subregisters of CR2. We just need to emit a move of CR2.
1297       if (PPC::CRBITRCRegClass.contains(Reg))
1298         continue;
1299 
1300       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1301         continue;
1302 
1303       // For SVR4, don't emit a move for the CR spill slot if we haven't
1304       // spilled CRs.
1305       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1306           && !MustSaveCR)
1307         continue;
1308 
1309       // For 64-bit SVR4 when we have spilled CRs, the spill location
1310       // is SP+8, not a frame-relative slot.
1311       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1312         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1313         // the whole CR word.  In the ELFv2 ABI, every CR that was
1314         // actually saved gets its own CFI record.
1315         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1316         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1317             nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
1318         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1319             .addCFIIndex(CFIIndex);
1320         continue;
1321       }
1322 
1323       if (CSI[I].isSpilledToReg()) {
1324         unsigned SpilledReg = CSI[I].getDstReg();
1325         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1326             nullptr, MRI->getDwarfRegNum(Reg, true),
1327             MRI->getDwarfRegNum(SpilledReg, true)));
1328         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1329           .addCFIIndex(CFIRegister);
1330       } else {
1331         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1332         // We have changed the object offset above but we do not want to change
1333         // the actual offsets in the CFI instruction so we have to undo the
1334         // offset change here.
1335         if (MovingStackUpdateDown)
1336           Offset -= NegFrameSize;
1337 
1338         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1339             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1340         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1341             .addCFIIndex(CFIIndex);
1342       }
1343     }
1344   }
1345 }
1346 
1347 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1348                                     MachineBasicBlock &MBB) const {
1349   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1350   DebugLoc dl;
1351 
1352   if (MBBI != MBB.end())
1353     dl = MBBI->getDebugLoc();
1354 
1355   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1356   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1357 
1358   // Get alignment info so we know how to restore the SP.
1359   const MachineFrameInfo &MFI = MF.getFrameInfo();
1360 
1361   // Get the number of bytes allocated from the FrameInfo.
1362   int FrameSize = MFI.getStackSize();
1363 
1364   // Get processor type.
1365   bool isPPC64 = Subtarget.isPPC64();
1366   // Get the ABI.
1367   bool isSVR4ABI = Subtarget.isSVR4ABI();
1368 
1369   // Check if the link register (LR) has been saved.
1370   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1371   bool MustSaveLR = FI->mustSaveLR();
1372   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1373   bool MustSaveCR = !MustSaveCRs.empty();
1374   // Do we have a frame pointer and/or base pointer for this function?
1375   bool HasFP = hasFP(MF);
1376   bool HasBP = RegInfo->hasBasePointer(MF);
1377   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1378 
1379   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1380   Register BPReg = RegInfo->getBaseRegister(MF);
1381   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1382   unsigned ScratchReg = 0;
1383   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1384   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1385                                                  : PPC::MTLR );
1386   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1387                                                  : PPC::LWZ );
1388   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1389                                                            : PPC::LIS );
1390   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1391                                               : PPC::OR );
1392   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1393                                                   : PPC::ORI );
1394   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1395                                                    : PPC::ADDI );
1396   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1397                                                 : PPC::ADD4 );
1398 
1399   int LROffset = getReturnSaveOffset();
1400 
1401   int FPOffset = 0;
1402 
1403   // Using the same bool variable as below to suppress compiler warnings.
1404   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1405                                               &TempReg);
1406   assert(SingleScratchReg &&
1407          "Could not find an available scratch register");
1408 
1409   SingleScratchReg = ScratchReg == TempReg;
1410 
1411   if (HasFP) {
1412     if (isSVR4ABI) {
1413       int FPIndex = FI->getFramePointerSaveIndex();
1414       assert(FPIndex && "No Frame Pointer Save Slot!");
1415       FPOffset = MFI.getObjectOffset(FPIndex);
1416     } else {
1417       FPOffset = getFramePointerSaveOffset();
1418     }
1419   }
1420 
1421   int BPOffset = 0;
1422   if (HasBP) {
1423     if (isSVR4ABI) {
1424       int BPIndex = FI->getBasePointerSaveIndex();
1425       assert(BPIndex && "No Base Pointer Save Slot!");
1426       BPOffset = MFI.getObjectOffset(BPIndex);
1427     } else {
1428       BPOffset = getBasePointerSaveOffset();
1429     }
1430   }
1431 
1432   int PBPOffset = 0;
1433   if (FI->usesPICBase()) {
1434     int PBPIndex = FI->getPICBasePointerSaveIndex();
1435     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1436     PBPOffset = MFI.getObjectOffset(PBPIndex);
1437   }
1438 
1439   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1440 
1441   if (IsReturnBlock) {
1442     unsigned RetOpcode = MBBI->getOpcode();
1443     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1444                       RetOpcode == PPC::TCRETURNdi ||
1445                       RetOpcode == PPC::TCRETURNai ||
1446                       RetOpcode == PPC::TCRETURNri8 ||
1447                       RetOpcode == PPC::TCRETURNdi8 ||
1448                       RetOpcode == PPC::TCRETURNai8;
1449 
1450     if (UsesTCRet) {
1451       int MaxTCRetDelta = FI->getTailCallSPDelta();
1452       MachineOperand &StackAdjust = MBBI->getOperand(1);
1453       assert(StackAdjust.isImm() && "Expecting immediate value.");
1454       // Adjust stack pointer.
1455       int StackAdj = StackAdjust.getImm();
1456       int Delta = StackAdj - MaxTCRetDelta;
1457       assert((Delta >= 0) && "Delta must be positive");
1458       if (MaxTCRetDelta>0)
1459         FrameSize += (StackAdj +Delta);
1460       else
1461         FrameSize += StackAdj;
1462     }
1463   }
1464 
1465   // Frames of 32KB & larger require special handling because they cannot be
1466   // indexed into with a simple LD/LWZ immediate offset operand.
1467   bool isLargeFrame = !isInt<16>(FrameSize);
1468 
1469   // On targets without red zone, the SP needs to be restored last, so that
1470   // all live contents of the stack frame are upwards of the SP. This means
1471   // that we cannot restore SP just now, since there may be more registers
1472   // to restore from the stack frame (e.g. R31). If the frame size is not
1473   // a simple immediate value, we will need a spare register to hold the
1474   // restored SP. If the frame size is known and small, we can simply adjust
1475   // the offsets of the registers to be restored, and still use SP to restore
1476   // them. In such case, the final update of SP will be to add the frame
1477   // size to it.
1478   // To simplify the code, set RBReg to the base register used to restore
1479   // values from the stack, and set SPAdd to the value that needs to be added
1480   // to the SP at the end. The default values are as if red zone was present.
1481   unsigned RBReg = SPReg;
1482   unsigned SPAdd = 0;
1483 
1484   // Check if we can move the stack update instruction up the epilogue
1485   // past the callee saves. This will allow the move to LR instruction
1486   // to be executed before the restores of the callee saves which means
1487   // that the callee saves can hide the latency from the MTLR instrcution.
1488   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1489   if (stackUpdateCanBeMoved(MF)) {
1490     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1491     for (CalleeSavedInfo CSI : Info) {
1492       int FrIdx = CSI.getFrameIdx();
1493       // If the frame index is not negative the callee saved info belongs to a
1494       // stack object that is not a fixed stack object. We ignore non-fixed
1495       // stack objects because we won't move the update of the stack pointer
1496       // past them.
1497       if (FrIdx >= 0)
1498         continue;
1499 
1500       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1501         StackUpdateLoc--;
1502       else {
1503         // Abort the operation as we can't update all CSR restores.
1504         StackUpdateLoc = MBBI;
1505         break;
1506       }
1507     }
1508   }
1509 
1510   if (FrameSize) {
1511     // In the prologue, the loaded (or persistent) stack pointer value is
1512     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1513     // zone add this offset back now.
1514 
1515     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1516     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1517     // call which invalidates the stack pointer value in SP(0). So we use the
1518     // value of R31 in this case.
1519     if (FI->hasFastCall()) {
1520       assert(HasFP && "Expecting a valid frame pointer.");
1521       if (!HasRedZone)
1522         RBReg = FPReg;
1523       if (!isLargeFrame) {
1524         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1525           .addReg(FPReg).addImm(FrameSize);
1526       } else {
1527         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1528           .addImm(FrameSize >> 16);
1529         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1530           .addReg(ScratchReg, RegState::Kill)
1531           .addImm(FrameSize & 0xFFFF);
1532         BuildMI(MBB, MBBI, dl, AddInst)
1533           .addReg(RBReg)
1534           .addReg(FPReg)
1535           .addReg(ScratchReg);
1536       }
1537     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1538       if (HasRedZone) {
1539         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1540           .addReg(SPReg)
1541           .addImm(FrameSize);
1542       } else {
1543         // Make sure that adding FrameSize will not overflow the max offset
1544         // size.
1545         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1546                "Local offsets should be negative");
1547         SPAdd = FrameSize;
1548         FPOffset += FrameSize;
1549         BPOffset += FrameSize;
1550         PBPOffset += FrameSize;
1551       }
1552     } else {
1553       // We don't want to use ScratchReg as a base register, because it
1554       // could happen to be R0. Use FP instead, but make sure to preserve it.
1555       if (!HasRedZone) {
1556         // If FP is not saved, copy it to ScratchReg.
1557         if (!HasFP)
1558           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1559             .addReg(FPReg)
1560             .addReg(FPReg);
1561         RBReg = FPReg;
1562       }
1563       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1564         .addImm(0)
1565         .addReg(SPReg);
1566     }
1567   }
1568   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1569   // If there is no red zone, ScratchReg may be needed for holding a useful
1570   // value (although not the base register). Make sure it is not overwritten
1571   // too early.
1572 
1573   assert((isPPC64 || !MustSaveCR) &&
1574          "Epilogue CR restoring supported only in 64-bit mode");
1575 
1576   // If we need to restore both the LR and the CR and we only have one
1577   // available scratch register, we must do them one at a time.
1578   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1579     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1580     // is live here.
1581     assert(HasRedZone && "Expecting red zone");
1582     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1583       .addImm(getCRSaveOffset())
1584       .addReg(SPReg);
1585     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1586       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1587         .addReg(TempReg, getKillRegState(i == e-1));
1588   }
1589 
1590   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1591   // LR is stored in the caller's stack frame. ScratchReg will be needed
1592   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1593   // a base register anyway, because it may happen to be R0.
1594   bool LoadedLR = false;
1595   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1596     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1597       .addImm(LROffset+SPAdd)
1598       .addReg(RBReg);
1599     LoadedLR = true;
1600   }
1601 
1602   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1603     // This will only occur for PPC64.
1604     assert(isPPC64 && "Expecting 64-bit mode");
1605     assert(RBReg == SPReg && "Should be using SP as a base register");
1606     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1607       .addImm(getCRSaveOffset())
1608       .addReg(RBReg);
1609   }
1610 
1611   if (HasFP) {
1612     // If there is red zone, restore FP directly, since SP has already been
1613     // restored. Otherwise, restore the value of FP into ScratchReg.
1614     if (HasRedZone || RBReg == SPReg)
1615       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1616         .addImm(FPOffset)
1617         .addReg(SPReg);
1618     else
1619       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1620         .addImm(FPOffset)
1621         .addReg(RBReg);
1622   }
1623 
1624   if (FI->usesPICBase())
1625     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1626       .addImm(PBPOffset)
1627       .addReg(RBReg);
1628 
1629   if (HasBP)
1630     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1631       .addImm(BPOffset)
1632       .addReg(RBReg);
1633 
1634   // There is nothing more to be loaded from the stack, so now we can
1635   // restore SP: SP = RBReg + SPAdd.
1636   if (RBReg != SPReg || SPAdd != 0) {
1637     assert(!HasRedZone && "This should not happen with red zone");
1638     // If SPAdd is 0, generate a copy.
1639     if (SPAdd == 0)
1640       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1641         .addReg(RBReg)
1642         .addReg(RBReg);
1643     else
1644       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1645         .addReg(RBReg)
1646         .addImm(SPAdd);
1647 
1648     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1649     if (RBReg == FPReg)
1650       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1651         .addReg(ScratchReg)
1652         .addReg(ScratchReg);
1653 
1654     // Now load the LR from the caller's stack frame.
1655     if (MustSaveLR && !LoadedLR)
1656       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1657         .addImm(LROffset)
1658         .addReg(SPReg);
1659   }
1660 
1661   if (MustSaveCR &&
1662       !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1663     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1664       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1665         .addReg(TempReg, getKillRegState(i == e-1));
1666 
1667   if (MustSaveLR)
1668     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1669 
1670   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1671   // call optimization
1672   if (IsReturnBlock) {
1673     unsigned RetOpcode = MBBI->getOpcode();
1674     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1675         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1676         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1677       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1678       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1679 
1680       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1681         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1682           .addReg(SPReg).addImm(CallerAllocatedAmt);
1683       } else {
1684         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1685           .addImm(CallerAllocatedAmt >> 16);
1686         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1687           .addReg(ScratchReg, RegState::Kill)
1688           .addImm(CallerAllocatedAmt & 0xFFFF);
1689         BuildMI(MBB, MBBI, dl, AddInst)
1690           .addReg(SPReg)
1691           .addReg(FPReg)
1692           .addReg(ScratchReg);
1693       }
1694     } else {
1695       createTailCallBranchInstr(MBB);
1696     }
1697   }
1698 }
1699 
1700 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1701   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1702 
1703   // If we got this far a first terminator should exist.
1704   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1705 
1706   DebugLoc dl = MBBI->getDebugLoc();
1707   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1708 
1709   // Create branch instruction for pseudo tail call return instruction
1710   unsigned RetOpcode = MBBI->getOpcode();
1711   if (RetOpcode == PPC::TCRETURNdi) {
1712     MBBI = MBB.getLastNonDebugInstr();
1713     MachineOperand &JumpTarget = MBBI->getOperand(0);
1714     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1715       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1716   } else if (RetOpcode == PPC::TCRETURNri) {
1717     MBBI = MBB.getLastNonDebugInstr();
1718     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1719     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1720   } else if (RetOpcode == PPC::TCRETURNai) {
1721     MBBI = MBB.getLastNonDebugInstr();
1722     MachineOperand &JumpTarget = MBBI->getOperand(0);
1723     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1724   } else if (RetOpcode == PPC::TCRETURNdi8) {
1725     MBBI = MBB.getLastNonDebugInstr();
1726     MachineOperand &JumpTarget = MBBI->getOperand(0);
1727     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1728       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1729   } else if (RetOpcode == PPC::TCRETURNri8) {
1730     MBBI = MBB.getLastNonDebugInstr();
1731     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1732     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1733   } else if (RetOpcode == PPC::TCRETURNai8) {
1734     MBBI = MBB.getLastNonDebugInstr();
1735     MachineOperand &JumpTarget = MBBI->getOperand(0);
1736     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1737   }
1738 }
1739 
1740 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1741                                             BitVector &SavedRegs,
1742                                             RegScavenger *RS) const {
1743   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1744 
1745   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1746 
1747   //  Save and clear the LR state.
1748   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1749   unsigned LR = RegInfo->getRARegister();
1750   FI->setMustSaveLR(MustSaveLR(MF, LR));
1751   SavedRegs.reset(LR);
1752 
1753   //  Save R31 if necessary
1754   int FPSI = FI->getFramePointerSaveIndex();
1755   const bool isPPC64 = Subtarget.isPPC64();
1756   MachineFrameInfo &MFI = MF.getFrameInfo();
1757 
1758   // If the frame pointer save index hasn't been defined yet.
1759   if (!FPSI && needsFP(MF)) {
1760     // Find out what the fix offset of the frame pointer save area.
1761     int FPOffset = getFramePointerSaveOffset();
1762     // Allocate the frame index for frame pointer save area.
1763     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1764     // Save the result.
1765     FI->setFramePointerSaveIndex(FPSI);
1766   }
1767 
1768   int BPSI = FI->getBasePointerSaveIndex();
1769   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1770     int BPOffset = getBasePointerSaveOffset();
1771     // Allocate the frame index for the base pointer save area.
1772     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1773     // Save the result.
1774     FI->setBasePointerSaveIndex(BPSI);
1775   }
1776 
1777   // Reserve stack space for the PIC Base register (R30).
1778   // Only used in SVR4 32-bit.
1779   if (FI->usesPICBase()) {
1780     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1781     FI->setPICBasePointerSaveIndex(PBPSI);
1782   }
1783 
1784   // Make sure we don't explicitly spill r31, because, for example, we have
1785   // some inline asm which explicitly clobbers it, when we otherwise have a
1786   // frame pointer and are using r31's spill slot for the prologue/epilogue
1787   // code. Same goes for the base pointer and the PIC base register.
1788   if (needsFP(MF))
1789     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1790   if (RegInfo->hasBasePointer(MF))
1791     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1792   if (FI->usesPICBase())
1793     SavedRegs.reset(PPC::R30);
1794 
1795   // Reserve stack space to move the linkage area to in case of a tail call.
1796   int TCSPDelta = 0;
1797   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1798       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1799     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1800   }
1801 
1802   // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1803   // function uses CR 2, 3, or 4. For 64-bit SVR4 we create a FixedStack
1804   // object at the offset of the CR-save slot in the linkage area. The actual
1805   // save and restore of the condition register will be created as part of the
1806   // prologue and epilogue insertion, but the FixedStack object is needed to
1807   // keep the CalleSavedInfo valid.
1808   if (Subtarget.isSVR4ABI() &&
1809       (SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1810        SavedRegs.test(PPC::CR4))) {
1811     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1812     const int64_t SpillOffset = Subtarget.isPPC64() ? 8 : -4;
1813     int FrameIdx =
1814         MFI.CreateFixedObject(SpillSize, SpillOffset,
1815                               /* IsImmutable */ true, /* IsAliased */ false);
1816     FI->setCRSpillFrameIndex(FrameIdx);
1817   }
1818 }
1819 
1820 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1821                                                        RegScavenger *RS) const {
1822   // Early exit if not using the SVR4 ABI.
1823   if (!Subtarget.isSVR4ABI()) {
1824     addScavengingSpillSlot(MF, RS);
1825     return;
1826   }
1827 
1828   // Get callee saved register information.
1829   MachineFrameInfo &MFI = MF.getFrameInfo();
1830   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1831 
1832   // If the function is shrink-wrapped, and if the function has a tail call, the
1833   // tail call might not be in the new RestoreBlock, so real branch instruction
1834   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1835   // RestoreBlock. So we handle this case here.
1836   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1837     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1838     for (MachineBasicBlock &MBB : MF) {
1839       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1840         createTailCallBranchInstr(MBB);
1841     }
1842   }
1843 
1844   // Early exit if no callee saved registers are modified!
1845   if (CSI.empty() && !needsFP(MF)) {
1846     addScavengingSpillSlot(MF, RS);
1847     return;
1848   }
1849 
1850   unsigned MinGPR = PPC::R31;
1851   unsigned MinG8R = PPC::X31;
1852   unsigned MinFPR = PPC::F31;
1853   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1854 
1855   bool HasGPSaveArea = false;
1856   bool HasG8SaveArea = false;
1857   bool HasFPSaveArea = false;
1858   bool HasVRSAVESaveArea = false;
1859   bool HasVRSaveArea = false;
1860 
1861   SmallVector<CalleeSavedInfo, 18> GPRegs;
1862   SmallVector<CalleeSavedInfo, 18> G8Regs;
1863   SmallVector<CalleeSavedInfo, 18> FPRegs;
1864   SmallVector<CalleeSavedInfo, 18> VRegs;
1865 
1866   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1867     unsigned Reg = CSI[i].getReg();
1868     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1869             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1870            "Not expecting to try to spill R2 in a function that must save TOC");
1871     if (PPC::GPRCRegClass.contains(Reg)) {
1872       HasGPSaveArea = true;
1873 
1874       GPRegs.push_back(CSI[i]);
1875 
1876       if (Reg < MinGPR) {
1877         MinGPR = Reg;
1878       }
1879     } else if (PPC::G8RCRegClass.contains(Reg)) {
1880       HasG8SaveArea = true;
1881 
1882       G8Regs.push_back(CSI[i]);
1883 
1884       if (Reg < MinG8R) {
1885         MinG8R = Reg;
1886       }
1887     } else if (PPC::F8RCRegClass.contains(Reg)) {
1888       HasFPSaveArea = true;
1889 
1890       FPRegs.push_back(CSI[i]);
1891 
1892       if (Reg < MinFPR) {
1893         MinFPR = Reg;
1894       }
1895     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1896                PPC::CRRCRegClass.contains(Reg)) {
1897       ; // do nothing, as we already know whether CRs are spilled
1898     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1899       HasVRSAVESaveArea = true;
1900     } else if (PPC::VRRCRegClass.contains(Reg) ||
1901                PPC::SPERCRegClass.contains(Reg)) {
1902       // Altivec and SPE are mutually exclusive, but have the same stack
1903       // alignment requirements, so overload the save area for both cases.
1904       HasVRSaveArea = true;
1905 
1906       VRegs.push_back(CSI[i]);
1907 
1908       if (Reg < MinVR) {
1909         MinVR = Reg;
1910       }
1911     } else {
1912       llvm_unreachable("Unknown RegisterClass!");
1913     }
1914   }
1915 
1916   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1917   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1918 
1919   int64_t LowerBound = 0;
1920 
1921   // Take into account stack space reserved for tail calls.
1922   int TCSPDelta = 0;
1923   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1924       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1925     LowerBound = TCSPDelta;
1926   }
1927 
1928   // The Floating-point register save area is right below the back chain word
1929   // of the previous stack frame.
1930   if (HasFPSaveArea) {
1931     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1932       int FI = FPRegs[i].getFrameIdx();
1933 
1934       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1935     }
1936 
1937     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1938   }
1939 
1940   // Check whether the frame pointer register is allocated. If so, make sure it
1941   // is spilled to the correct offset.
1942   if (needsFP(MF)) {
1943     int FI = PFI->getFramePointerSaveIndex();
1944     assert(FI && "No Frame Pointer Save Slot!");
1945     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1946     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1947     HasGPSaveArea = true;
1948   }
1949 
1950   if (PFI->usesPICBase()) {
1951     int FI = PFI->getPICBasePointerSaveIndex();
1952     assert(FI && "No PIC Base Pointer Save Slot!");
1953     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1954 
1955     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1956     HasGPSaveArea = true;
1957   }
1958 
1959   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1960   if (RegInfo->hasBasePointer(MF)) {
1961     int FI = PFI->getBasePointerSaveIndex();
1962     assert(FI && "No Base Pointer Save Slot!");
1963     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1964 
1965     Register BP = RegInfo->getBaseRegister(MF);
1966     if (PPC::G8RCRegClass.contains(BP)) {
1967       MinG8R = std::min<unsigned>(MinG8R, BP);
1968       HasG8SaveArea = true;
1969     } else if (PPC::GPRCRegClass.contains(BP)) {
1970       MinGPR = std::min<unsigned>(MinGPR, BP);
1971       HasGPSaveArea = true;
1972     }
1973   }
1974 
1975   // General register save area starts right below the Floating-point
1976   // register save area.
1977   if (HasGPSaveArea || HasG8SaveArea) {
1978     // Move general register save area spill slots down, taking into account
1979     // the size of the Floating-point register save area.
1980     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1981       if (!GPRegs[i].isSpilledToReg()) {
1982         int FI = GPRegs[i].getFrameIdx();
1983         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1984       }
1985     }
1986 
1987     // Move general register save area spill slots down, taking into account
1988     // the size of the Floating-point register save area.
1989     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1990       if (!G8Regs[i].isSpilledToReg()) {
1991         int FI = G8Regs[i].getFrameIdx();
1992         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1993       }
1994     }
1995 
1996     unsigned MinReg =
1997       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1998                          TRI->getEncodingValue(MinG8R));
1999 
2000     if (Subtarget.isPPC64()) {
2001       LowerBound -= (31 - MinReg + 1) * 8;
2002     } else {
2003       LowerBound -= (31 - MinReg + 1) * 4;
2004     }
2005   }
2006 
2007   // For 32-bit only, the CR save area is below the general register
2008   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2009   // to the stack pointer and hence does not need an adjustment here.
2010   // Only CR2 (the first nonvolatile spilled) has an associated frame
2011   // index so that we have a single uniform save area.
2012   if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
2013     // Adjust the frame index of the CR spill slot.
2014     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2015       unsigned Reg = CSI[i].getReg();
2016 
2017       if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
2018           // Leave Darwin logic as-is.
2019           || (!Subtarget.isSVR4ABI() &&
2020               (PPC::CRBITRCRegClass.contains(Reg) ||
2021                PPC::CRRCRegClass.contains(Reg)))) {
2022         int FI = CSI[i].getFrameIdx();
2023 
2024         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2025       }
2026     }
2027 
2028     LowerBound -= 4; // The CR save area is always 4 bytes long.
2029   }
2030 
2031   if (HasVRSAVESaveArea) {
2032     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2033     //             which have the VRSAVE register class?
2034     // Adjust the frame index of the VRSAVE spill slot.
2035     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2036       unsigned Reg = CSI[i].getReg();
2037 
2038       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2039         int FI = CSI[i].getFrameIdx();
2040 
2041         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2042       }
2043     }
2044 
2045     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2046   }
2047 
2048   // Both Altivec and SPE have the same alignment and padding requirements
2049   // within the stack frame.
2050   if (HasVRSaveArea) {
2051     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2052     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2053     // we are using negative number here (the stack grows downward). We should
2054     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2055     // is the alignment size ( n = 16 here) and y is the size after aligning.
2056     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2057     LowerBound &= ~(15);
2058 
2059     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2060       int FI = VRegs[i].getFrameIdx();
2061 
2062       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2063     }
2064   }
2065 
2066   addScavengingSpillSlot(MF, RS);
2067 }
2068 
2069 void
2070 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2071                                          RegScavenger *RS) const {
2072   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2073   // a large stack, which will require scavenging a register to materialize a
2074   // large offset.
2075 
2076   // We need to have a scavenger spill slot for spills if the frame size is
2077   // large. In case there is no free register for large-offset addressing,
2078   // this slot is used for the necessary emergency spill. Also, we need the
2079   // slot for dynamic stack allocations.
2080 
2081   // The scavenger might be invoked if the frame offset does not fit into
2082   // the 16-bit immediate. We don't know the complete frame size here
2083   // because we've not yet computed callee-saved register spills or the
2084   // needed alignment padding.
2085   unsigned StackSize = determineFrameLayout(MF, true);
2086   MachineFrameInfo &MFI = MF.getFrameInfo();
2087   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2088       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2089     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2090     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2091     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2092     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2093     unsigned Size = TRI.getSpillSize(RC);
2094     unsigned Align = TRI.getSpillAlignment(RC);
2095     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2096 
2097     // Might we have over-aligned allocas?
2098     bool HasAlVars = MFI.hasVarSizedObjects() &&
2099                      MFI.getMaxAlignment() > getStackAlignment();
2100 
2101     // These kinds of spills might need two registers.
2102     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2103       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2104 
2105   }
2106 }
2107 
2108 // This function checks if a callee saved gpr can be spilled to a volatile
2109 // vector register. This occurs for leaf functions when the option
2110 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2111 // which were not spilled to vectors, return false so the target independent
2112 // code can handle them by assigning a FrameIdx to a stack slot.
2113 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2114     MachineFunction &MF, const TargetRegisterInfo *TRI,
2115     std::vector<CalleeSavedInfo> &CSI) const {
2116 
2117   if (CSI.empty())
2118     return true; // Early exit if no callee saved registers are modified!
2119 
2120   // Early exit if cannot spill gprs to volatile vector registers.
2121   MachineFrameInfo &MFI = MF.getFrameInfo();
2122   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2123     return false;
2124 
2125   // Build a BitVector of VSRs that can be used for spilling GPRs.
2126   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2127   BitVector BVCalleeSaved(TRI->getNumRegs());
2128   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2129   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2130   for (unsigned i = 0; CSRegs[i]; ++i)
2131     BVCalleeSaved.set(CSRegs[i]);
2132 
2133   for (unsigned Reg : BVAllocatable.set_bits()) {
2134     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2135     // used in the function.
2136     if (BVCalleeSaved[Reg] ||
2137         (!PPC::F8RCRegClass.contains(Reg) &&
2138          !PPC::VFRCRegClass.contains(Reg)) ||
2139         (MF.getRegInfo().isPhysRegUsed(Reg)))
2140       BVAllocatable.reset(Reg);
2141   }
2142 
2143   bool AllSpilledToReg = true;
2144   for (auto &CS : CSI) {
2145     if (BVAllocatable.none())
2146       return false;
2147 
2148     unsigned Reg = CS.getReg();
2149     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2150       AllSpilledToReg = false;
2151       continue;
2152     }
2153 
2154     unsigned VolatileVFReg = BVAllocatable.find_first();
2155     if (VolatileVFReg < BVAllocatable.size()) {
2156       CS.setDstReg(VolatileVFReg);
2157       BVAllocatable.reset(VolatileVFReg);
2158     } else {
2159       AllSpilledToReg = false;
2160     }
2161   }
2162   return AllSpilledToReg;
2163 }
2164 
2165 
2166 bool
2167 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2168                                      MachineBasicBlock::iterator MI,
2169                                      const std::vector<CalleeSavedInfo> &CSI,
2170                                      const TargetRegisterInfo *TRI) const {
2171 
2172   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2173   // Return false otherwise to maintain pre-existing behavior.
2174   if (!Subtarget.isSVR4ABI())
2175     return false;
2176 
2177   MachineFunction *MF = MBB.getParent();
2178   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2179   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2180   bool MustSaveTOC = FI->mustSaveTOC();
2181   DebugLoc DL;
2182   bool CRSpilled = false;
2183   MachineInstrBuilder CRMIB;
2184 
2185   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2186     unsigned Reg = CSI[i].getReg();
2187     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2188     if (Reg == PPC::VRSAVE)
2189       continue;
2190 
2191     // CR2 through CR4 are the nonvolatile CR fields.
2192     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2193 
2194     // Add the callee-saved register as live-in; it's killed at the spill.
2195     // Do not do this for callee-saved registers that are live-in to the
2196     // function because they will already be marked live-in and this will be
2197     // adding it for a second time. It is an error to add the same register
2198     // to the set more than once.
2199     const MachineRegisterInfo &MRI = MF->getRegInfo();
2200     bool IsLiveIn = MRI.isLiveIn(Reg);
2201     if (!IsLiveIn)
2202        MBB.addLiveIn(Reg);
2203 
2204     if (CRSpilled && IsCRField) {
2205       CRMIB.addReg(Reg, RegState::ImplicitKill);
2206       continue;
2207     }
2208 
2209     // The actual spill will happen in the prologue.
2210     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2211       continue;
2212 
2213     // Insert the spill to the stack frame.
2214     if (IsCRField) {
2215       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2216       if (Subtarget.isPPC64()) {
2217         // The actual spill will happen at the start of the prologue.
2218         FuncInfo->addMustSaveCR(Reg);
2219       } else {
2220         CRSpilled = true;
2221         FuncInfo->setSpillsCR();
2222 
2223         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2224         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2225         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2226                   .addReg(Reg, RegState::ImplicitKill);
2227 
2228         MBB.insert(MI, CRMIB);
2229         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2230                                          .addReg(PPC::R12,
2231                                                  getKillRegState(true)),
2232                                          CSI[i].getFrameIdx()));
2233       }
2234     } else {
2235       if (CSI[i].isSpilledToReg()) {
2236         NumPESpillVSR++;
2237         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2238           .addReg(Reg, getKillRegState(true));
2239       } else {
2240         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2241         // Use !IsLiveIn for the kill flag.
2242         // We do not want to kill registers that are live in this function
2243         // before their use because they will become undefined registers.
2244         // Functions without NoUnwind need to preserve the order of elements in
2245         // saved vector registers.
2246         if (Subtarget.needsSwapsForVSXMemOps() &&
2247             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2248           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2249                                        CSI[i].getFrameIdx(), RC, TRI);
2250         else
2251           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2252                                   RC, TRI);
2253       }
2254     }
2255   }
2256   return true;
2257 }
2258 
2259 static void
2260 restoreCRs(bool isPPC64, bool is31,
2261            bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
2262            MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2263            const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
2264 
2265   MachineFunction *MF = MBB.getParent();
2266   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2267   DebugLoc DL;
2268   unsigned RestoreOp, MoveReg;
2269 
2270   if (isPPC64)
2271     // This is handled during epilogue generation.
2272     return;
2273   else {
2274     // 32-bit:  FP-relative
2275     MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
2276                                              PPC::R12),
2277                                      CSI[CSIIndex].getFrameIdx()));
2278     RestoreOp = PPC::MTOCRF;
2279     MoveReg = PPC::R12;
2280   }
2281 
2282   if (CR2Spilled)
2283     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2284                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2285 
2286   if (CR3Spilled)
2287     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2288                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2289 
2290   if (CR4Spilled)
2291     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2292                .addReg(MoveReg, getKillRegState(true)));
2293 }
2294 
2295 MachineBasicBlock::iterator PPCFrameLowering::
2296 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2297                               MachineBasicBlock::iterator I) const {
2298   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2299   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2300       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2301     // Add (actually subtract) back the amount the callee popped on return.
2302     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2303       bool is64Bit = Subtarget.isPPC64();
2304       CalleeAmt *= -1;
2305       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2306       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2307       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2308       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2309       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2310       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2311       const DebugLoc &dl = I->getDebugLoc();
2312 
2313       if (isInt<16>(CalleeAmt)) {
2314         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2315           .addReg(StackReg, RegState::Kill)
2316           .addImm(CalleeAmt);
2317       } else {
2318         MachineBasicBlock::iterator MBBI = I;
2319         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2320           .addImm(CalleeAmt >> 16);
2321         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2322           .addReg(TmpReg, RegState::Kill)
2323           .addImm(CalleeAmt & 0xFFFF);
2324         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2325           .addReg(StackReg, RegState::Kill)
2326           .addReg(TmpReg);
2327       }
2328     }
2329   }
2330   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2331   return MBB.erase(I);
2332 }
2333 
2334 bool
2335 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2336                                         MachineBasicBlock::iterator MI,
2337                                         std::vector<CalleeSavedInfo> &CSI,
2338                                         const TargetRegisterInfo *TRI) const {
2339 
2340   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2341   // Return false otherwise to maintain pre-existing behavior.
2342   if (!Subtarget.isSVR4ABI())
2343     return false;
2344 
2345   MachineFunction *MF = MBB.getParent();
2346   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2347   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2348   bool MustSaveTOC = FI->mustSaveTOC();
2349   bool CR2Spilled = false;
2350   bool CR3Spilled = false;
2351   bool CR4Spilled = false;
2352   unsigned CSIIndex = 0;
2353 
2354   // Initialize insertion-point logic; we will be restoring in reverse
2355   // order of spill.
2356   MachineBasicBlock::iterator I = MI, BeforeI = I;
2357   bool AtStart = I == MBB.begin();
2358 
2359   if (!AtStart)
2360     --BeforeI;
2361 
2362   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2363     unsigned Reg = CSI[i].getReg();
2364 
2365     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2366     if (Reg == PPC::VRSAVE)
2367       continue;
2368 
2369     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2370       continue;
2371 
2372     if (Reg == PPC::CR2) {
2373       CR2Spilled = true;
2374       // The spill slot is associated only with CR2, which is the
2375       // first nonvolatile spilled.  Save it here.
2376       CSIIndex = i;
2377       continue;
2378     } else if (Reg == PPC::CR3) {
2379       CR3Spilled = true;
2380       continue;
2381     } else if (Reg == PPC::CR4) {
2382       CR4Spilled = true;
2383       continue;
2384     } else {
2385       // When we first encounter a non-CR register after seeing at
2386       // least one CR register, restore all spilled CRs together.
2387       if ((CR2Spilled || CR3Spilled || CR4Spilled)
2388           && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
2389         bool is31 = needsFP(*MF);
2390         restoreCRs(Subtarget.isPPC64(), is31,
2391                    CR2Spilled, CR3Spilled, CR4Spilled,
2392                    MBB, I, CSI, CSIIndex);
2393         CR2Spilled = CR3Spilled = CR4Spilled = false;
2394       }
2395 
2396       if (CSI[i].isSpilledToReg()) {
2397         DebugLoc DL;
2398         NumPEReloadVSR++;
2399         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2400             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2401       } else {
2402        // Default behavior for non-CR saves.
2403         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2404 
2405         // Functions without NoUnwind need to preserve the order of elements in
2406         // saved vector registers.
2407         if (Subtarget.needsSwapsForVSXMemOps() &&
2408             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2409           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2410                                         TRI);
2411         else
2412           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2413 
2414         assert(I != MBB.begin() &&
2415                "loadRegFromStackSlot didn't insert any code!");
2416       }
2417     }
2418 
2419     // Insert in reverse order.
2420     if (AtStart)
2421       I = MBB.begin();
2422     else {
2423       I = BeforeI;
2424       ++I;
2425     }
2426   }
2427 
2428   // If we haven't yet spilled the CRs, do so now.
2429   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2430     bool is31 = needsFP(*MF);
2431     restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
2432                MBB, I, CSI, CSIIndex);
2433   }
2434 
2435   return true;
2436 }
2437 
2438 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2439   return TOCSaveOffset;
2440 }
2441 
2442 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2443   if (Subtarget.isAIXABI())
2444     report_fatal_error("FramePointer is not implemented on AIX yet.");
2445   return FramePointerSaveOffset;
2446 }
2447 
2448 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2449   if (Subtarget.isAIXABI())
2450     report_fatal_error("BasePointer is not implemented on AIX yet.");
2451   return BasePointerSaveOffset;
2452 }
2453 
2454 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2455   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2456     return false;
2457   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2458           MF.getSubtarget<PPCSubtarget>().isPPC64());
2459 }
2460