xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision a7dea1671b87c07d2d266f836bfa8b58efc7c134)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
34 
35 static cl::opt<bool>
36 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
37                      cl::desc("Enable spills in prologue to vector registers."),
38                      cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isDarwinABI() || STI.isAIXABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   if (STI.isAIXABI())
58     return STI.isPPC64() ? 40 : 20;
59   return STI.isELFv2ABI() ? 24 : 40;
60 }
61 
62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63   // For the Darwin ABI:
64   // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
65   // for saving the frame pointer (if needed.)  While the published ABI has
66   // not used this slot since at least MacOSX 10.2, there is older code
67   // around that does use it, and that needs to continue to work.
68   if (STI.isDarwinABI())
69     return STI.isPPC64() ? -8U : -4U;
70 
71   // SVR4 ABI: First slot in the general register save area.
72   return STI.isPPC64() ? -8U : -4U;
73 }
74 
75 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
76   if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
77     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
78 
79   // 32-bit SVR4 ABI:
80   return 8;
81 }
82 
83 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
84   if (STI.isDarwinABI())
85     return STI.isPPC64() ? -16U : -8U;
86 
87   // SVR4 ABI: First slot in the general register save area.
88   return STI.isPPC64()
89              ? -16U
90              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
91 }
92 
/// Compute the value stored in CRSaveOffset.  It is currently the same
/// constant for every subtarget.
static unsigned computeCRSaveOffset() {
  // The condition register save offset needs to be updated for AIX PPC32.
  constexpr unsigned CRSaveSlotOffset = 8;
  return CRSaveSlotOffset;
}
97 
/// Construct the PPC frame lowering object for subtarget \p STI.
///
/// The base class is told that the stack grows down and is given the
/// subtarget's platform stack alignment.  The remaining members cache the
/// ABI-dependent save-slot offsets and the linkage area size, each computed
/// once by the corresponding compute*() helper defined earlier in this file.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset()) {}
107 
108 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
109 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
110     unsigned &NumEntries) const {
111   if (Subtarget.isDarwinABI()) {
112     NumEntries = 1;
113     if (Subtarget.isPPC64()) {
114       static const SpillSlot darwin64Offsets = {PPC::X31, -8};
115       return &darwin64Offsets;
116     } else {
117       static const SpillSlot darwinOffsets = {PPC::R31, -4};
118       return &darwinOffsets;
119     }
120   }
121 
122   // Early exit if not using the SVR4 ABI.
123   if (!Subtarget.isSVR4ABI()) {
124     NumEntries = 0;
125     return nullptr;
126   }
127 
128   // Note that the offsets here overlap, but this is fixed up in
129   // processFunctionBeforeFrameFinalized.
130 
131   static const SpillSlot Offsets[] = {
132       // Floating-point register save area offsets.
133       {PPC::F31, -8},
134       {PPC::F30, -16},
135       {PPC::F29, -24},
136       {PPC::F28, -32},
137       {PPC::F27, -40},
138       {PPC::F26, -48},
139       {PPC::F25, -56},
140       {PPC::F24, -64},
141       {PPC::F23, -72},
142       {PPC::F22, -80},
143       {PPC::F21, -88},
144       {PPC::F20, -96},
145       {PPC::F19, -104},
146       {PPC::F18, -112},
147       {PPC::F17, -120},
148       {PPC::F16, -128},
149       {PPC::F15, -136},
150       {PPC::F14, -144},
151 
152       // General register save area offsets.
153       {PPC::R31, -4},
154       {PPC::R30, -8},
155       {PPC::R29, -12},
156       {PPC::R28, -16},
157       {PPC::R27, -20},
158       {PPC::R26, -24},
159       {PPC::R25, -28},
160       {PPC::R24, -32},
161       {PPC::R23, -36},
162       {PPC::R22, -40},
163       {PPC::R21, -44},
164       {PPC::R20, -48},
165       {PPC::R19, -52},
166       {PPC::R18, -56},
167       {PPC::R17, -60},
168       {PPC::R16, -64},
169       {PPC::R15, -68},
170       {PPC::R14, -72},
171 
172       // CR save area offset.  We map each of the nonvolatile CR fields
173       // to the slot for CR2, which is the first of the nonvolatile CR
174       // fields to be assigned, so that we only allocate one save slot.
175       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
176       {PPC::CR2, -4},
177 
178       // VRSAVE save area offset.
179       {PPC::VRSAVE, -4},
180 
181       // Vector register save area
182       {PPC::V31, -16},
183       {PPC::V30, -32},
184       {PPC::V29, -48},
185       {PPC::V28, -64},
186       {PPC::V27, -80},
187       {PPC::V26, -96},
188       {PPC::V25, -112},
189       {PPC::V24, -128},
190       {PPC::V23, -144},
191       {PPC::V22, -160},
192       {PPC::V21, -176},
193       {PPC::V20, -192},
194 
195       // SPE register save area (overlaps Vector save area).
196       {PPC::S31, -8},
197       {PPC::S30, -16},
198       {PPC::S29, -24},
199       {PPC::S28, -32},
200       {PPC::S27, -40},
201       {PPC::S26, -48},
202       {PPC::S25, -56},
203       {PPC::S24, -64},
204       {PPC::S23, -72},
205       {PPC::S22, -80},
206       {PPC::S21, -88},
207       {PPC::S20, -96},
208       {PPC::S19, -104},
209       {PPC::S18, -112},
210       {PPC::S17, -120},
211       {PPC::S16, -128},
212       {PPC::S15, -136},
213       {PPC::S14, -144}};
214 
215   static const SpillSlot Offsets64[] = {
216       // Floating-point register save area offsets.
217       {PPC::F31, -8},
218       {PPC::F30, -16},
219       {PPC::F29, -24},
220       {PPC::F28, -32},
221       {PPC::F27, -40},
222       {PPC::F26, -48},
223       {PPC::F25, -56},
224       {PPC::F24, -64},
225       {PPC::F23, -72},
226       {PPC::F22, -80},
227       {PPC::F21, -88},
228       {PPC::F20, -96},
229       {PPC::F19, -104},
230       {PPC::F18, -112},
231       {PPC::F17, -120},
232       {PPC::F16, -128},
233       {PPC::F15, -136},
234       {PPC::F14, -144},
235 
236       // General register save area offsets.
237       {PPC::X31, -8},
238       {PPC::X30, -16},
239       {PPC::X29, -24},
240       {PPC::X28, -32},
241       {PPC::X27, -40},
242       {PPC::X26, -48},
243       {PPC::X25, -56},
244       {PPC::X24, -64},
245       {PPC::X23, -72},
246       {PPC::X22, -80},
247       {PPC::X21, -88},
248       {PPC::X20, -96},
249       {PPC::X19, -104},
250       {PPC::X18, -112},
251       {PPC::X17, -120},
252       {PPC::X16, -128},
253       {PPC::X15, -136},
254       {PPC::X14, -144},
255 
256       // VRSAVE save area offset.
257       {PPC::VRSAVE, -4},
258 
259       // Vector register save area
260       {PPC::V31, -16},
261       {PPC::V30, -32},
262       {PPC::V29, -48},
263       {PPC::V28, -64},
264       {PPC::V27, -80},
265       {PPC::V26, -96},
266       {PPC::V25, -112},
267       {PPC::V24, -128},
268       {PPC::V23, -144},
269       {PPC::V22, -160},
270       {PPC::V21, -176},
271       {PPC::V20, -192}};
272 
273   if (Subtarget.isPPC64()) {
274     NumEntries = array_lengthof(Offsets64);
275 
276     return Offsets64;
277   } else {
278     NumEntries = array_lengthof(Offsets);
279 
280     return Offsets;
281   }
282 }
283 
284 /// RemoveVRSaveCode - We have found that this function does not need any code
285 /// to manipulate the VRSAVE register, even though it uses vector registers.
286 /// This can happen when the only registers used are known to be live in or out
287 /// of the function.  Remove all of the VRSAVE related code from the function.
288 /// FIXME: The removal of the code results in a compile failure at -O0 when the
289 /// function contains a function call, as the GPR containing original VRSAVE
290 /// contents is spilled and reloaded around the call.  Without the prolog code,
291 /// the spill instruction refers to an undefined register.  This code needs
292 /// to account for all uses of that GPR.
293 static void RemoveVRSaveCode(MachineInstr &MI) {
294   MachineBasicBlock *Entry = MI.getParent();
295   MachineFunction *MF = Entry->getParent();
296 
297   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
298   MachineBasicBlock::iterator MBBI = MI;
299   ++MBBI;
300   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
301   MBBI->eraseFromParent();
302 
303   bool RemovedAllMTVRSAVEs = true;
304   // See if we can find and remove the MTVRSAVE instruction from all of the
305   // epilog blocks.
306   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
307     // If last instruction is a return instruction, add an epilogue
308     if (I->isReturnBlock()) {
309       bool FoundIt = false;
310       for (MBBI = I->end(); MBBI != I->begin(); ) {
311         --MBBI;
312         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
313           MBBI->eraseFromParent();  // remove it.
314           FoundIt = true;
315           break;
316         }
317       }
318       RemovedAllMTVRSAVEs &= FoundIt;
319     }
320   }
321 
322   // If we found and removed all MTVRSAVE instructions, remove the read of
323   // VRSAVE as well.
324   if (RemovedAllMTVRSAVEs) {
325     MBBI = MI;
326     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
327     --MBBI;
328     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
329     MBBI->eraseFromParent();
330   }
331 
332   // Finally, nuke the UPDATE_VRSAVE.
333   MI.eraseFromParent();
334 }
335 
336 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
337 // instruction selector.  Based on the vector registers that have been used,
338 // transform this into the appropriate ORI instruction.
339 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
340   MachineFunction *MF = MI.getParent()->getParent();
341   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
342   DebugLoc dl = MI.getDebugLoc();
343 
344   const MachineRegisterInfo &MRI = MF->getRegInfo();
345   unsigned UsedRegMask = 0;
346   for (unsigned i = 0; i != 32; ++i)
347     if (MRI.isPhysRegModified(VRRegNo[i]))
348       UsedRegMask |= 1 << (31-i);
349 
350   // Live in and live out values already must be in the mask, so don't bother
351   // marking them.
352   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
353     unsigned RegNo = TRI->getEncodingValue(LI.first);
354     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
355       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
356   }
357 
358   // Live out registers appear as use operands on return instructions.
359   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
360        UsedRegMask != 0 && BI != BE; ++BI) {
361     const MachineBasicBlock &MBB = *BI;
362     if (!MBB.isReturnBlock())
363       continue;
364     const MachineInstr &Ret = MBB.back();
365     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
366       const MachineOperand &MO = Ret.getOperand(I);
367       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
368         continue;
369       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
370       UsedRegMask &= ~(1 << (31-RegNo));
371     }
372   }
373 
374   // If no registers are used, turn this into a copy.
375   if (UsedRegMask == 0) {
376     // Remove all VRSAVE code.
377     RemoveVRSaveCode(MI);
378     return;
379   }
380 
381   Register SrcReg = MI.getOperand(1).getReg();
382   Register DstReg = MI.getOperand(0).getReg();
383 
384   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
385     if (DstReg != SrcReg)
386       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
387           .addReg(SrcReg)
388           .addImm(UsedRegMask);
389     else
390       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
391           .addReg(SrcReg, RegState::Kill)
392           .addImm(UsedRegMask);
393   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
394     if (DstReg != SrcReg)
395       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
396           .addReg(SrcReg)
397           .addImm(UsedRegMask >> 16);
398     else
399       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
400           .addReg(SrcReg, RegState::Kill)
401           .addImm(UsedRegMask >> 16);
402   } else {
403     if (DstReg != SrcReg)
404       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
405           .addReg(SrcReg)
406           .addImm(UsedRegMask >> 16);
407     else
408       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
409           .addReg(SrcReg, RegState::Kill)
410           .addImm(UsedRegMask >> 16);
411 
412     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
413         .addReg(DstReg, RegState::Kill)
414         .addImm(UsedRegMask & 0xFFFF);
415   }
416 
417   // Remove the old UPDATE_VRSAVE instruction.
418   MI.eraseFromParent();
419 }
420 
421 static bool spillsCR(const MachineFunction &MF) {
422   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
423   return FuncInfo->isCRSpilled();
424 }
425 
426 static bool spillsVRSAVE(const MachineFunction &MF) {
427   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
428   return FuncInfo->isVRSAVESpilled();
429 }
430 
431 static bool hasSpills(const MachineFunction &MF) {
432   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
433   return FuncInfo->hasSpills();
434 }
435 
436 static bool hasNonRISpills(const MachineFunction &MF) {
437   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
438   return FuncInfo->hasNonRISpills();
439 }
440 
441 /// MustSaveLR - Return true if this function requires that we save the LR
442 /// register onto the stack in the prolog and restore it in the epilog of the
443 /// function.
444 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
445   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
446 
447   // We need a save/restore of LR if there is any def of LR (which is
448   // defined by calls, including the PIC setup sequence), or if there is
449   // some use of the LR stack slot (e.g. for builtin_return_address).
450   // (LR comes in 32 and 64 bit versions.)
451   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
452   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
453 }
454 
455 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
456 /// call frame size. Update the MachineFunction object with the stack size.
457 unsigned
458 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
459                                                 bool UseEstimate) const {
460   unsigned NewMaxCallFrameSize = 0;
461   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
462                                             &NewMaxCallFrameSize);
463   MF.getFrameInfo().setStackSize(FrameSize);
464   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
465   return FrameSize;
466 }
467 
468 /// determineFrameLayout - Determine the size of the frame and maximum call
469 /// frame size.
470 unsigned
471 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
472                                        bool UseEstimate,
473                                        unsigned *NewMaxCallFrameSize) const {
474   const MachineFrameInfo &MFI = MF.getFrameInfo();
475   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
476 
477   // Get the number of bytes to allocate from the FrameInfo
478   unsigned FrameSize =
479     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
480 
481   // Get stack alignments. The frame must be aligned to the greatest of these:
482   unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
483   unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
484   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
485 
486   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
487 
488   unsigned LR = RegInfo->getRARegister();
489   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
490   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
491                        !MFI.adjustsStack() &&       // No calls.
492                        !MustSaveLR(MF, LR) &&       // No need to save LR.
493                        !FI->mustSaveTOC() &&        // No need to save TOC.
494                        !RegInfo->hasBasePointer(MF); // No special alignment.
495 
496   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
497   // code if all local vars are reg-allocated.
498   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
499 
500   // Check whether we can skip adjusting the stack pointer (by using red zone)
501   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
502     // No need for frame
503     return 0;
504   }
505 
506   // Get the maximum call frame size of all the calls.
507   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
508 
509   // Maximum call frame needs to be at least big enough for linkage area.
510   unsigned minCallFrameSize = getLinkageSize();
511   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
512 
513   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
514   // that allocations will be aligned.
515   if (MFI.hasVarSizedObjects())
516     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
517 
518   // Update the new max call frame size if the caller passes in a valid pointer.
519   if (NewMaxCallFrameSize)
520     *NewMaxCallFrameSize = maxCallFrameSize;
521 
522   // Include call frame size in total.
523   FrameSize += maxCallFrameSize;
524 
525   // Make sure the frame is aligned.
526   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
527 
528   return FrameSize;
529 }
530 
531 // hasFP - Return true if the specified function actually has a dedicated frame
532 // pointer register.
533 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
534   const MachineFrameInfo &MFI = MF.getFrameInfo();
535   // FIXME: This is pretty much broken by design: hasFP() might be called really
536   // early, before the stack layout was calculated and thus hasFP() might return
537   // true or false here depending on the time of call.
538   return (MFI.getStackSize()) && needsFP(MF);
539 }
540 
541 // needsFP - Return true if the specified function should have a dedicated frame
542 // pointer register.  This is true if the function has variable sized allocas or
543 // if frame pointer elimination is disabled.
544 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
545   const MachineFrameInfo &MFI = MF.getFrameInfo();
546 
547   // Naked functions have no stack frame pushed, so we don't have a frame
548   // pointer.
549   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
550     return false;
551 
552   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
553     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
554     (MF.getTarget().Options.GuaranteedTailCallOpt &&
555      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
556 }
557 
558 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
559   bool is31 = needsFP(MF);
560   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
561   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
562 
563   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
564   bool HasBP = RegInfo->hasBasePointer(MF);
565   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
566   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
567 
568   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
569        BI != BE; ++BI)
570     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
571       --MBBI;
572       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
573         MachineOperand &MO = MBBI->getOperand(I);
574         if (!MO.isReg())
575           continue;
576 
577         switch (MO.getReg()) {
578         case PPC::FP:
579           MO.setReg(FPReg);
580           break;
581         case PPC::FP8:
582           MO.setReg(FP8Reg);
583           break;
584         case PPC::BP:
585           MO.setReg(BPReg);
586           break;
587         case PPC::BP8:
588           MO.setReg(BP8Reg);
589           break;
590 
591         }
592       }
593     }
594 }
595 
596 /*  This function will do the following:
597     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
598       respectively (defaults recommended by the ABI) and return true
599     - If MBB is not an entry block, initialize the register scavenger and look
600       for available registers.
601     - If the defaults (R0/R12) are available, return true
602     - If TwoUniqueRegsRequired is set to true, it looks for two unique
603       registers. Otherwise, look for a single available register.
604       - If the required registers are found, set SR1 and SR2 and return true.
605       - If the required registers are not found, set SR2 or both SR1 and SR2 to
606         PPC::NoRegister and return false.
607 
608     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
609     is not set, this function will attempt to find two different registers, but
610     still return true if only one register is available (and set SR1 == SR2).
611 */
612 bool
613 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
614                                       bool UseAtEnd,
615                                       bool TwoUniqueRegsRequired,
616                                       unsigned *SR1,
617                                       unsigned *SR2) const {
618   RegScavenger RS;
619   unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
620   unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
621 
622   // Set the defaults for the two scratch registers.
623   if (SR1)
624     *SR1 = R0;
625 
626   if (SR2) {
627     assert (SR1 && "Asking for the second scratch register but not the first?");
628     *SR2 = R12;
629   }
630 
631   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
632   if ((UseAtEnd && MBB->isReturnBlock()) ||
633       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
634     return true;
635 
636   RS.enterBasicBlock(*MBB);
637 
638   if (UseAtEnd && !MBB->empty()) {
639     // The scratch register will be used at the end of the block, so must
640     // consider all registers used within the block
641 
642     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
643     // If no terminator, back iterator up to previous instruction.
644     if (MBBI == MBB->end())
645       MBBI = std::prev(MBBI);
646 
647     if (MBBI != MBB->begin())
648       RS.forward(MBBI);
649   }
650 
651   // If the two registers are available, we're all good.
652   // Note that we only return here if both R0 and R12 are available because
653   // although the function may not require two unique registers, it may benefit
654   // from having two so we should try to provide them.
655   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
656     return true;
657 
658   // Get the list of callee-saved registers for the target.
659   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
660   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
661 
662   // Get all the available registers in the block.
663   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
664                                      &PPC::GPRCRegClass);
665 
666   // We shouldn't use callee-saved registers as scratch registers as they may be
667   // available when looking for a candidate block for shrink wrapping but not
668   // available when the actual prologue/epilogue is being emitted because they
669   // were added as live-in to the prologue block by PrologueEpilogueInserter.
670   for (int i = 0; CSRegs[i]; ++i)
671     BV.reset(CSRegs[i]);
672 
673   // Set the first scratch register to the first available one.
674   if (SR1) {
675     int FirstScratchReg = BV.find_first();
676     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
677   }
678 
679   // If there is another one available, set the second scratch register to that.
680   // Otherwise, set it to either PPC::NoRegister if this function requires two
681   // or to whatever SR1 is set to if this function doesn't require two.
682   if (SR2) {
683     int SecondScratchReg = BV.find_next(*SR1);
684     if (SecondScratchReg != -1)
685       *SR2 = SecondScratchReg;
686     else
687       *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
688   }
689 
690   // Now that we've done our best to provide both registers, double check
691   // whether we were unable to provide enough.
692   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
693     return false;
694 
695   return true;
696 }
697 
698 // We need a scratch register for spilling LR and for spilling CR. By default,
699 // we use two scratch registers to hide latency. However, if only one scratch
700 // register is available, we can adjust for that by not overlapping the spill
701 // code. However, if we need to realign the stack (i.e. have a base pointer)
702 // and the stack frame is large, we need two scratch registers.
703 bool
704 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
705   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
706   MachineFunction &MF = *(MBB->getParent());
707   bool HasBP = RegInfo->hasBasePointer(MF);
708   unsigned FrameSize = determineFrameLayout(MF);
709   int NegFrameSize = -FrameSize;
710   bool IsLargeFrame = !isInt<16>(NegFrameSize);
711   MachineFrameInfo &MFI = MF.getFrameInfo();
712   unsigned MaxAlign = MFI.getMaxAlignment();
713   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
714 
715   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
716 }
717 
718 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
719   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
720 
721   return findScratchRegister(TmpMBB, false,
722                              twoUniqueScratchRegsRequired(TmpMBB));
723 }
724 
725 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
726   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
727 
728   return findScratchRegister(TmpMBB, true);
729 }
730 
731 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
732   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
733   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
734 
735   // Abort if there is no register info or function info.
736   if (!RegInfo || !FI)
737     return false;
738 
739   // Only move the stack update on ELFv2 ABI and PPC64.
740   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
741     return false;
742 
743   // Check the frame size first and return false if it does not fit the
744   // requirements.
745   // We need a non-zero frame size as well as a frame that will fit in the red
746   // zone. This is because by moving the stack pointer update we are now storing
747   // to the red zone until the stack pointer is updated. If we get an interrupt
748   // inside the prologue but before the stack update we now have a number of
749   // stores to the red zone and those stores must all fit.
750   MachineFrameInfo &MFI = MF.getFrameInfo();
751   unsigned FrameSize = MFI.getStackSize();
752   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
753     return false;
754 
755   // Frame pointers and base pointers complicate matters so don't do anything
756   // if we have them. For example having a frame pointer will sometimes require
757   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
758   // difficult.
759   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
760     return false;
761 
762   // Calls to fast_cc functions use different rules for passing parameters on
763   // the stack from the ABI and using PIC base in the function imposes
764   // similar restrictions to using the base pointer. It is not generally safe
765   // to move the stack pointer update in these situations.
766   if (FI->hasFastCall() || FI->usesPICBase())
767     return false;
768 
769   // Finally we can move the stack update if we do not require register
770   // scavenging. Register scavenging can introduce more spills and so
771   // may make the frame size larger than we have computed.
772   return !RegInfo->requiresFrameIndexScavenging(MF);
773 }
774 
/// Emit the prologue of \p MF into \p MBB.
///
/// Working from the top of the block, this routine:
///   1. handles an UPDATE_VRSAVE pseudo if one is found (non-SVR4 ABIs only),
///   2. computes the frame size with determineFrameLayoutAndUpdate(),
///   3. picks scratch registers and emits the saves of LR and of the CR
///      fields listed by getMustSaveCRs(); when a red zone is available it
///      also stores FP, the PIC base (R30) and BP relative to the old SP,
///   4. returns early if the frame size is zero,
///   5. otherwise allocates the frame with a store-with-update of SP, using
///      the indexed form when the negated size does not fit in 16 bits or
///      when a base pointer with MaxAlign > 1 is in use,
///   6. saves the TOC register when mustSaveTOC() is set,
///   7. without a red zone, stores FP, the PIC base and BP after the
///      allocation, and
///   8. emits CFI_INSTRUCTIONs when the module has debug info or the
///      function needs an unwind table entry.
///
/// \param MF  Function being lowered. Its MachineFrameInfo can be modified:
///            when the stack update is moved down, the offsets of the
///            negative-index callee-saved slots are rewritten.
/// \param MBB Block that receives the prologue; insertion starts at
///            MBB.begin().
775 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
776                                     MachineBasicBlock &MBB) const {
777   MachineBasicBlock::iterator MBBI = MBB.begin();
778   MachineFrameInfo &MFI = MF.getFrameInfo();
779   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
780   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
781
782   MachineModuleInfo &MMI = MF.getMMI();
783   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
      // dl is default-constructed and never assigned in this function; every
      // instruction built below receives it unchanged.
784   DebugLoc dl;
      // CFI is only emitted when the module has debug info or this function
      // needs an unwind table entry.
785   bool needsCFI = MMI.hasDebugInfo() ||
786     MF.getFunction().needsUnwindTableEntry();
787
788   // Get processor type.
789   bool isPPC64 = Subtarget.isPPC64();
790   // Get the ABI.
791   bool isSVR4ABI = Subtarget.isSVR4ABI();
792   bool isAIXABI = Subtarget.isAIXABI();
793   bool isELFv2ABI = Subtarget.isELFv2ABI();
794   assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) &&
795          "Unsupported PPC ABI.");
796
797   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
798   // process it. Only the first one found is handled; on AIX finding one is a
799   // fatal error.
      // NOTE(review): the loop counter `i` is incremented but never read.
799   if (!isSVR4ABI)
800     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
801       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
802         if (isAIXABI)
803           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
804         HandleVRSaveUpdate(*MBBI, TII);
805         break;
806       }
807     }
808
809   // Move MBBI back to the beginning of the prologue block.
810   MBBI = MBB.begin();
811
812   // Work out frame sizes.
      // NOTE(review): NegFrameSize is an int computed from an unsigned
      // FrameSize. If isInt<32> tests for "fits in a signed 32-bit value", the
      // check below presumably can never fail for an int argument - confirm
      // and consider widening the computation before testing.
813   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
814   int NegFrameSize = -FrameSize;
815   if (!isInt<32>(NegFrameSize))
816     llvm_unreachable("Unhandled stack size!");
817
818   if (MFI.isFrameAddressTaken())
819     replaceFPWithRealFP(MF);
820
821   // Check if the link register (LR) must be saved.
822   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
823   bool MustSaveLR = FI->mustSaveLR();
824   bool MustSaveTOC = FI->mustSaveTOC();
825   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
826   bool MustSaveCR = !MustSaveCRs.empty();
827   // Do we have a frame pointer and/or base pointer for this function?
828   bool HasFP = hasFP(MF);
829   bool HasBP = RegInfo->hasBasePointer(MF);
      // A red zone is assumed on every 64-bit target and on every non-SVR4
      // target; only 32-bit SVR4 is treated as having none.
830   bool HasRedZone = isPPC64 || !isSVR4ABI;
831
832   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
833   Register BPReg = RegInfo->getBaseRegister(MF);
834   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
835   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
836   unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
      // ScratchReg (and possibly TempReg) are filled in by findScratchRegister
      // further down.
837   unsigned ScratchReg  = 0;
838   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
839   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
      // Select the 32-bit or 64-bit flavour of each opcode used below.
840   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
841                                                 : PPC::MFLR );
842   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
843                                                  : PPC::STW );
844   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
845                                                      : PPC::STWU );
846   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
847                                                         : PPC::STWUX);
848   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
849                                                           : PPC::LIS );
850   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
851                                                  : PPC::ORI );
852   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
853                                               : PPC::OR );
854   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
855                                                             : PPC::SUBFC);
856   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
857                                                                : PPC::SUBFIC);
858
859   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
860   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
861   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
862   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
863   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
864          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
865
866   // The result of findScratchRegister is only checked by the assert. It is
867   // parked in SingleScratchReg, which is overwritten right after, to suppress
868   // compiler warnings.
867   bool SingleScratchReg =
868     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
869                         &ScratchReg, &TempReg);
870   assert(SingleScratchReg &&
871          "Required number of registers not available in this block");
872
      // From here on SingleScratchReg means "ScratchReg and TempReg are the
      // same register".
873   SingleScratchReg = ScratchReg == TempReg;
874
875   int LROffset = getReturnSaveOffset();
876
      // Offset of the FP save slot: taken from the frame object on SVR4,
      // otherwise from getFramePointerSaveOffset(). Stays 0 without an FP.
      // NOTE(review): the MFI declared in the SVR4 branch shadows the
      // function-level MFI; both are obtained from MF.getFrameInfo(). The same
      // applies to the BP and PIC base blocks below.
877   int FPOffset = 0;
878   if (HasFP) {
879     if (isSVR4ABI) {
880       MachineFrameInfo &MFI = MF.getFrameInfo();
881       int FPIndex = FI->getFramePointerSaveIndex();
882       assert(FPIndex && "No Frame Pointer Save Slot!");
883       FPOffset = MFI.getObjectOffset(FPIndex);
884     } else {
885       FPOffset = getFramePointerSaveOffset();
886     }
887   }
888
      // Offset of the BP save slot, determined the same way as FPOffset.
889   int BPOffset = 0;
890   if (HasBP) {
891     if (isSVR4ABI) {
892       MachineFrameInfo &MFI = MF.getFrameInfo();
893       int BPIndex = FI->getBasePointerSaveIndex();
894       assert(BPIndex && "No Base Pointer Save Slot!");
895       BPOffset = MFI.getObjectOffset(BPIndex);
896     } else {
897       BPOffset = getBasePointerSaveOffset();
898     }
899   }
900
      // Offset of the PIC base pointer save slot; always read from the frame
      // object, regardless of ABI.
901   int PBPOffset = 0;
902   if (FI->usesPICBase()) {
903     MachineFrameInfo &MFI = MF.getFrameInfo();
904     int PBPIndex = FI->getPICBasePointerSaveIndex();
905     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
906     PBPOffset = MFI.getObjectOffset(PBPIndex);
907   }
908
909   // Get stack alignments.
910   unsigned MaxAlign = MFI.getMaxAlignment();
911   if (HasBP && MaxAlign > 1)
912     assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
913            "Invalid alignment!");
914
915   // Frames of 32KB & larger require special handling because they cannot be
916   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
917   bool isLargeFrame = !isInt<16>(NegFrameSize);
918
919   assert((isPPC64 || !MustSaveCR) &&
920          "Prologue CR saving supported only in 64-bit mode");
921
922   if (MustSaveCR && isAIXABI)
923     report_fatal_error("Prologue CR saving is unimplemented on AIX.");
924
925   // Check if we can move the stack update instruction (stdu) down the prologue
926   // past the callee saves. Hopefully this will avoid the situation where the
927   // saves are waiting for the update on the store with update to complete.
928   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
929   bool MovingStackUpdateDown = false;
930
931   // Check if we can move the stack update.
      // StackUpdateLoc is advanced by one instruction per qualifying
      // callee-saved entry, presumably stepping over that entry's spill at the
      // top of the block - TODO confirm the one-instruction-per-entry
      // assumption.
      // NOTE(review): both range-for loops below take CalleeSavedInfo by
      // value, so every element is copied per iteration.
932   if (stackUpdateCanBeMoved(MF)) {
933     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
934     for (CalleeSavedInfo CSI : Info) {
935       int FrIdx = CSI.getFrameIdx();
936       // If the frame index is not negative the callee saved info belongs to a
937       // stack object that is not a fixed stack object. We ignore non-fixed
938       // stack objects because we won't move the stack update pointer past them.
939       if (FrIdx >= 0)
940         continue;
941
942       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
943         StackUpdateLoc++;
944         MovingStackUpdateDown = true;
945       } else {
946         // We need all of the Frame Indices to meet these conditions.
947         // If they do not, abort the whole operation.
948         StackUpdateLoc = MBBI;
949         MovingStackUpdateDown = false;
950         break;
951       }
952     }
953
954     // If the operation was not aborted then update the object offset: every
955     // negative-index slot gets NegFrameSize added. The CFI code at the end of
956     // this function subtracts it again when describing those slots.
955     if (MovingStackUpdateDown) {
956       for (CalleeSavedInfo CSI : Info) {
957         int FrIdx = CSI.getFrameIdx();
958         if (FrIdx < 0)
959           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
960       }
961     }
962   }
963
964   // If we need to spill the CR and the LR but we don't have two separate
965   // registers available, we must spill them one at a time
966   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
967     // In the ELFv2 ABI, we are not required to save all CR fields.
968     // If only one or two CR fields are clobbered, it is more efficient to use
969     // mfocrf to selectively save just those fields, because mfocrf has short
970     // latency compared to mfcr.
        // NOTE(review): the condition below only selects mfocrf when exactly
        // one CR field must be saved, not "one or two".
971     unsigned MfcrOpcode = PPC::MFCR8;
972     unsigned CrState = RegState::ImplicitKill;
973     if (isELFv2ABI && MustSaveCRs.size() == 1) {
974       MfcrOpcode = PPC::MFOCRF8;
975       CrState = RegState::Kill;
976     }
        // Read the CR into TempReg and store it immediately, so that the
        // single scratch register is free again for the LR save below.
977     MachineInstrBuilder MIB =
978       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
979     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
980       MIB.addReg(MustSaveCRs[i], CrState);
981     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
982       .addReg(TempReg, getKillRegState(true))
983       .addImm(getCRSaveOffset())
984       .addReg(SPReg);
985   }
986
      // Copy LR into ScratchReg; the store to LROffset is emitted further down.
987   if (MustSaveLR)
988     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
989
      // Complement of the combined case above: read the CR into TempReg now
      // and store it after the LR store below.
      // NOTE(review): the opcode selection duplicates the block above.
990   if (MustSaveCR &&
991       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
992     // In the ELFv2 ABI, we are not required to save all CR fields.
993     // If only one or two CR fields are clobbered, it is more efficient to use
994     // mfocrf to selectively save just those fields, because mfocrf has short
995     // latency compared to mfcr.
996     unsigned MfcrOpcode = PPC::MFCR8;
997     unsigned CrState = RegState::ImplicitKill;
998     if (isELFv2ABI && MustSaveCRs.size() == 1) {
999       MfcrOpcode = PPC::MFOCRF8;
1000       CrState = RegState::Kill;
1001     }
1002     MachineInstrBuilder MIB =
1003       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
1004     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1005       MIB.addReg(MustSaveCRs[i], CrState);
1006   }
1007
       // With a red zone, FP, the PIC base (R30) and BP are stored relative to
       // SP here, ahead of the frame-allocating store emitted below.
1008   if (HasRedZone) {
1009     if (HasFP)
1010       BuildMI(MBB, MBBI, dl, StoreInst)
1011         .addReg(FPReg)
1012         .addImm(FPOffset)
1013         .addReg(SPReg);
1014     if (FI->usesPICBase())
1015       BuildMI(MBB, MBBI, dl, StoreInst)
1016         .addReg(PPC::R30)
1017         .addImm(PBPOffset)
1018         .addReg(SPReg);
1019     if (HasBP)
1020       BuildMI(MBB, MBBI, dl, StoreInst)
1021         .addReg(BPReg)
1022         .addImm(BPOffset)
1023         .addReg(SPReg);
1024   }
1025
       // Store the LR copy. This is inserted at StackUpdateLoc rather than
       // MBBI, i.e. at the position chosen for the (possibly moved) stack
       // update.
1026   if (MustSaveLR)
1027     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1028       .addReg(ScratchReg, getKillRegState(true))
1029       .addImm(LROffset)
1030       .addReg(SPReg);
1031
1032   if (MustSaveCR &&
1033       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
1034     assert(HasRedZone && "A red zone is always available on PPC64");
1035     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
1036       .addReg(TempReg, getKillRegState(true))
1037       .addImm(getCRSaveOffset())
1038       .addReg(SPReg);
1039   }
1040
1041   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1042   if (!FrameSize)
1043     return;
1044
1045   // Adjust stack pointer: r1 += NegFrameSize.
1046   // If there is a preferred stack alignment, align R1 now
1047
1048   if (HasBP && HasRedZone) {
1049     // Save a copy of r1 as the base pointer.
1050     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1051       .addReg(SPReg)
1052       .addReg(SPReg);
1053   }
1054
1055   // Have we generated a STUX instruction to claim stack frame? If so,
1056   // the negated frame size will be placed in ScratchReg.
1057   bool HasSTUX = false;
1058
1059   // This condition must be kept in sync with canUseAsPrologue.
1060   if (HasBP && MaxAlign > 1) {
         // Realigning allocation.
         // NOTE(review): judging by the rotate/mask operands, the first
         // instruction appears to leave the low Log2(MaxAlign) bits of SP in
         // ScratchReg, and the subtract then forms the SP adjustment from
         // NegFrameSize and that remainder - confirm against the ISA.
1061     if (isPPC64)
1062       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1063         .addReg(SPReg)
1064         .addImm(0)
1065         .addImm(64 - Log2_32(MaxAlign));
1066     else // PPC32...
1067       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1068         .addReg(SPReg)
1069         .addImm(0)
1070         .addImm(32 - Log2_32(MaxAlign))
1071         .addImm(31);
1072     if (!isLargeFrame) {
1073       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1074         .addReg(ScratchReg, RegState::Kill)
1075         .addImm(NegFrameSize);
1076     } else {
           // NegFrameSize does not fit in an immediate: build it in TempReg
           // from its high and low 16-bit halves, which needs a second
           // scratch register.
1077       assert(!SingleScratchReg && "Only a single scratch reg available");
1078       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1079         .addImm(NegFrameSize >> 16);
1080       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1081         .addReg(TempReg, RegState::Kill)
1082         .addImm(NegFrameSize & 0xFFFF);
1083       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1084         .addReg(ScratchReg, RegState::Kill)
1085         .addReg(TempReg, RegState::Kill);
1086     }
1087
1088     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1089       .addReg(SPReg, RegState::Kill)
1090       .addReg(SPReg)
1091       .addReg(ScratchReg);
1092     HasSTUX = true;
1093
1094   } else if (!isLargeFrame) {
         // Small frame without realignment: one store-with-update using
         // NegFrameSize as the immediate, inserted at StackUpdateLoc.
1095     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1096       .addReg(SPReg)
1097       .addImm(NegFrameSize)
1098       .addReg(SPReg);
1099
1100   } else {
         // Large frame without realignment: build NegFrameSize in ScratchReg
         // from its 16-bit halves, then use the indexed store-with-update.
1101     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1102       .addImm(NegFrameSize >> 16);
1103     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1104       .addReg(ScratchReg, RegState::Kill)
1105       .addImm(NegFrameSize & 0xFFFF);
1106     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1107       .addReg(SPReg, RegState::Kill)
1108       .addReg(SPReg)
1109       .addReg(ScratchReg);
1110     HasSTUX = true;
1111   }
1112
1113   // Save the TOC register after the stack pointer update if a prologue TOC
1114   // save is required for the function.
1115   if (MustSaveTOC) {
1116     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1117     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1118       .addReg(TOCReg, getKillRegState(true))
1119       .addImm(TOCSaveOffset)
1120       .addReg(SPReg);
1121   }
1122
       // Without a red zone the FP / PIC base / BP saves were not emitted
       // above; they are emitted here, after the frame has been allocated.
1123   if (!HasRedZone) {
1124     assert(!isPPC64 && "A red zone is always available on PPC64");
1125     if (HasSTUX) {
1126       // The negated frame size is in ScratchReg, and the SPReg has been
1127       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1128       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1129       // the stack frame (i.e. the old SP), ideally, we would put the old
1130       // SP into a register and use it as the base for the stores. The
1131       // problem is that the only available register may be ScratchReg,
1132       // which could be R0, and R0 cannot be used as a base address.
1133
1134       // First, set ScratchReg to the old SP. This may need to be modified
1135       // later.
1136       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1137         .addReg(ScratchReg, RegState::Kill)
1138         .addReg(SPReg);
1139
1140       if (ScratchReg == PPC::R0) {
1141         // R0 cannot be used as a base register, but it can be used as an
1142         // index in a store-indexed.
             // LastOffset tracks how far R0 has already been advanced from
             // the old SP, so each step only adds the difference to the next
             // slot.
1143         int LastOffset = 0;
1144         if (HasFP)  {
1145           // R0 += (FPOffset-LastOffset).
1146           // Need addic, since addi treats R0 as 0.
1147           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1148             .addReg(ScratchReg)
1149             .addImm(FPOffset-LastOffset);
1150           LastOffset = FPOffset;
1151           // Store FP into *R0.
1152           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1153             .addReg(FPReg, RegState::Kill)  // Save FP.
1154             .addReg(PPC::ZERO)
1155             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1156         }
1157         if (FI->usesPICBase()) {
1158           // R0 += (PBPOffset-LastOffset).
1159           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1160             .addReg(ScratchReg)
1161             .addImm(PBPOffset-LastOffset);
1162           LastOffset = PBPOffset;
1163           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1164             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1165             .addReg(PPC::ZERO)
1166             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1167         }
1168         if (HasBP) {
1169           // R0 += (BPOffset-LastOffset).
1170           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1171             .addReg(ScratchReg)
1172             .addImm(BPOffset-LastOffset);
1173           LastOffset = BPOffset;
1174           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1175             .addReg(BPReg, RegState::Kill)  // Save BP.
1176             .addReg(PPC::ZERO)
1177             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1178           // BP = R0-LastOffset, i.e. the old SP.
1179           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1180             .addReg(ScratchReg, RegState::Kill)
1181             .addImm(-LastOffset);
1182         }
1183       } else {
1184         // ScratchReg is not R0, so use it as the base register. It is
1185         // already set to the old SP, so we can use the offsets directly.
1186
1187         // Now that the stack frame has been allocated, save all the necessary
1188         // registers using ScratchReg as the base address.
1189         if (HasFP)
1190           BuildMI(MBB, MBBI, dl, StoreInst)
1191             .addReg(FPReg)
1192             .addImm(FPOffset)
1193             .addReg(ScratchReg);
1194         if (FI->usesPICBase())
1195           BuildMI(MBB, MBBI, dl, StoreInst)
1196             .addReg(PPC::R30)
1197             .addImm(PBPOffset)
1198             .addReg(ScratchReg);
1199         if (HasBP) {
1200           BuildMI(MBB, MBBI, dl, StoreInst)
1201             .addReg(BPReg)
1202             .addImm(BPOffset)
1203             .addReg(ScratchReg);
               // Copy ScratchReg (the old SP) into BP.
1204           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1205             .addReg(ScratchReg, RegState::Kill)
1206             .addReg(ScratchReg);
1207         }
1208       }
1209     } else {
1210       // The frame size is a known 16-bit constant (fitting in the immediate
1211       // field of STWU). To be here we have to be compiling for PPC32.
1212       // Since the SPReg has been decreased by FrameSize, add it back to each
1213       // offset.
1214       if (HasFP)
1215         BuildMI(MBB, MBBI, dl, StoreInst)
1216           .addReg(FPReg)
1217           .addImm(FrameSize + FPOffset)
1218           .addReg(SPReg);
1219       if (FI->usesPICBase())
1220         BuildMI(MBB, MBBI, dl, StoreInst)
1221           .addReg(PPC::R30)
1222           .addImm(FrameSize + PBPOffset)
1223           .addReg(SPReg);
1224       if (HasBP) {
1225         BuildMI(MBB, MBBI, dl, StoreInst)
1226           .addReg(BPReg)
1227           .addImm(FrameSize + BPOffset)
1228           .addReg(SPReg);
             // BP = SP + FrameSize, i.e. the SP value before the allocation.
1229         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1230           .addReg(SPReg)
1231           .addImm(FrameSize);
1232       }
1233     }
1234   }
1235
1236   // Add Call Frame Information for the instructions we generated above.
1237   if (needsCFI) {
1238     unsigned CFIIndex;
1239
1240     if (HasBP) {
1241       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1242       // because if the stack needed aligning then CFA won't be at a fixed
1243       // offset from FP/SP.
1244       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1245       CFIIndex = MF.addFrameInst(
1246           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1247     } else {
1248       // Adjust the definition of CFA to account for the change in SP.
1249       assert(NegFrameSize);
1250       CFIIndex = MF.addFrameInst(
1251           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1252     }
1253     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1254         .addCFIIndex(CFIIndex);
1255
1256     if (HasFP) {
1257       // Describe where FP was saved, at a fixed offset from CFA.
1258       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1259       CFIIndex = MF.addFrameInst(
1260           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1261       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1262           .addCFIIndex(CFIIndex);
1263     }
1264
1265     if (FI->usesPICBase()) {
1266       // Describe where the PIC base (R30) was saved, at a fixed offset from CFA.
1267       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1268       CFIIndex = MF.addFrameInst(
1269           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1270       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1271           .addCFIIndex(CFIIndex);
1272     }
1273
1274     if (HasBP) {
1275       // Describe where BP was saved, at a fixed offset from CFA.
1276       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1277       CFIIndex = MF.addFrameInst(
1278           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1279       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1280           .addCFIIndex(CFIIndex);
1281     }
1282
1283     if (MustSaveLR) {
1284       // Describe where LR was saved, at a fixed offset from CFA.
1285       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1286       CFIIndex = MF.addFrameInst(
1287           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1288       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1289           .addCFIIndex(CFIIndex);
1290     }
1291   }
1292
1293   // If there is a frame pointer, copy R1 into R31
1294   if (HasFP) {
1295     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1296       .addReg(SPReg)
1297       .addReg(SPReg);
1298
1299     if (!HasBP && needsCFI) {
1300       // Change the definition of CFA from SP+offset to FP+offset, because SP
1301       // will change at every alloca.
1302       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1303       unsigned CFIIndex = MF.addFrameInst(
1304           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1305
1306       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1307           .addCFIIndex(CFIIndex);
1308     }
1309   }
1310
1311   if (needsCFI) {
1312     // Describe where callee saved registers were saved, at fixed offsets from
1313     // CFA.
1314     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1315     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1316       unsigned Reg = CSI[I].getReg();
           // LR/LR8 and RM get no record in this loop; LR is described above
           // when MustSaveLR is set.
1317       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1318
1319       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1320       // subregisters of CR2. We just need to emit a move of CR2.
1321       if (PPC::CRBITRCRegClass.contains(Reg))
1322         continue;
1323
           // No record is emitted for the TOC register when it was saved by
           // the prologue TOC save above.
1324       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1325         continue;
1326
1327       // For SVR4, don't emit a move for the CR spill slot if we haven't
1328       // spilled CRs.
           // NOTE(review): the range test presumably relies on CR2..CR4
           // having consecutive register enum values - verify.
1329       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1330           && !MustSaveCR)
1331         continue;
1332
1333       // For 64-bit SVR4 when we have spilled CRs, the spill location
1334       // is SP+8, not a frame-relative slot.
1335       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1336         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1337         // the whole CR word.  In the ELFv2 ABI, every CR that was
1338         // actually saved gets its own CFI record.
1339         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1340         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1341             nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
1342         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1343             .addCFIIndex(CFIIndex);
1344         continue;
1345       }
1346
1347       if (CSI[I].isSpilledToReg()) {
             // The register was spilled into another register: record a
             // register-to-register CFI entry instead of a stack offset.
1348         unsigned SpilledReg = CSI[I].getDstReg();
1349         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1350             nullptr, MRI->getDwarfRegNum(Reg, true),
1351             MRI->getDwarfRegNum(SpilledReg, true)));
1352         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1353           .addCFIIndex(CFIRegister);
1354       } else {
1355         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1356         // We have changed the object offset above but we do not want to change
1357         // the actual offsets in the CFI instruction so we have to undo the
1358         // offset change here.
1359         if (MovingStackUpdateDown)
1360           Offset -= NegFrameSize;
1361
1362         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1363             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1364         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1365             .addCFIIndex(CFIIndex);
1366       }
1367     }
1368   }
1369 }
1370 
1371 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1372                                     MachineBasicBlock &MBB) const {
1373   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1374   DebugLoc dl;
1375 
1376   if (MBBI != MBB.end())
1377     dl = MBBI->getDebugLoc();
1378 
1379   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1380   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1381 
1382   // Get alignment info so we know how to restore the SP.
1383   const MachineFrameInfo &MFI = MF.getFrameInfo();
1384 
1385   // Get the number of bytes allocated from the FrameInfo.
1386   int FrameSize = MFI.getStackSize();
1387 
1388   // Get processor type.
1389   bool isPPC64 = Subtarget.isPPC64();
1390   // Get the ABI.
1391   bool isSVR4ABI = Subtarget.isSVR4ABI();
1392 
1393   // Check if the link register (LR) has been saved.
1394   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1395   bool MustSaveLR = FI->mustSaveLR();
1396   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1397   bool MustSaveCR = !MustSaveCRs.empty();
1398   // Do we have a frame pointer and/or base pointer for this function?
1399   bool HasFP = hasFP(MF);
1400   bool HasBP = RegInfo->hasBasePointer(MF);
1401   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1402 
1403   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1404   Register BPReg = RegInfo->getBaseRegister(MF);
1405   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1406   unsigned ScratchReg = 0;
1407   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1408   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1409                                                  : PPC::MTLR );
1410   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1411                                                  : PPC::LWZ );
1412   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1413                                                            : PPC::LIS );
1414   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1415                                               : PPC::OR );
1416   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1417                                                   : PPC::ORI );
1418   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1419                                                    : PPC::ADDI );
1420   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1421                                                 : PPC::ADD4 );
1422 
1423   int LROffset = getReturnSaveOffset();
1424 
1425   int FPOffset = 0;
1426 
1427   // Using the same bool variable as below to suppress compiler warnings.
1428   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1429                                               &TempReg);
1430   assert(SingleScratchReg &&
1431          "Could not find an available scratch register");
1432 
1433   SingleScratchReg = ScratchReg == TempReg;
1434 
1435   if (HasFP) {
1436     if (isSVR4ABI) {
1437       int FPIndex = FI->getFramePointerSaveIndex();
1438       assert(FPIndex && "No Frame Pointer Save Slot!");
1439       FPOffset = MFI.getObjectOffset(FPIndex);
1440     } else {
1441       FPOffset = getFramePointerSaveOffset();
1442     }
1443   }
1444 
1445   int BPOffset = 0;
1446   if (HasBP) {
1447     if (isSVR4ABI) {
1448       int BPIndex = FI->getBasePointerSaveIndex();
1449       assert(BPIndex && "No Base Pointer Save Slot!");
1450       BPOffset = MFI.getObjectOffset(BPIndex);
1451     } else {
1452       BPOffset = getBasePointerSaveOffset();
1453     }
1454   }
1455 
1456   int PBPOffset = 0;
1457   if (FI->usesPICBase()) {
1458     int PBPIndex = FI->getPICBasePointerSaveIndex();
1459     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1460     PBPOffset = MFI.getObjectOffset(PBPIndex);
1461   }
1462 
1463   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1464 
1465   if (IsReturnBlock) {
1466     unsigned RetOpcode = MBBI->getOpcode();
1467     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1468                       RetOpcode == PPC::TCRETURNdi ||
1469                       RetOpcode == PPC::TCRETURNai ||
1470                       RetOpcode == PPC::TCRETURNri8 ||
1471                       RetOpcode == PPC::TCRETURNdi8 ||
1472                       RetOpcode == PPC::TCRETURNai8;
1473 
1474     if (UsesTCRet) {
1475       int MaxTCRetDelta = FI->getTailCallSPDelta();
1476       MachineOperand &StackAdjust = MBBI->getOperand(1);
1477       assert(StackAdjust.isImm() && "Expecting immediate value.");
1478       // Adjust stack pointer.
1479       int StackAdj = StackAdjust.getImm();
1480       int Delta = StackAdj - MaxTCRetDelta;
1481       assert((Delta >= 0) && "Delta must be positive");
1482       if (MaxTCRetDelta>0)
1483         FrameSize += (StackAdj +Delta);
1484       else
1485         FrameSize += StackAdj;
1486     }
1487   }
1488 
1489   // Frames of 32KB & larger require special handling because they cannot be
1490   // indexed into with a simple LD/LWZ immediate offset operand.
1491   bool isLargeFrame = !isInt<16>(FrameSize);
1492 
1493   // On targets without red zone, the SP needs to be restored last, so that
1494   // all live contents of the stack frame are upwards of the SP. This means
1495   // that we cannot restore SP just now, since there may be more registers
1496   // to restore from the stack frame (e.g. R31). If the frame size is not
1497   // a simple immediate value, we will need a spare register to hold the
1498   // restored SP. If the frame size is known and small, we can simply adjust
1499   // the offsets of the registers to be restored, and still use SP to restore
1500   // them. In such case, the final update of SP will be to add the frame
1501   // size to it.
1502   // To simplify the code, set RBReg to the base register used to restore
1503   // values from the stack, and set SPAdd to the value that needs to be added
1504   // to the SP at the end. The default values are as if red zone was present.
1505   unsigned RBReg = SPReg;
1506   unsigned SPAdd = 0;
1507 
1508   // Check if we can move the stack update instruction up the epilogue
1509   // past the callee saves. This will allow the move to LR instruction
1510   // to be executed before the restores of the callee saves which means
1511   // that the callee saves can hide the latency from the MTLR instruction.
1512   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1513   if (stackUpdateCanBeMoved(MF)) {
1514     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1515     for (CalleeSavedInfo CSI : Info) {
1516       int FrIdx = CSI.getFrameIdx();
1517       // If the frame index is not negative the callee saved info belongs to a
1518       // stack object that is not a fixed stack object. We ignore non-fixed
1519       // stack objects because we won't move the update of the stack pointer
1520       // past them.
1521       if (FrIdx >= 0)
1522         continue;
1523 
1524       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1525         StackUpdateLoc--;
1526       else {
1527         // Abort the operation as we can't update all CSR restores.
1528         StackUpdateLoc = MBBI;
1529         break;
1530       }
1531     }
1532   }
1533 
1534   if (FrameSize) {
1535     // In the prologue, the loaded (or persistent) stack pointer value is
1536     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1537     // zone add this offset back now.
1538 
1539     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1540     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1541     // call which invalidates the stack pointer value in SP(0). So we use the
1542     // value of R31 in this case.
1543     if (FI->hasFastCall()) {
1544       assert(HasFP && "Expecting a valid frame pointer.");
1545       if (!HasRedZone)
1546         RBReg = FPReg;
1547       if (!isLargeFrame) {
1548         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1549           .addReg(FPReg).addImm(FrameSize);
1550       } else {
1551         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1552           .addImm(FrameSize >> 16);
1553         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1554           .addReg(ScratchReg, RegState::Kill)
1555           .addImm(FrameSize & 0xFFFF);
1556         BuildMI(MBB, MBBI, dl, AddInst)
1557           .addReg(RBReg)
1558           .addReg(FPReg)
1559           .addReg(ScratchReg);
1560       }
1561     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1562       if (HasRedZone) {
1563         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1564           .addReg(SPReg)
1565           .addImm(FrameSize);
1566       } else {
1567         // Make sure that adding FrameSize will not overflow the max offset
1568         // size.
1569         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1570                "Local offsets should be negative");
1571         SPAdd = FrameSize;
1572         FPOffset += FrameSize;
1573         BPOffset += FrameSize;
1574         PBPOffset += FrameSize;
1575       }
1576     } else {
1577       // We don't want to use ScratchReg as a base register, because it
1578       // could happen to be R0. Use FP instead, but make sure to preserve it.
1579       if (!HasRedZone) {
1580         // If FP is not saved, copy it to ScratchReg.
1581         if (!HasFP)
1582           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1583             .addReg(FPReg)
1584             .addReg(FPReg);
1585         RBReg = FPReg;
1586       }
1587       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1588         .addImm(0)
1589         .addReg(SPReg);
1590     }
1591   }
1592   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1593   // If there is no red zone, ScratchReg may be needed for holding a useful
1594   // value (although not the base register). Make sure it is not overwritten
1595   // too early.
1596 
1597   assert((isPPC64 || !MustSaveCR) &&
1598          "Epilogue CR restoring supported only in 64-bit mode");
1599 
1600   // If we need to restore both the LR and the CR and we only have one
1601   // available scratch register, we must do them one at a time.
1602   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1603     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1604     // is live here.
1605     assert(HasRedZone && "Expecting red zone");
1606     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1607       .addImm(getCRSaveOffset())
1608       .addReg(SPReg);
1609     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1610       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1611         .addReg(TempReg, getKillRegState(i == e-1));
1612   }
1613 
1614   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1615   // LR is stored in the caller's stack frame. ScratchReg will be needed
1616   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1617   // a base register anyway, because it may happen to be R0.
1618   bool LoadedLR = false;
1619   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1620     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1621       .addImm(LROffset+SPAdd)
1622       .addReg(RBReg);
1623     LoadedLR = true;
1624   }
1625 
1626   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1627     // This will only occur for PPC64.
1628     assert(isPPC64 && "Expecting 64-bit mode");
1629     assert(RBReg == SPReg && "Should be using SP as a base register");
1630     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1631       .addImm(getCRSaveOffset())
1632       .addReg(RBReg);
1633   }
1634 
1635   if (HasFP) {
1636     // If there is red zone, restore FP directly, since SP has already been
1637     // restored. Otherwise, restore the value of FP into ScratchReg.
1638     if (HasRedZone || RBReg == SPReg)
1639       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1640         .addImm(FPOffset)
1641         .addReg(SPReg);
1642     else
1643       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1644         .addImm(FPOffset)
1645         .addReg(RBReg);
1646   }
1647 
1648   if (FI->usesPICBase())
1649     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1650       .addImm(PBPOffset)
1651       .addReg(RBReg);
1652 
1653   if (HasBP)
1654     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1655       .addImm(BPOffset)
1656       .addReg(RBReg);
1657 
1658   // There is nothing more to be loaded from the stack, so now we can
1659   // restore SP: SP = RBReg + SPAdd.
1660   if (RBReg != SPReg || SPAdd != 0) {
1661     assert(!HasRedZone && "This should not happen with red zone");
1662     // If SPAdd is 0, generate a copy.
1663     if (SPAdd == 0)
1664       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1665         .addReg(RBReg)
1666         .addReg(RBReg);
1667     else
1668       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1669         .addReg(RBReg)
1670         .addImm(SPAdd);
1671 
1672     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1673     if (RBReg == FPReg)
1674       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1675         .addReg(ScratchReg)
1676         .addReg(ScratchReg);
1677 
1678     // Now load the LR from the caller's stack frame.
1679     if (MustSaveLR && !LoadedLR)
1680       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1681         .addImm(LROffset)
1682         .addReg(SPReg);
1683   }
1684 
1685   if (MustSaveCR &&
1686       !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1687     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1688       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1689         .addReg(TempReg, getKillRegState(i == e-1));
1690 
1691   if (MustSaveLR)
1692     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1693 
1694   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1695   // call optimization
1696   if (IsReturnBlock) {
1697     unsigned RetOpcode = MBBI->getOpcode();
1698     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1699         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1700         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1701       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1702       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1703 
1704       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1705         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1706           .addReg(SPReg).addImm(CallerAllocatedAmt);
1707       } else {
1708         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1709           .addImm(CallerAllocatedAmt >> 16);
1710         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1711           .addReg(ScratchReg, RegState::Kill)
1712           .addImm(CallerAllocatedAmt & 0xFFFF);
1713         BuildMI(MBB, MBBI, dl, AddInst)
1714           .addReg(SPReg)
1715           .addReg(FPReg)
1716           .addReg(ScratchReg);
1717       }
1718     } else {
1719       createTailCallBranchInstr(MBB);
1720     }
1721   }
1722 }
1723 
1724 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1725   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1726 
1727   // If we got this far a first terminator should exist.
1728   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1729 
1730   DebugLoc dl = MBBI->getDebugLoc();
1731   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1732 
1733   // Create branch instruction for pseudo tail call return instruction
1734   unsigned RetOpcode = MBBI->getOpcode();
1735   if (RetOpcode == PPC::TCRETURNdi) {
1736     MBBI = MBB.getLastNonDebugInstr();
1737     MachineOperand &JumpTarget = MBBI->getOperand(0);
1738     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1739       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1740   } else if (RetOpcode == PPC::TCRETURNri) {
1741     MBBI = MBB.getLastNonDebugInstr();
1742     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1743     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1744   } else if (RetOpcode == PPC::TCRETURNai) {
1745     MBBI = MBB.getLastNonDebugInstr();
1746     MachineOperand &JumpTarget = MBBI->getOperand(0);
1747     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1748   } else if (RetOpcode == PPC::TCRETURNdi8) {
1749     MBBI = MBB.getLastNonDebugInstr();
1750     MachineOperand &JumpTarget = MBBI->getOperand(0);
1751     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1752       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1753   } else if (RetOpcode == PPC::TCRETURNri8) {
1754     MBBI = MBB.getLastNonDebugInstr();
1755     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1756     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1757   } else if (RetOpcode == PPC::TCRETURNai8) {
1758     MBBI = MBB.getLastNonDebugInstr();
1759     MachineOperand &JumpTarget = MBBI->getOperand(0);
1760     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1761   }
1762 }
1763 
1764 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1765                                             BitVector &SavedRegs,
1766                                             RegScavenger *RS) const {
1767   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1768 
1769   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1770 
1771   //  Save and clear the LR state.
1772   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1773   unsigned LR = RegInfo->getRARegister();
1774   FI->setMustSaveLR(MustSaveLR(MF, LR));
1775   SavedRegs.reset(LR);
1776 
1777   //  Save R31 if necessary
1778   int FPSI = FI->getFramePointerSaveIndex();
1779   const bool isPPC64 = Subtarget.isPPC64();
1780   const bool IsDarwinABI  = Subtarget.isDarwinABI();
1781   MachineFrameInfo &MFI = MF.getFrameInfo();
1782 
1783   // If the frame pointer save index hasn't been defined yet.
1784   if (!FPSI && needsFP(MF)) {
1785     // Find out what the fix offset of the frame pointer save area.
1786     int FPOffset = getFramePointerSaveOffset();
1787     // Allocate the frame index for frame pointer save area.
1788     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1789     // Save the result.
1790     FI->setFramePointerSaveIndex(FPSI);
1791   }
1792 
1793   int BPSI = FI->getBasePointerSaveIndex();
1794   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1795     int BPOffset = getBasePointerSaveOffset();
1796     // Allocate the frame index for the base pointer save area.
1797     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1798     // Save the result.
1799     FI->setBasePointerSaveIndex(BPSI);
1800   }
1801 
1802   // Reserve stack space for the PIC Base register (R30).
1803   // Only used in SVR4 32-bit.
1804   if (FI->usesPICBase()) {
1805     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1806     FI->setPICBasePointerSaveIndex(PBPSI);
1807   }
1808 
1809   // Make sure we don't explicitly spill r31, because, for example, we have
1810   // some inline asm which explicitly clobbers it, when we otherwise have a
1811   // frame pointer and are using r31's spill slot for the prologue/epilogue
1812   // code. Same goes for the base pointer and the PIC base register.
1813   if (needsFP(MF))
1814     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1815   if (RegInfo->hasBasePointer(MF))
1816     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1817   if (FI->usesPICBase())
1818     SavedRegs.reset(PPC::R30);
1819 
1820   // Reserve stack space to move the linkage area to in case of a tail call.
1821   int TCSPDelta = 0;
1822   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1823       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1824     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1825   }
1826 
1827   // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1828   // function uses CR 2, 3, or 4.
1829   if (!isPPC64 && !IsDarwinABI &&
1830       (SavedRegs.test(PPC::CR2) ||
1831        SavedRegs.test(PPC::CR3) ||
1832        SavedRegs.test(PPC::CR4))) {
1833     int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
1834     FI->setCRSpillFrameIndex(FrameIdx);
1835   }
1836 }
1837 
1838 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1839                                                        RegScavenger *RS) const {
1840   // Early exit if not using the SVR4 ABI.
1841   if (!Subtarget.isSVR4ABI()) {
1842     addScavengingSpillSlot(MF, RS);
1843     return;
1844   }
1845 
1846   // Get callee saved register information.
1847   MachineFrameInfo &MFI = MF.getFrameInfo();
1848   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1849 
1850   // If the function is shrink-wrapped, and if the function has a tail call, the
1851   // tail call might not be in the new RestoreBlock, so real branch instruction
1852   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1853   // RestoreBlock. So we handle this case here.
1854   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1855     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1856     for (MachineBasicBlock &MBB : MF) {
1857       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1858         createTailCallBranchInstr(MBB);
1859     }
1860   }
1861 
1862   // Early exit if no callee saved registers are modified!
1863   if (CSI.empty() && !needsFP(MF)) {
1864     addScavengingSpillSlot(MF, RS);
1865     return;
1866   }
1867 
1868   unsigned MinGPR = PPC::R31;
1869   unsigned MinG8R = PPC::X31;
1870   unsigned MinFPR = PPC::F31;
1871   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1872 
1873   bool HasGPSaveArea = false;
1874   bool HasG8SaveArea = false;
1875   bool HasFPSaveArea = false;
1876   bool HasVRSAVESaveArea = false;
1877   bool HasVRSaveArea = false;
1878 
1879   SmallVector<CalleeSavedInfo, 18> GPRegs;
1880   SmallVector<CalleeSavedInfo, 18> G8Regs;
1881   SmallVector<CalleeSavedInfo, 18> FPRegs;
1882   SmallVector<CalleeSavedInfo, 18> VRegs;
1883 
1884   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1885     unsigned Reg = CSI[i].getReg();
1886     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1887             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1888            "Not expecting to try to spill R2 in a function that must save TOC");
1889     if (PPC::GPRCRegClass.contains(Reg)) {
1890       HasGPSaveArea = true;
1891 
1892       GPRegs.push_back(CSI[i]);
1893 
1894       if (Reg < MinGPR) {
1895         MinGPR = Reg;
1896       }
1897     } else if (PPC::G8RCRegClass.contains(Reg)) {
1898       HasG8SaveArea = true;
1899 
1900       G8Regs.push_back(CSI[i]);
1901 
1902       if (Reg < MinG8R) {
1903         MinG8R = Reg;
1904       }
1905     } else if (PPC::F8RCRegClass.contains(Reg)) {
1906       HasFPSaveArea = true;
1907 
1908       FPRegs.push_back(CSI[i]);
1909 
1910       if (Reg < MinFPR) {
1911         MinFPR = Reg;
1912       }
1913     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1914                PPC::CRRCRegClass.contains(Reg)) {
1915       ; // do nothing, as we already know whether CRs are spilled
1916     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1917       HasVRSAVESaveArea = true;
1918     } else if (PPC::VRRCRegClass.contains(Reg) ||
1919                PPC::SPERCRegClass.contains(Reg)) {
1920       // Altivec and SPE are mutually exclusive, but have the same stack
1921       // alignment requirements, so overload the save area for both cases.
1922       HasVRSaveArea = true;
1923 
1924       VRegs.push_back(CSI[i]);
1925 
1926       if (Reg < MinVR) {
1927         MinVR = Reg;
1928       }
1929     } else {
1930       llvm_unreachable("Unknown RegisterClass!");
1931     }
1932   }
1933 
1934   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1935   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1936 
1937   int64_t LowerBound = 0;
1938 
1939   // Take into account stack space reserved for tail calls.
1940   int TCSPDelta = 0;
1941   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1942       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1943     LowerBound = TCSPDelta;
1944   }
1945 
1946   // The Floating-point register save area is right below the back chain word
1947   // of the previous stack frame.
1948   if (HasFPSaveArea) {
1949     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1950       int FI = FPRegs[i].getFrameIdx();
1951 
1952       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1953     }
1954 
1955     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1956   }
1957 
1958   // Check whether the frame pointer register is allocated. If so, make sure it
1959   // is spilled to the correct offset.
1960   if (needsFP(MF)) {
1961     int FI = PFI->getFramePointerSaveIndex();
1962     assert(FI && "No Frame Pointer Save Slot!");
1963     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1964     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1965     HasGPSaveArea = true;
1966   }
1967 
1968   if (PFI->usesPICBase()) {
1969     int FI = PFI->getPICBasePointerSaveIndex();
1970     assert(FI && "No PIC Base Pointer Save Slot!");
1971     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1972 
1973     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1974     HasGPSaveArea = true;
1975   }
1976 
1977   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1978   if (RegInfo->hasBasePointer(MF)) {
1979     int FI = PFI->getBasePointerSaveIndex();
1980     assert(FI && "No Base Pointer Save Slot!");
1981     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1982 
1983     Register BP = RegInfo->getBaseRegister(MF);
1984     if (PPC::G8RCRegClass.contains(BP)) {
1985       MinG8R = std::min<unsigned>(MinG8R, BP);
1986       HasG8SaveArea = true;
1987     } else if (PPC::GPRCRegClass.contains(BP)) {
1988       MinGPR = std::min<unsigned>(MinGPR, BP);
1989       HasGPSaveArea = true;
1990     }
1991   }
1992 
1993   // General register save area starts right below the Floating-point
1994   // register save area.
1995   if (HasGPSaveArea || HasG8SaveArea) {
1996     // Move general register save area spill slots down, taking into account
1997     // the size of the Floating-point register save area.
1998     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1999       if (!GPRegs[i].isSpilledToReg()) {
2000         int FI = GPRegs[i].getFrameIdx();
2001         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2002       }
2003     }
2004 
2005     // Move general register save area spill slots down, taking into account
2006     // the size of the Floating-point register save area.
2007     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2008       if (!G8Regs[i].isSpilledToReg()) {
2009         int FI = G8Regs[i].getFrameIdx();
2010         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2011       }
2012     }
2013 
2014     unsigned MinReg =
2015       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2016                          TRI->getEncodingValue(MinG8R));
2017 
2018     if (Subtarget.isPPC64()) {
2019       LowerBound -= (31 - MinReg + 1) * 8;
2020     } else {
2021       LowerBound -= (31 - MinReg + 1) * 4;
2022     }
2023   }
2024 
2025   // For 32-bit only, the CR save area is below the general register
2026   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2027   // to the stack pointer and hence does not need an adjustment here.
2028   // Only CR2 (the first nonvolatile spilled) has an associated frame
2029   // index so that we have a single uniform save area.
2030   if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
2031     // Adjust the frame index of the CR spill slot.
2032     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2033       unsigned Reg = CSI[i].getReg();
2034 
2035       if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
2036           // Leave Darwin logic as-is.
2037           || (!Subtarget.isSVR4ABI() &&
2038               (PPC::CRBITRCRegClass.contains(Reg) ||
2039                PPC::CRRCRegClass.contains(Reg)))) {
2040         int FI = CSI[i].getFrameIdx();
2041 
2042         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2043       }
2044     }
2045 
2046     LowerBound -= 4; // The CR save area is always 4 bytes long.
2047   }
2048 
2049   if (HasVRSAVESaveArea) {
2050     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2051     //             which have the VRSAVE register class?
2052     // Adjust the frame index of the VRSAVE spill slot.
2053     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2054       unsigned Reg = CSI[i].getReg();
2055 
2056       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2057         int FI = CSI[i].getFrameIdx();
2058 
2059         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2060       }
2061     }
2062 
2063     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2064   }
2065 
2066   // Both Altivec and SPE have the same alignment and padding requirements
2067   // within the stack frame.
2068   if (HasVRSaveArea) {
2069     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2070     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2071     // we are using negative number here (the stack grows downward). We should
2072     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2073     // is the alignment size ( n = 16 here) and y is the size after aligning.
2074     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2075     LowerBound &= ~(15);
2076 
2077     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2078       int FI = VRegs[i].getFrameIdx();
2079 
2080       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2081     }
2082   }
2083 
2084   addScavengingSpillSlot(MF, RS);
2085 }
2086 
2087 void
2088 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2089                                          RegScavenger *RS) const {
2090   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2091   // a large stack, which will require scavenging a register to materialize a
2092   // large offset.
2093 
2094   // We need to have a scavenger spill slot for spills if the frame size is
2095   // large. In case there is no free register for large-offset addressing,
2096   // this slot is used for the necessary emergency spill. Also, we need the
2097   // slot for dynamic stack allocations.
2098 
2099   // The scavenger might be invoked if the frame offset does not fit into
2100   // the 16-bit immediate. We don't know the complete frame size here
2101   // because we've not yet computed callee-saved register spills or the
2102   // needed alignment padding.
2103   unsigned StackSize = determineFrameLayout(MF, true);
2104   MachineFrameInfo &MFI = MF.getFrameInfo();
2105   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2106       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2107     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2108     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2109     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2110     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2111     unsigned Size = TRI.getSpillSize(RC);
2112     unsigned Align = TRI.getSpillAlignment(RC);
2113     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2114 
2115     // Might we have over-aligned allocas?
2116     bool HasAlVars = MFI.hasVarSizedObjects() &&
2117                      MFI.getMaxAlignment() > getStackAlignment();
2118 
2119     // These kinds of spills might need two registers.
2120     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2121       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2122 
2123   }
2124 }
2125 
2126 // This function checks if a callee saved gpr can be spilled to a volatile
2127 // vector register. This occurs for leaf functions when the option
2128 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2129 // which were not spilled to vectors, return false so the target independent
2130 // code can handle them by assigning a FrameIdx to a stack slot.
2131 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2132     MachineFunction &MF, const TargetRegisterInfo *TRI,
2133     std::vector<CalleeSavedInfo> &CSI) const {
2134 
2135   if (CSI.empty())
2136     return true; // Early exit if no callee saved registers are modified!
2137 
2138   // Early exit if cannot spill gprs to volatile vector registers.
2139   MachineFrameInfo &MFI = MF.getFrameInfo();
2140   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2141     return false;
2142 
2143   // Build a BitVector of VSRs that can be used for spilling GPRs.
2144   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2145   BitVector BVCalleeSaved(TRI->getNumRegs());
2146   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2147   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2148   for (unsigned i = 0; CSRegs[i]; ++i)
2149     BVCalleeSaved.set(CSRegs[i]);
2150 
2151   for (unsigned Reg : BVAllocatable.set_bits()) {
2152     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2153     // used in the function.
2154     if (BVCalleeSaved[Reg] ||
2155         (!PPC::F8RCRegClass.contains(Reg) &&
2156          !PPC::VFRCRegClass.contains(Reg)) ||
2157         (MF.getRegInfo().isPhysRegUsed(Reg)))
2158       BVAllocatable.reset(Reg);
2159   }
2160 
2161   bool AllSpilledToReg = true;
2162   for (auto &CS : CSI) {
2163     if (BVAllocatable.none())
2164       return false;
2165 
2166     unsigned Reg = CS.getReg();
2167     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2168       AllSpilledToReg = false;
2169       continue;
2170     }
2171 
2172     unsigned VolatileVFReg = BVAllocatable.find_first();
2173     if (VolatileVFReg < BVAllocatable.size()) {
2174       CS.setDstReg(VolatileVFReg);
2175       BVAllocatable.reset(VolatileVFReg);
2176     } else {
2177       AllSpilledToReg = false;
2178     }
2179   }
2180   return AllSpilledToReg;
2181 }
2182 
2183 
2184 bool
2185 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2186                                      MachineBasicBlock::iterator MI,
2187                                      const std::vector<CalleeSavedInfo> &CSI,
2188                                      const TargetRegisterInfo *TRI) const {
2189 
2190   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2191   // Return false otherwise to maintain pre-existing behavior.
2192   if (!Subtarget.isSVR4ABI())
2193     return false;
2194 
2195   MachineFunction *MF = MBB.getParent();
2196   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2197   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2198   bool MustSaveTOC = FI->mustSaveTOC();
2199   DebugLoc DL;
2200   bool CRSpilled = false;
2201   MachineInstrBuilder CRMIB;
2202 
2203   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2204     unsigned Reg = CSI[i].getReg();
2205     // Only Darwin actually uses the VRSAVE register, but it can still appear
2206     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2207     // Darwin, ignore it.
2208     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2209       continue;
2210 
2211     // CR2 through CR4 are the nonvolatile CR fields.
2212     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2213 
2214     // Add the callee-saved register as live-in; it's killed at the spill.
2215     // Do not do this for callee-saved registers that are live-in to the
2216     // function because they will already be marked live-in and this will be
2217     // adding it for a second time. It is an error to add the same register
2218     // to the set more than once.
2219     const MachineRegisterInfo &MRI = MF->getRegInfo();
2220     bool IsLiveIn = MRI.isLiveIn(Reg);
2221     if (!IsLiveIn)
2222        MBB.addLiveIn(Reg);
2223 
2224     if (CRSpilled && IsCRField) {
2225       CRMIB.addReg(Reg, RegState::ImplicitKill);
2226       continue;
2227     }
2228 
2229     // The actual spill will happen in the prologue.
2230     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2231       continue;
2232 
2233     // Insert the spill to the stack frame.
2234     if (IsCRField) {
2235       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2236       if (Subtarget.isPPC64()) {
2237         // The actual spill will happen at the start of the prologue.
2238         FuncInfo->addMustSaveCR(Reg);
2239       } else {
2240         CRSpilled = true;
2241         FuncInfo->setSpillsCR();
2242 
2243         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2244         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2245         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2246                   .addReg(Reg, RegState::ImplicitKill);
2247 
2248         MBB.insert(MI, CRMIB);
2249         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2250                                          .addReg(PPC::R12,
2251                                                  getKillRegState(true)),
2252                                          CSI[i].getFrameIdx()));
2253       }
2254     } else {
2255       if (CSI[i].isSpilledToReg()) {
2256         NumPESpillVSR++;
2257         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2258           .addReg(Reg, getKillRegState(true));
2259       } else {
2260         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2261         // Use !IsLiveIn for the kill flag.
2262         // We do not want to kill registers that are live in this function
2263         // before their use because they will become undefined registers.
2264         TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
2265                                 CSI[i].getFrameIdx(), RC, TRI);
2266       }
2267     }
2268   }
2269   return true;
2270 }
2271 
2272 static void
2273 restoreCRs(bool isPPC64, bool is31,
2274            bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
2275            MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2276            const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
2277 
2278   MachineFunction *MF = MBB.getParent();
2279   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2280   DebugLoc DL;
2281   unsigned RestoreOp, MoveReg;
2282 
2283   if (isPPC64)
2284     // This is handled during epilogue generation.
2285     return;
2286   else {
2287     // 32-bit:  FP-relative
2288     MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
2289                                              PPC::R12),
2290                                      CSI[CSIIndex].getFrameIdx()));
2291     RestoreOp = PPC::MTOCRF;
2292     MoveReg = PPC::R12;
2293   }
2294 
2295   if (CR2Spilled)
2296     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2297                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2298 
2299   if (CR3Spilled)
2300     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2301                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2302 
2303   if (CR4Spilled)
2304     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2305                .addReg(MoveReg, getKillRegState(true)));
2306 }
2307 
2308 MachineBasicBlock::iterator PPCFrameLowering::
2309 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2310                               MachineBasicBlock::iterator I) const {
2311   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2312   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2313       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2314     // Add (actually subtract) back the amount the callee popped on return.
2315     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2316       bool is64Bit = Subtarget.isPPC64();
2317       CalleeAmt *= -1;
2318       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2319       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2320       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2321       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2322       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2323       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2324       const DebugLoc &dl = I->getDebugLoc();
2325 
2326       if (isInt<16>(CalleeAmt)) {
2327         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2328           .addReg(StackReg, RegState::Kill)
2329           .addImm(CalleeAmt);
2330       } else {
2331         MachineBasicBlock::iterator MBBI = I;
2332         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2333           .addImm(CalleeAmt >> 16);
2334         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2335           .addReg(TmpReg, RegState::Kill)
2336           .addImm(CalleeAmt & 0xFFFF);
2337         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2338           .addReg(StackReg, RegState::Kill)
2339           .addReg(TmpReg);
2340       }
2341     }
2342   }
2343   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2344   return MBB.erase(I);
2345 }
2346 
2347 bool
2348 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2349                                         MachineBasicBlock::iterator MI,
2350                                         std::vector<CalleeSavedInfo> &CSI,
2351                                         const TargetRegisterInfo *TRI) const {
2352 
2353   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2354   // Return false otherwise to maintain pre-existing behavior.
2355   if (!Subtarget.isSVR4ABI())
2356     return false;
2357 
2358   MachineFunction *MF = MBB.getParent();
2359   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2360   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2361   bool MustSaveTOC = FI->mustSaveTOC();
2362   bool CR2Spilled = false;
2363   bool CR3Spilled = false;
2364   bool CR4Spilled = false;
2365   unsigned CSIIndex = 0;
2366 
2367   // Initialize insertion-point logic; we will be restoring in reverse
2368   // order of spill.
2369   MachineBasicBlock::iterator I = MI, BeforeI = I;
2370   bool AtStart = I == MBB.begin();
2371 
2372   if (!AtStart)
2373     --BeforeI;
2374 
2375   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2376     unsigned Reg = CSI[i].getReg();
2377 
2378     // Only Darwin actually uses the VRSAVE register, but it can still appear
2379     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2380     // Darwin, ignore it.
2381     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2382       continue;
2383 
2384     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2385       continue;
2386 
2387     if (Reg == PPC::CR2) {
2388       CR2Spilled = true;
2389       // The spill slot is associated only with CR2, which is the
2390       // first nonvolatile spilled.  Save it here.
2391       CSIIndex = i;
2392       continue;
2393     } else if (Reg == PPC::CR3) {
2394       CR3Spilled = true;
2395       continue;
2396     } else if (Reg == PPC::CR4) {
2397       CR4Spilled = true;
2398       continue;
2399     } else {
2400       // When we first encounter a non-CR register after seeing at
2401       // least one CR register, restore all spilled CRs together.
2402       if ((CR2Spilled || CR3Spilled || CR4Spilled)
2403           && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
2404         bool is31 = needsFP(*MF);
2405         restoreCRs(Subtarget.isPPC64(), is31,
2406                    CR2Spilled, CR3Spilled, CR4Spilled,
2407                    MBB, I, CSI, CSIIndex);
2408         CR2Spilled = CR3Spilled = CR4Spilled = false;
2409       }
2410 
2411       if (CSI[i].isSpilledToReg()) {
2412         DebugLoc DL;
2413         NumPEReloadVSR++;
2414         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2415             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2416       } else {
2417        // Default behavior for non-CR saves.
2418         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2419         TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2420         assert(I != MBB.begin() &&
2421                "loadRegFromStackSlot didn't insert any code!");
2422       }
2423     }
2424 
2425     // Insert in reverse order.
2426     if (AtStart)
2427       I = MBB.begin();
2428     else {
2429       I = BeforeI;
2430       ++I;
2431     }
2432   }
2433 
2434   // If we haven't yet spilled the CRs, do so now.
2435   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2436     bool is31 = needsFP(*MF);
2437     restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
2438                MBB, I, CSI, CSIIndex);
2439   }
2440 
2441   return true;
2442 }
2443 
2444 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2445   if (Subtarget.isAIXABI())
2446     // TOC save/restore is normally handled by the linker.
2447     // Indirect calls should hit this limitation.
2448     report_fatal_error("TOC save is not implemented on AIX yet.");
2449   return TOCSaveOffset;
2450 }
2451 
2452 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2453   if (Subtarget.isAIXABI())
2454     report_fatal_error("FramePointer is not implemented on AIX yet.");
2455   return FramePointerSaveOffset;
2456 }
2457 
2458 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2459   if (Subtarget.isAIXABI())
2460     report_fatal_error("BasePointer is not implemented on AIX yet.");
2461   return BasePointerSaveOffset;
2462 }
2463 
2464 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2465   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2466     return false;
2467   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2468           MF.getSubtarget<PPCSubtarget>().isPPC64());
2469 }
2470