xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 484a36b97ddfa3be2daec0241ac08bddefbc8a51)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
34 
35 static cl::opt<bool>
36 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
37                      cl::desc("Enable spills in prologue to vector registers."),
38                      cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   if (STI.isAIXABI())
58     return STI.isPPC64() ? 40 : 20;
59   return STI.isELFv2ABI() ? 24 : 40;
60 }
61 
62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63   // First slot in the general register save area.
64   return STI.isPPC64() ? -8U : -4U;
65 }
66 
67 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
68   if (STI.isAIXABI() || STI.isPPC64())
69     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
70 
71   // 32-bit SVR4 ABI:
72   return 8;
73 }
74 
75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
76   // Third slot in the general purpose register save area.
77   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
78     return -12U;
79 
80   // Second slot in the general purpose register save area.
81   return STI.isPPC64() ? -16U : -8U;
82 }
83 
84 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
85   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
86 }
87 
88 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
89     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
90                           STI.getPlatformStackAlignment(), 0),
91       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
92       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
93       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
94       LinkageSize(computeLinkageSize(Subtarget)),
95       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
96       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
97 
98 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
99 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
100     unsigned &NumEntries) const {
101 
102 // Floating-point register save area offsets.
103 #define CALLEE_SAVED_FPRS \
104       {PPC::F31, -8},     \
105       {PPC::F30, -16},    \
106       {PPC::F29, -24},    \
107       {PPC::F28, -32},    \
108       {PPC::F27, -40},    \
109       {PPC::F26, -48},    \
110       {PPC::F25, -56},    \
111       {PPC::F24, -64},    \
112       {PPC::F23, -72},    \
113       {PPC::F22, -80},    \
114       {PPC::F21, -88},    \
115       {PPC::F20, -96},    \
116       {PPC::F19, -104},   \
117       {PPC::F18, -112},   \
118       {PPC::F17, -120},   \
119       {PPC::F16, -128},   \
120       {PPC::F15, -136},   \
121       {PPC::F14, -144}
122 
123 // 32-bit general purpose register save area offsets shared by ELF and
124 // AIX. AIX has an extra CSR with r13.
125 #define CALLEE_SAVED_GPRS32 \
126       {PPC::R31, -4},       \
127       {PPC::R30, -8},       \
128       {PPC::R29, -12},      \
129       {PPC::R28, -16},      \
130       {PPC::R27, -20},      \
131       {PPC::R26, -24},      \
132       {PPC::R25, -28},      \
133       {PPC::R24, -32},      \
134       {PPC::R23, -36},      \
135       {PPC::R22, -40},      \
136       {PPC::R21, -44},      \
137       {PPC::R20, -48},      \
138       {PPC::R19, -52},      \
139       {PPC::R18, -56},      \
140       {PPC::R17, -60},      \
141       {PPC::R16, -64},      \
142       {PPC::R15, -68},      \
143       {PPC::R14, -72}
144 
145 // 64-bit general purpose register save area offsets.
146 #define CALLEE_SAVED_GPRS64 \
147       {PPC::X31, -8},       \
148       {PPC::X30, -16},      \
149       {PPC::X29, -24},      \
150       {PPC::X28, -32},      \
151       {PPC::X27, -40},      \
152       {PPC::X26, -48},      \
153       {PPC::X25, -56},      \
154       {PPC::X24, -64},      \
155       {PPC::X23, -72},      \
156       {PPC::X22, -80},      \
157       {PPC::X21, -88},      \
158       {PPC::X20, -96},      \
159       {PPC::X19, -104},     \
160       {PPC::X18, -112},     \
161       {PPC::X17, -120},     \
162       {PPC::X16, -128},     \
163       {PPC::X15, -136},     \
164       {PPC::X14, -144}
165 
166 // Vector register save area offsets.
167 #define CALLEE_SAVED_VRS \
168       {PPC::V31, -16},   \
169       {PPC::V30, -32},   \
170       {PPC::V29, -48},   \
171       {PPC::V28, -64},   \
172       {PPC::V27, -80},   \
173       {PPC::V26, -96},   \
174       {PPC::V25, -112},  \
175       {PPC::V24, -128},  \
176       {PPC::V23, -144},  \
177       {PPC::V22, -160},  \
178       {PPC::V21, -176},  \
179       {PPC::V20, -192}
180 
181   // Note that the offsets here overlap, but this is fixed up in
182   // processFunctionBeforeFrameFinalized.
183 
184   static const SpillSlot ELFOffsets32[] = {
185       CALLEE_SAVED_FPRS,
186       CALLEE_SAVED_GPRS32,
187 
188       // CR save area offset.  We map each of the nonvolatile CR fields
189       // to the slot for CR2, which is the first of the nonvolatile CR
190       // fields to be assigned, so that we only allocate one save slot.
191       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
192       {PPC::CR2, -4},
193 
194       // VRSAVE save area offset.
195       {PPC::VRSAVE, -4},
196 
197       CALLEE_SAVED_VRS,
198 
199       // SPE register save area (overlaps Vector save area).
200       {PPC::S31, -8},
201       {PPC::S30, -16},
202       {PPC::S29, -24},
203       {PPC::S28, -32},
204       {PPC::S27, -40},
205       {PPC::S26, -48},
206       {PPC::S25, -56},
207       {PPC::S24, -64},
208       {PPC::S23, -72},
209       {PPC::S22, -80},
210       {PPC::S21, -88},
211       {PPC::S20, -96},
212       {PPC::S19, -104},
213       {PPC::S18, -112},
214       {PPC::S17, -120},
215       {PPC::S16, -128},
216       {PPC::S15, -136},
217       {PPC::S14, -144}};
218 
219   static const SpillSlot ELFOffsets64[] = {
220       CALLEE_SAVED_FPRS,
221       CALLEE_SAVED_GPRS64,
222 
223       // VRSAVE save area offset.
224       {PPC::VRSAVE, -4},
225       CALLEE_SAVED_VRS
226   };
227 
228   static const SpillSlot AIXOffsets32[] = {
229       CALLEE_SAVED_FPRS,
230       CALLEE_SAVED_GPRS32,
231       // Add AIX's extra CSR.
232       {PPC::R13, -76},
233       // TODO: Update when we add vector support for AIX.
234   };
235 
236   static const SpillSlot AIXOffsets64[] = {
237       CALLEE_SAVED_FPRS,
238       CALLEE_SAVED_GPRS64,
239       // TODO: Update when we add vector support for AIX.
240   };
241 
242   if (Subtarget.is64BitELFABI()) {
243     NumEntries = array_lengthof(ELFOffsets64);
244     return ELFOffsets64;
245   }
246 
247   if (Subtarget.is32BitELFABI()) {
248     NumEntries = array_lengthof(ELFOffsets32);
249     return ELFOffsets32;
250   }
251 
252   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
253 
254   if (Subtarget.isPPC64()) {
255     NumEntries = array_lengthof(AIXOffsets64);
256     return AIXOffsets64;
257   }
258 
259   NumEntries = array_lengthof(AIXOffsets32);
260   return AIXOffsets32;
261 }
262 
263 /// RemoveVRSaveCode - We have found that this function does not need any code
264 /// to manipulate the VRSAVE register, even though it uses vector registers.
265 /// This can happen when the only registers used are known to be live in or out
266 /// of the function.  Remove all of the VRSAVE related code from the function.
267 /// FIXME: The removal of the code results in a compile failure at -O0 when the
268 /// function contains a function call, as the GPR containing original VRSAVE
269 /// contents is spilled and reloaded around the call.  Without the prolog code,
270 /// the spill instruction refers to an undefined register.  This code needs
271 /// to account for all uses of that GPR.
272 static void RemoveVRSaveCode(MachineInstr &MI) {
273   MachineBasicBlock *Entry = MI.getParent();
274   MachineFunction *MF = Entry->getParent();
275 
276   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
277   MachineBasicBlock::iterator MBBI = MI;
278   ++MBBI;
279   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
280   MBBI->eraseFromParent();
281 
282   bool RemovedAllMTVRSAVEs = true;
283   // See if we can find and remove the MTVRSAVE instruction from all of the
284   // epilog blocks.
285   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
286     // If last instruction is a return instruction, add an epilogue
287     if (I->isReturnBlock()) {
288       bool FoundIt = false;
289       for (MBBI = I->end(); MBBI != I->begin(); ) {
290         --MBBI;
291         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
292           MBBI->eraseFromParent();  // remove it.
293           FoundIt = true;
294           break;
295         }
296       }
297       RemovedAllMTVRSAVEs &= FoundIt;
298     }
299   }
300 
301   // If we found and removed all MTVRSAVE instructions, remove the read of
302   // VRSAVE as well.
303   if (RemovedAllMTVRSAVEs) {
304     MBBI = MI;
305     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
306     --MBBI;
307     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
308     MBBI->eraseFromParent();
309   }
310 
311   // Finally, nuke the UPDATE_VRSAVE.
312   MI.eraseFromParent();
313 }
314 
315 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
316 // instruction selector.  Based on the vector registers that have been used,
317 // transform this into the appropriate ORI instruction.
318 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
319   MachineFunction *MF = MI.getParent()->getParent();
320   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
321   DebugLoc dl = MI.getDebugLoc();
322 
323   const MachineRegisterInfo &MRI = MF->getRegInfo();
324   unsigned UsedRegMask = 0;
325   for (unsigned i = 0; i != 32; ++i)
326     if (MRI.isPhysRegModified(VRRegNo[i]))
327       UsedRegMask |= 1 << (31-i);
328 
329   // Live in and live out values already must be in the mask, so don't bother
330   // marking them.
331   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
332     unsigned RegNo = TRI->getEncodingValue(LI.first);
333     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
334       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
335   }
336 
337   // Live out registers appear as use operands on return instructions.
338   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
339        UsedRegMask != 0 && BI != BE; ++BI) {
340     const MachineBasicBlock &MBB = *BI;
341     if (!MBB.isReturnBlock())
342       continue;
343     const MachineInstr &Ret = MBB.back();
344     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
345       const MachineOperand &MO = Ret.getOperand(I);
346       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
347         continue;
348       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
349       UsedRegMask &= ~(1 << (31-RegNo));
350     }
351   }
352 
353   // If no registers are used, turn this into a copy.
354   if (UsedRegMask == 0) {
355     // Remove all VRSAVE code.
356     RemoveVRSaveCode(MI);
357     return;
358   }
359 
360   Register SrcReg = MI.getOperand(1).getReg();
361   Register DstReg = MI.getOperand(0).getReg();
362 
363   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
364     if (DstReg != SrcReg)
365       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
366           .addReg(SrcReg)
367           .addImm(UsedRegMask);
368     else
369       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
370           .addReg(SrcReg, RegState::Kill)
371           .addImm(UsedRegMask);
372   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
373     if (DstReg != SrcReg)
374       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
375           .addReg(SrcReg)
376           .addImm(UsedRegMask >> 16);
377     else
378       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
379           .addReg(SrcReg, RegState::Kill)
380           .addImm(UsedRegMask >> 16);
381   } else {
382     if (DstReg != SrcReg)
383       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
384           .addReg(SrcReg)
385           .addImm(UsedRegMask >> 16);
386     else
387       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
388           .addReg(SrcReg, RegState::Kill)
389           .addImm(UsedRegMask >> 16);
390 
391     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
392         .addReg(DstReg, RegState::Kill)
393         .addImm(UsedRegMask & 0xFFFF);
394   }
395 
396   // Remove the old UPDATE_VRSAVE instruction.
397   MI.eraseFromParent();
398 }
399 
400 static bool spillsCR(const MachineFunction &MF) {
401   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
402   return FuncInfo->isCRSpilled();
403 }
404 
405 static bool spillsVRSAVE(const MachineFunction &MF) {
406   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
407   return FuncInfo->isVRSAVESpilled();
408 }
409 
410 static bool hasSpills(const MachineFunction &MF) {
411   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
412   return FuncInfo->hasSpills();
413 }
414 
415 static bool hasNonRISpills(const MachineFunction &MF) {
416   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
417   return FuncInfo->hasNonRISpills();
418 }
419 
420 /// MustSaveLR - Return true if this function requires that we save the LR
421 /// register onto the stack in the prolog and restore it in the epilog of the
422 /// function.
423 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
424   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
425 
426   // We need a save/restore of LR if there is any def of LR (which is
427   // defined by calls, including the PIC setup sequence), or if there is
428   // some use of the LR stack slot (e.g. for builtin_return_address).
429   // (LR comes in 32 and 64 bit versions.)
430   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
431   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
432 }
433 
434 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
435 /// call frame size. Update the MachineFunction object with the stack size.
436 unsigned
437 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
438                                                 bool UseEstimate) const {
439   unsigned NewMaxCallFrameSize = 0;
440   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
441                                             &NewMaxCallFrameSize);
442   MF.getFrameInfo().setStackSize(FrameSize);
443   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
444   return FrameSize;
445 }
446 
447 /// determineFrameLayout - Determine the size of the frame and maximum call
448 /// frame size.
449 unsigned
450 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
451                                        bool UseEstimate,
452                                        unsigned *NewMaxCallFrameSize) const {
453   const MachineFrameInfo &MFI = MF.getFrameInfo();
454   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
455 
456   // Get the number of bytes to allocate from the FrameInfo
457   unsigned FrameSize =
458     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
459 
460   // Get stack alignments. The frame must be aligned to the greatest of these:
461   Align TargetAlign = getStackAlign(); // alignment required per the ABI
462   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
463   Align Alignment = std::max(TargetAlign, MaxAlign);
464 
465   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
466 
467   unsigned LR = RegInfo->getRARegister();
468   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
469   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
470                        !MFI.adjustsStack() &&       // No calls.
471                        !MustSaveLR(MF, LR) &&       // No need to save LR.
472                        !FI->mustSaveTOC() &&        // No need to save TOC.
473                        !RegInfo->hasBasePointer(MF); // No special alignment.
474 
475   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
476   // code if all local vars are reg-allocated.
477   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
478 
479   // Check whether we can skip adjusting the stack pointer (by using red zone)
480   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
481     // No need for frame
482     return 0;
483   }
484 
485   // Get the maximum call frame size of all the calls.
486   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
487 
488   // Maximum call frame needs to be at least big enough for linkage area.
489   unsigned minCallFrameSize = getLinkageSize();
490   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
491 
492   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
493   // that allocations will be aligned.
494   if (MFI.hasVarSizedObjects())
495     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
496 
497   // Update the new max call frame size if the caller passes in a valid pointer.
498   if (NewMaxCallFrameSize)
499     *NewMaxCallFrameSize = maxCallFrameSize;
500 
501   // Include call frame size in total.
502   FrameSize += maxCallFrameSize;
503 
504   // Make sure the frame is aligned.
505   FrameSize = alignTo(FrameSize, Alignment);
506 
507   return FrameSize;
508 }
509 
510 // hasFP - Return true if the specified function actually has a dedicated frame
511 // pointer register.
512 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
513   const MachineFrameInfo &MFI = MF.getFrameInfo();
514   // FIXME: This is pretty much broken by design: hasFP() might be called really
515   // early, before the stack layout was calculated and thus hasFP() might return
516   // true or false here depending on the time of call.
517   return (MFI.getStackSize()) && needsFP(MF);
518 }
519 
520 // needsFP - Return true if the specified function should have a dedicated frame
521 // pointer register.  This is true if the function has variable sized allocas or
522 // if frame pointer elimination is disabled.
523 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
524   const MachineFrameInfo &MFI = MF.getFrameInfo();
525 
526   // Naked functions have no stack frame pushed, so we don't have a frame
527   // pointer.
528   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
529     return false;
530 
531   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
532     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
533     (MF.getTarget().Options.GuaranteedTailCallOpt &&
534      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
535 }
536 
537 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
538   bool is31 = needsFP(MF);
539   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
540   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
541 
542   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
543   bool HasBP = RegInfo->hasBasePointer(MF);
544   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
545   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
546 
547   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
548        BI != BE; ++BI)
549     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
550       --MBBI;
551       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
552         MachineOperand &MO = MBBI->getOperand(I);
553         if (!MO.isReg())
554           continue;
555 
556         switch (MO.getReg()) {
557         case PPC::FP:
558           MO.setReg(FPReg);
559           break;
560         case PPC::FP8:
561           MO.setReg(FP8Reg);
562           break;
563         case PPC::BP:
564           MO.setReg(BPReg);
565           break;
566         case PPC::BP8:
567           MO.setReg(BP8Reg);
568           break;
569 
570         }
571       }
572     }
573 }
574 
575 /*  This function will do the following:
576     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
577       respectively (defaults recommended by the ABI) and return true
578     - If MBB is not an entry block, initialize the register scavenger and look
579       for available registers.
580     - If the defaults (R0/R12) are available, return true
581     - If TwoUniqueRegsRequired is set to true, it looks for two unique
582       registers. Otherwise, look for a single available register.
583       - If the required registers are found, set SR1 and SR2 and return true.
584       - If the required registers are not found, set SR2 or both SR1 and SR2 to
585         PPC::NoRegister and return false.
586 
587     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
588     is not set, this function will attempt to find two different registers, but
589     still return true if only one register is available (and set SR1 == SR2).
590 */
591 bool
592 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
593                                       bool UseAtEnd,
594                                       bool TwoUniqueRegsRequired,
595                                       Register *SR1,
596                                       Register *SR2) const {
597   RegScavenger RS;
598   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
599   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
600 
601   // Set the defaults for the two scratch registers.
602   if (SR1)
603     *SR1 = R0;
604 
605   if (SR2) {
606     assert (SR1 && "Asking for the second scratch register but not the first?");
607     *SR2 = R12;
608   }
609 
610   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
611   if ((UseAtEnd && MBB->isReturnBlock()) ||
612       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
613     return true;
614 
615   RS.enterBasicBlock(*MBB);
616 
617   if (UseAtEnd && !MBB->empty()) {
618     // The scratch register will be used at the end of the block, so must
619     // consider all registers used within the block
620 
621     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
622     // If no terminator, back iterator up to previous instruction.
623     if (MBBI == MBB->end())
624       MBBI = std::prev(MBBI);
625 
626     if (MBBI != MBB->begin())
627       RS.forward(MBBI);
628   }
629 
630   // If the two registers are available, we're all good.
631   // Note that we only return here if both R0 and R12 are available because
632   // although the function may not require two unique registers, it may benefit
633   // from having two so we should try to provide them.
634   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
635     return true;
636 
637   // Get the list of callee-saved registers for the target.
638   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
639   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
640 
641   // Get all the available registers in the block.
642   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
643                                      &PPC::GPRCRegClass);
644 
645   // We shouldn't use callee-saved registers as scratch registers as they may be
646   // available when looking for a candidate block for shrink wrapping but not
647   // available when the actual prologue/epilogue is being emitted because they
648   // were added as live-in to the prologue block by PrologueEpilogueInserter.
649   for (int i = 0; CSRegs[i]; ++i)
650     BV.reset(CSRegs[i]);
651 
652   // Set the first scratch register to the first available one.
653   if (SR1) {
654     int FirstScratchReg = BV.find_first();
655     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
656   }
657 
658   // If there is another one available, set the second scratch register to that.
659   // Otherwise, set it to either PPC::NoRegister if this function requires two
660   // or to whatever SR1 is set to if this function doesn't require two.
661   if (SR2) {
662     int SecondScratchReg = BV.find_next(*SR1);
663     if (SecondScratchReg != -1)
664       *SR2 = SecondScratchReg;
665     else
666       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
667   }
668 
669   // Now that we've done our best to provide both registers, double check
670   // whether we were unable to provide enough.
671   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
672     return false;
673 
674   return true;
675 }
676 
677 // We need a scratch register for spilling LR and for spilling CR. By default,
678 // we use two scratch registers to hide latency. However, if only one scratch
679 // register is available, we can adjust for that by not overlapping the spill
680 // code. However, if we need to realign the stack (i.e. have a base pointer)
681 // and the stack frame is large, we need two scratch registers.
682 bool
683 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
684   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
685   MachineFunction &MF = *(MBB->getParent());
686   bool HasBP = RegInfo->hasBasePointer(MF);
687   unsigned FrameSize = determineFrameLayout(MF);
688   int NegFrameSize = -FrameSize;
689   bool IsLargeFrame = !isInt<16>(NegFrameSize);
690   MachineFrameInfo &MFI = MF.getFrameInfo();
691   Align MaxAlign = MFI.getMaxAlign();
692   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
693 
694   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
695 }
696 
697 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
698   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
699 
700   return findScratchRegister(TmpMBB, false,
701                              twoUniqueScratchRegsRequired(TmpMBB));
702 }
703 
704 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
705   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
706 
707   return findScratchRegister(TmpMBB, true);
708 }
709 
710 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
711   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
712   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
713 
714   // Abort if there is no register info or function info.
715   if (!RegInfo || !FI)
716     return false;
717 
718   // Only move the stack update on ELFv2 ABI and PPC64.
719   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
720     return false;
721 
722   // Check the frame size first and return false if it does not fit the
723   // requirements.
724   // We need a non-zero frame size as well as a frame that will fit in the red
725   // zone. This is because by moving the stack pointer update we are now storing
726   // to the red zone until the stack pointer is updated. If we get an interrupt
727   // inside the prologue but before the stack update we now have a number of
728   // stores to the red zone and those stores must all fit.
729   MachineFrameInfo &MFI = MF.getFrameInfo();
730   unsigned FrameSize = MFI.getStackSize();
731   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
732     return false;
733 
734   // Frame pointers and base pointers complicate matters so don't do anything
735   // if we have them. For example having a frame pointer will sometimes require
736   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
737   // difficult.
738   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
739     return false;
740 
741   // Calls to fast_cc functions use different rules for passing parameters on
742   // the stack from the ABI and using PIC base in the function imposes
743   // similar restrictions to using the base pointer. It is not generally safe
744   // to move the stack pointer update in these situations.
745   if (FI->hasFastCall() || FI->usesPICBase())
746     return false;
747 
748   // Finally we can move the stack update if we do not require register
749   // scavenging. Register scavenging can introduce more spills and so
750   // may make the frame size larger than we have computed.
751   return !RegInfo->requiresFrameIndexScavenging(MF);
752 }
753 
754 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
755                                     MachineBasicBlock &MBB) const {
      // Overview: builds the prologue instruction sequence for MF inside MBB.
      // In order, the code below:
      //   1. (non-SVR4 only) scans MBB for an UPDATE_VRSAVE instruction and hands
      //      it to HandleVRSaveUpdate (fatal error if seen on AIX);
      //   2. computes the frame size and looks up the save-slot offsets for LR,
      //      FP, BP and the PIC base register;
      //   3. decides whether the stack-pointer update can be placed below the
      //      callee-saved register stores (StackUpdateLoc);
      //   4. moves LR / CR into scratch registers and stores them, together with
      //      FP, the PIC base and BP when a red zone is available;
      //   5. returns early if FrameSize is zero; otherwise allocates the frame
      //      with a store-with-update of SP (immediate form for small frames,
      //      indexed form for large or realigned frames);
      //   6. stores the TOC register if required and, when there is no red zone,
      //      saves FP / PIC base / BP now that the frame exists;
      //   7. emits CFI for the new CFA and for each saved register, and copies SP
      //      into FP when a frame pointer is used.
756   MachineBasicBlock::iterator MBBI = MBB.begin();
757   MachineFrameInfo &MFI = MF.getFrameInfo();
758   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
759   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
760 
761   MachineModuleInfo &MMI = MF.getMMI();
762   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
763   DebugLoc dl;
764   // AIX assembler does not support cfi directives.
765   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
766 
767   // Get processor type.
768   bool isPPC64 = Subtarget.isPPC64();
769   // Get the ABI.
770   bool isSVR4ABI = Subtarget.isSVR4ABI();
771   bool isAIXABI = Subtarget.isAIXABI();
772   bool isELFv2ABI = Subtarget.isELFv2ABI();
773   assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
774 
775   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
776   // process it.
      // NOTE(review): the counter `i` is incremented but never read inside this
      // loop; only MBBI drives the scan.
777   if (!isSVR4ABI)
778     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
779       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
780         if (isAIXABI)
781           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
782         HandleVRSaveUpdate(*MBBI, TII);
783         break;
784       }
785     }
786 
787   // Move MBBI back to the beginning of the prologue block.
788   MBBI = MBB.begin();
789 
790   // Work out frame sizes.
      // NOTE(review): NegFrameSize is declared as a plain int, so where int is
      // 32 bits wide the isInt<32> check below looks like it can never fail --
      // confirm whether a wider type was intended.
791   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
792   int NegFrameSize = -FrameSize;
793   if (!isInt<32>(NegFrameSize))
794     llvm_unreachable("Unhandled stack size!");
795 
796   if (MFI.isFrameAddressTaken())
797     replaceFPWithRealFP(MF);
798 
799   // Check if the link register (LR) must be saved.
800   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
801   bool MustSaveLR = FI->mustSaveLR();
802   bool MustSaveTOC = FI->mustSaveTOC();
803   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
804   bool MustSaveCR = !MustSaveCRs.empty();
805   // Do we have a frame pointer and/or base pointer for this function?
806   bool HasFP = hasFP(MF);
807   bool HasBP = RegInfo->hasBasePointer(MF);
808   bool HasRedZone = isPPC64 || !isSVR4ABI;
809 
      // Pick the 32-bit or 64-bit flavour of each register and opcode once, so
      // the emission code below is shared between PPC32 and PPC64.
810   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
811   Register BPReg = RegInfo->getBaseRegister(MF);
812   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
813   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
814   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
815   Register ScratchReg;
816   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
817   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
818   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
819                                                 : PPC::MFLR );
820   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
821                                                  : PPC::STW );
822   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
823                                                      : PPC::STWU );
824   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
825                                                         : PPC::STWUX);
826   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
827                                                           : PPC::LIS );
828   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
829                                                  : PPC::ORI );
830   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
831                                               : PPC::OR );
832   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
833                                                             : PPC::SUBFC);
834   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
835                                                                : PPC::SUBFIC);
836   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
837                                                            : PPC::MFCR);
838   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
839 
840   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
841   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
842   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
843   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
844   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
845          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
846 
847   // Using the same bool variable as below to suppress compiler warnings.
      // The return value of findScratchRegister is only consumed by the assert;
      // immediately afterwards SingleScratchReg is overwritten with whether the
      // two scratch registers that were picked are the same register.
848   bool SingleScratchReg =
849     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
850                         &ScratchReg, &TempReg);
851   assert(SingleScratchReg &&
852          "Required number of registers not available in this block");
853 
854   SingleScratchReg = ScratchReg == TempReg;
855 
856   int LROffset = getReturnSaveOffset();
857 
      // Save-slot offset of the frame pointer: read from the frame object on
      // SVR4, otherwise taken from getFramePointerSaveOffset().
858   int FPOffset = 0;
859   if (HasFP) {
860     if (isSVR4ABI) {
          // Note: this local MFI shadows the function-level MFI declared above;
          // both are bound to MF.getFrameInfo(). The BP and PIC-base blocks
          // below repeat the same pattern.
861       MachineFrameInfo &MFI = MF.getFrameInfo();
862       int FPIndex = FI->getFramePointerSaveIndex();
863       assert(FPIndex && "No Frame Pointer Save Slot!");
864       FPOffset = MFI.getObjectOffset(FPIndex);
865     } else {
866       FPOffset = getFramePointerSaveOffset();
867     }
868   }
869 
      // Save-slot offset of the base pointer, determined the same way.
870   int BPOffset = 0;
871   if (HasBP) {
872     if (isSVR4ABI) {
873       MachineFrameInfo &MFI = MF.getFrameInfo();
874       int BPIndex = FI->getBasePointerSaveIndex();
875       assert(BPIndex && "No Base Pointer Save Slot!");
876       BPOffset = MFI.getObjectOffset(BPIndex);
877     } else {
878       BPOffset = getBasePointerSaveOffset();
879     }
880   }
881 
      // Save-slot offset of the PIC base register (stored from R30 below).
882   int PBPOffset = 0;
883   if (FI->usesPICBase()) {
884     MachineFrameInfo &MFI = MF.getFrameInfo();
885     int PBPIndex = FI->getPICBasePointerSaveIndex();
886     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
887     PBPOffset = MFI.getObjectOffset(PBPIndex);
888   }
889 
890   // Get stack alignments.
891   Align MaxAlign = MFI.getMaxAlign();
892   if (HasBP && MaxAlign > 1)
893     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
894 
895   // Frames of 32KB & larger require special handling because they cannot be
896   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
897   bool isLargeFrame = !isInt<16>(NegFrameSize);
898 
899   // Check if we can move the stack update instruction (stdu) down the prologue
900   // past the callee saves. Hopefully this will avoid the situation where the
901   // saves are waiting for the update on the store with update to complete.
902   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
903   bool MovingStackUpdateDown = false;
904 
905   // Check if we can move the stack update.
906   if (stackUpdateCanBeMoved(MF)) {
907     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
        // NOTE(review): CSI is taken by value in this loop and in the one below,
        // so each iteration copies a CalleeSavedInfo; only getFrameIdx() is read.
908     for (CalleeSavedInfo CSI : Info) {
909       int FrIdx = CSI.getFrameIdx();
910       // If the frame index is not negative the callee saved info belongs to a
911       // stack object that is not a fixed stack object. We ignore non-fixed
912       // stack objects because we won't move the stack update pointer past them.
913       if (FrIdx >= 0)
914         continue;
915 
          // Each qualifying slot moves the insertion point one instruction
          // further down -- presumably one spill instruction per slot sits at
          // the top of MBB; confirm against the callee-saved spill code.
916       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
917         StackUpdateLoc++;
918         MovingStackUpdateDown = true;
919       } else {
920         // We need all of the Frame Indices to meet these conditions.
921         // If they do not, abort the whole operation.
922         StackUpdateLoc = MBBI;
923         MovingStackUpdateDown = false;
924         break;
925       }
926     }
927 
928     // If the operation was not aborted then update the object offset.
        // Every negative-index callee-saved slot is shifted by NegFrameSize; the
        // CFI emission at the end of this function subtracts it back out.
929     if (MovingStackUpdateDown) {
930       for (CalleeSavedInfo CSI : Info) {
931         int FrIdx = CSI.getFrameIdx();
932         if (FrIdx < 0)
933           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
934       }
935     }
936   }
937 
938   // Where in the prologue we move the CR fields depends on how many scratch
939   // registers we have, and if we need to save the link register or not. This
940   // lambda is to avoid duplicating the logic in 2 places.
941   auto BuildMoveFromCR = [&]() {
942     if (isELFv2ABI && MustSaveCRs.size() == 1) {
943     // In the ELFv2 ABI, we are not required to save all CR fields.
944     // If only one CR field is clobbered, it is more efficient to use
945     // mfocrf to selectively save just that field, because mfocrf has lower
946     // latency than mfcr.
947       assert(isPPC64 && "V2 ABI is 64-bit only.");
948       MachineInstrBuilder MIB =
949           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
950       MIB.addReg(MustSaveCRs[0], RegState::Kill);
951     } else {
          // Move the CR into TempReg and list every CR field that must be saved
          // as an implicit-kill operand of that instruction.
952       MachineInstrBuilder MIB =
953           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
954       for (unsigned CRfield : MustSaveCRs)
955         MIB.addReg(CRfield, RegState::ImplicitKill);
956     }
957   };
958 
959   // If we need to spill the CR and the LR but we don't have two separate
960   // registers available, we must spill them one at a time
961   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
962     BuildMoveFromCR();
963     BuildMI(MBB, MBBI, dl, StoreWordInst)
964         .addReg(TempReg, getKillRegState(true))
965         .addImm(CRSaveOffset)
966         .addReg(SPReg);
967   }
968 
969   if (MustSaveLR)
970     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
971 
      // With two distinct scratch registers (or no LR save) the CR move is
      // emitted here, after the LR move; its store follows further below.
972   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
973     BuildMoveFromCR();
974 
      // With a red zone, FP, the PIC base and BP can be stored relative to the
      // incoming SP before the frame is allocated.
975   if (HasRedZone) {
976     if (HasFP)
977       BuildMI(MBB, MBBI, dl, StoreInst)
978         .addReg(FPReg)
979         .addImm(FPOffset)
980         .addReg(SPReg);
981     if (FI->usesPICBase())
982       BuildMI(MBB, MBBI, dl, StoreInst)
983         .addReg(PPC::R30)
984         .addImm(PBPOffset)
985         .addReg(SPReg);
986     if (HasBP)
987       BuildMI(MBB, MBBI, dl, StoreInst)
988         .addReg(BPReg)
989         .addImm(BPOffset)
990         .addReg(SPReg);
991   }
992 
      // The LR store is inserted at StackUpdateLoc rather than MBBI, so when the
      // stack update is being moved down this store lands after the instructions
      // that StackUpdateLoc was advanced past.
993   if (MustSaveLR)
994     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
995       .addReg(ScratchReg, getKillRegState(true))
996       .addImm(LROffset)
997       .addReg(SPReg);
998 
999   if (MustSaveCR &&
1000       !(SingleScratchReg && MustSaveLR)) {
1001     assert(HasRedZone && "A red zone is always available on PPC64");
1002     BuildMI(MBB, MBBI, dl, StoreWordInst)
1003       .addReg(TempReg, getKillRegState(true))
1004       .addImm(CRSaveOffset)
1005       .addReg(SPReg);
1006   }
1007 
1008   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1009   if (!FrameSize)
1010     return;
1011 
1012   // Adjust stack pointer: r1 += NegFrameSize.
1013   // If there is a preferred stack alignment, align R1 now
1014 
1015   if (HasBP && HasRedZone) {
1016     // Save a copy of r1 as the base pointer.
1017     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1018       .addReg(SPReg)
1019       .addReg(SPReg);
1020   }
1021 
1022   // Have we generated a STUX instruction to claim stack frame? If so,
1023   // the negated frame size will be placed in ScratchReg.
1024   bool HasSTUX = false;
1025 
1026   // This condition must be kept in sync with canUseAsPrologue.
1027   if (HasBP && MaxAlign > 1) {
         // Realigned frame: ScratchReg first receives the low Log2(MaxAlign)
         // bits of SP (assuming the standard rldicl/rlwinm mask semantics), is
         // then combined with NegFrameSize by the subtract below, and finally
         // serves as the index of the store-with-update that moves SP.
1028     if (isPPC64)
1029       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1030           .addReg(SPReg)
1031           .addImm(0)
1032           .addImm(64 - Log2(MaxAlign));
1033     else // PPC32...
1034       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1035           .addReg(SPReg)
1036           .addImm(0)
1037           .addImm(32 - Log2(MaxAlign))
1038           .addImm(31);
1039     if (!isLargeFrame) {
1040       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1041         .addReg(ScratchReg, RegState::Kill)
1042         .addImm(NegFrameSize);
1043     } else {
           // NegFrameSize does not fit in a 16-bit immediate: build it in
           // TempReg (upper half, then OR in the lower half), which is why a
           // second, distinct scratch register is required here.
1044       assert(!SingleScratchReg && "Only a single scratch reg available");
1045       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1046         .addImm(NegFrameSize >> 16);
1047       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1048         .addReg(TempReg, RegState::Kill)
1049         .addImm(NegFrameSize & 0xFFFF);
1050       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1051         .addReg(ScratchReg, RegState::Kill)
1052         .addReg(TempReg, RegState::Kill);
1053     }
1054 
1055     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1056       .addReg(SPReg, RegState::Kill)
1057       .addReg(SPReg)
1058       .addReg(ScratchReg);
1059     HasSTUX = true;
1060 
1061   } else if (!isLargeFrame) {
         // Small frame, no realignment: a single store-with-update using the
         // immediate NegFrameSize, inserted at StackUpdateLoc so it can sit
         // below the callee-saved stores when the update is being moved down.
1062     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1063       .addReg(SPReg)
1064       .addImm(NegFrameSize)
1065       .addReg(SPReg);
1066 
1067   } else {
         // Large frame, no realignment: build NegFrameSize in ScratchReg (upper
         // half, then OR in the lower half) and use the indexed
         // store-with-update.
1068     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1069       .addImm(NegFrameSize >> 16);
1070     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1071       .addReg(ScratchReg, RegState::Kill)
1072       .addImm(NegFrameSize & 0xFFFF);
1073     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1074       .addReg(SPReg, RegState::Kill)
1075       .addReg(SPReg)
1076       .addReg(ScratchReg);
1077     HasSTUX = true;
1078   }
1079 
1080   // Save the TOC register after the stack pointer update if a prologue TOC
1081   // save is required for the function.
1082   if (MustSaveTOC) {
1083     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1084     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1085       .addReg(TOCReg, getKillRegState(true))
1086       .addImm(TOCSaveOffset)
1087       .addReg(SPReg);
1088   }
1089 
       // Without a red zone the FP / PIC base / BP saves were not emitted above;
       // they are emitted here, after the frame has been allocated.
1090   if (!HasRedZone) {
1091     assert(!isPPC64 && "A red zone is always available on PPC64");
1092     if (HasSTUX) {
1093       // The negated frame size is in ScratchReg, and the SPReg has been
1094       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1095       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1096       // the stack frame (i.e. the old SP), ideally, we would put the old
1097       // SP into a register and use it as the base for the stores. The
1098       // problem is that the only available register may be ScratchReg,
1099       // which could be R0, and R0 cannot be used as a base address.
1100 
1101       // First, set ScratchReg to the old SP. This may need to be modified
1102       // later.
1103       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1104         .addReg(ScratchReg, RegState::Kill)
1105         .addReg(SPReg);
1106 
1107       if (ScratchReg == PPC::R0) {
1108         // R0 cannot be used as a base register, but it can be used as an
1109         // index in a store-indexed.
             // LastOffset tracks the offset already added to R0, so each step
             // only adds the difference to the next save slot.
1110         int LastOffset = 0;
1111         if (HasFP)  {
1112           // R0 += (FPOffset-LastOffset).
1113           // Need addic, since addi treats R0 as 0.
1114           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1115             .addReg(ScratchReg)
1116             .addImm(FPOffset-LastOffset);
1117           LastOffset = FPOffset;
1118           // Store FP into *R0.
1119           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1120             .addReg(FPReg, RegState::Kill)  // Save FP.
1121             .addReg(PPC::ZERO)
1122             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1123         }
1124         if (FI->usesPICBase()) {
1125           // R0 += (PBPOffset-LastOffset).
1126           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1127             .addReg(ScratchReg)
1128             .addImm(PBPOffset-LastOffset);
1129           LastOffset = PBPOffset;
1130           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1131             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1132             .addReg(PPC::ZERO)
1133             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1134         }
1135         if (HasBP) {
1136           // R0 += (BPOffset-LastOffset).
1137           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1138             .addReg(ScratchReg)
1139             .addImm(BPOffset-LastOffset);
1140           LastOffset = BPOffset;
1141           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1142             .addReg(BPReg, RegState::Kill)  // Save BP.
1143             .addReg(PPC::ZERO)
1144             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1145           // BP = R0-LastOffset
1146           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1147             .addReg(ScratchReg, RegState::Kill)
1148             .addImm(-LastOffset);
1149         }
1150       } else {
1151         // ScratchReg is not R0, so use it as the base register. It is
1152         // already set to the old SP, so we can use the offsets directly.
1153 
1154         // Now that the stack frame has been allocated, save all the necessary
1155         // registers using ScratchReg as the base address.
1156         if (HasFP)
1157           BuildMI(MBB, MBBI, dl, StoreInst)
1158             .addReg(FPReg)
1159             .addImm(FPOffset)
1160             .addReg(ScratchReg);
1161         if (FI->usesPICBase())
1162           BuildMI(MBB, MBBI, dl, StoreInst)
1163             .addReg(PPC::R30)
1164             .addImm(PBPOffset)
1165             .addReg(ScratchReg);
1166         if (HasBP) {
1167           BuildMI(MBB, MBBI, dl, StoreInst)
1168             .addReg(BPReg)
1169             .addImm(BPOffset)
1170             .addReg(ScratchReg);
               // After saving the old BP, copy the old SP (held in ScratchReg)
               // into BPReg.
1171           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1172             .addReg(ScratchReg, RegState::Kill)
1173             .addReg(ScratchReg);
1174         }
1175       }
1176     } else {
1177       // The frame size is a known 16-bit constant (fitting in the immediate
1178       // field of STWU). To be here we have to be compiling for PPC32.
1179       // Since the SPReg has been decreased by FrameSize, add it back to each
1180       // offset.
1181       if (HasFP)
1182         BuildMI(MBB, MBBI, dl, StoreInst)
1183           .addReg(FPReg)
1184           .addImm(FrameSize + FPOffset)
1185           .addReg(SPReg);
1186       if (FI->usesPICBase())
1187         BuildMI(MBB, MBBI, dl, StoreInst)
1188           .addReg(PPC::R30)
1189           .addImm(FrameSize + PBPOffset)
1190           .addReg(SPReg);
1191       if (HasBP) {
1192         BuildMI(MBB, MBBI, dl, StoreInst)
1193           .addReg(BPReg)
1194           .addImm(FrameSize + BPOffset)
1195           .addReg(SPReg);
1196         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1197           .addReg(SPReg)
1198           .addImm(FrameSize);
1199       }
1200     }
1201   }
1202 
1203   // Add Call Frame Information for the instructions we generated above.
1204   if (needsCFI) {
1205     unsigned CFIIndex;
1206 
1207     if (HasBP) {
1208       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1209       // because if the stack needed aligning then CFA won't be at a fixed
1210       // offset from FP/SP.
1211       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1212       CFIIndex = MF.addFrameInst(
1213           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1214     } else {
1215       // Adjust the definition of CFA to account for the change in SP.
1216       assert(NegFrameSize);
1217       CFIIndex = MF.addFrameInst(
1218           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1219     }
1220     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1221         .addCFIIndex(CFIIndex);
1222 
1223     if (HasFP) {
1224       // Describe where FP was saved, at a fixed offset from CFA.
1225       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1226       CFIIndex = MF.addFrameInst(
1227           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1228       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1229           .addCFIIndex(CFIIndex);
1230     }
1231 
1232     if (FI->usesPICBase()) {
1233       // Describe where the PIC base (R30) was saved, at a fixed offset from CFA.
1234       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1235       CFIIndex = MF.addFrameInst(
1236           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1237       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1238           .addCFIIndex(CFIIndex);
1239     }
1240 
1241     if (HasBP) {
1242       // Describe where BP was saved, at a fixed offset from CFA.
1243       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1244       CFIIndex = MF.addFrameInst(
1245           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1246       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1247           .addCFIIndex(CFIIndex);
1248     }
1249 
1250     if (MustSaveLR) {
1251       // Describe where LR was saved, at a fixed offset from CFA.
1252       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1253       CFIIndex = MF.addFrameInst(
1254           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1255       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1256           .addCFIIndex(CFIIndex);
1257     }
1258   }
1259 
1260   // If there is a frame pointer, copy R1 into R31
1261   if (HasFP) {
1262     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1263       .addReg(SPReg)
1264       .addReg(SPReg);
1265 
1266     if (!HasBP && needsCFI) {
1267       // Change the definition of CFA from SP+offset to FP+offset, because SP
1268       // will change at every alloca.
1269       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1270       unsigned CFIIndex = MF.addFrameInst(
1271           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1272 
1273       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1274           .addCFIIndex(CFIIndex);
1275     }
1276   }
1277 
1278   if (needsCFI) {
1279     // Describe where callee saved registers were saved, at fixed offsets from
1280     // CFA.
1281     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1282     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1283       unsigned Reg = CSI[I].getReg();
           // LR is described by the dedicated CFI record emitted above; RM gets
           // no CFI record here.
1284       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1285 
1286       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1287       // subregisters of CR2. We just need to emit a move of CR2.
1288       if (PPC::CRBITRCRegClass.contains(Reg))
1289         continue;
1290 
           // No CFI record for the TOC register when the prologue saved it via
           // the MustSaveTOC path.
1291       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1292         continue;
1293 
1294       // For SVR4, don't emit a move for the CR spill slot if we haven't
1295       // spilled CRs.
1296       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1297           && !MustSaveCR)
1298         continue;
1299 
1300       // For 64-bit SVR4 when we have spilled CRs, the spill location
1301       // is SP+8, not a frame-relative slot.
1302       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1303         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1304         // the whole CR word.  In the ELFv2 ABI, every CR that was
1305         // actually saved gets its own CFI record.
1306         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1307         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1308             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1309         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1310             .addCFIIndex(CFIIndex);
1311         continue;
1312       }
1313 
1314       if (CSI[I].isSpilledToReg()) {
             // The register was spilled into another register rather than to
             // the stack: record a register-to-register CFI rule.
1315         unsigned SpilledReg = CSI[I].getDstReg();
1316         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1317             nullptr, MRI->getDwarfRegNum(Reg, true),
1318             MRI->getDwarfRegNum(SpilledReg, true)));
1319         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1320           .addCFIIndex(CFIRegister);
1321       } else {
1322         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1323         // We have changed the object offset above but we do not want to change
1324         // the actual offsets in the CFI instruction so we have to undo the
1325         // offset change here.
1326         if (MovingStackUpdateDown)
1327           Offset -= NegFrameSize;
1328 
1329         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1330             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1331         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1332             .addCFIIndex(CFIIndex);
1333       }
1334     }
1335   }
1336 }
1337 
1338 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1339                                     MachineBasicBlock &MBB) const {
1340   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1341   DebugLoc dl;
1342 
1343   if (MBBI != MBB.end())
1344     dl = MBBI->getDebugLoc();
1345 
1346   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1347   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1348 
1349   // Get alignment info so we know how to restore the SP.
1350   const MachineFrameInfo &MFI = MF.getFrameInfo();
1351 
1352   // Get the number of bytes allocated from the FrameInfo.
1353   int FrameSize = MFI.getStackSize();
1354 
1355   // Get processor type.
1356   bool isPPC64 = Subtarget.isPPC64();
1357   // Get the ABI.
1358   bool isSVR4ABI = Subtarget.isSVR4ABI();
1359 
1360   // Check if the link register (LR) has been saved.
1361   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1362   bool MustSaveLR = FI->mustSaveLR();
1363   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1364   bool MustSaveCR = !MustSaveCRs.empty();
1365   // Do we have a frame pointer and/or base pointer for this function?
1366   bool HasFP = hasFP(MF);
1367   bool HasBP = RegInfo->hasBasePointer(MF);
1368   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1369 
1370   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1371   Register BPReg = RegInfo->getBaseRegister(MF);
1372   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1373   Register ScratchReg;
1374   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1375   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1376                                                  : PPC::MTLR );
1377   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1378                                                  : PPC::LWZ );
1379   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1380                                                            : PPC::LIS );
1381   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1382                                               : PPC::OR );
1383   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1384                                                   : PPC::ORI );
1385   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1386                                                    : PPC::ADDI );
1387   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1388                                                 : PPC::ADD4 );
1389   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1390                                                      : PPC::LWZ);
1391   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1392                                                      : PPC::MTOCRF);
1393   int LROffset = getReturnSaveOffset();
1394 
1395   int FPOffset = 0;
1396 
1397   // Using the same bool variable as below to suppress compiler warnings.
1398   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1399                                               &TempReg);
1400   assert(SingleScratchReg &&
1401          "Could not find an available scratch register");
1402 
1403   SingleScratchReg = ScratchReg == TempReg;
1404 
1405   if (HasFP) {
1406     if (isSVR4ABI) {
1407       int FPIndex = FI->getFramePointerSaveIndex();
1408       assert(FPIndex && "No Frame Pointer Save Slot!");
1409       FPOffset = MFI.getObjectOffset(FPIndex);
1410     } else {
1411       FPOffset = getFramePointerSaveOffset();
1412     }
1413   }
1414 
1415   int BPOffset = 0;
1416   if (HasBP) {
1417     if (isSVR4ABI) {
1418       int BPIndex = FI->getBasePointerSaveIndex();
1419       assert(BPIndex && "No Base Pointer Save Slot!");
1420       BPOffset = MFI.getObjectOffset(BPIndex);
1421     } else {
1422       BPOffset = getBasePointerSaveOffset();
1423     }
1424   }
1425 
1426   int PBPOffset = 0;
1427   if (FI->usesPICBase()) {
1428     int PBPIndex = FI->getPICBasePointerSaveIndex();
1429     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1430     PBPOffset = MFI.getObjectOffset(PBPIndex);
1431   }
1432 
1433   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1434 
1435   if (IsReturnBlock) {
1436     unsigned RetOpcode = MBBI->getOpcode();
1437     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1438                       RetOpcode == PPC::TCRETURNdi ||
1439                       RetOpcode == PPC::TCRETURNai ||
1440                       RetOpcode == PPC::TCRETURNri8 ||
1441                       RetOpcode == PPC::TCRETURNdi8 ||
1442                       RetOpcode == PPC::TCRETURNai8;
1443 
1444     if (UsesTCRet) {
1445       int MaxTCRetDelta = FI->getTailCallSPDelta();
1446       MachineOperand &StackAdjust = MBBI->getOperand(1);
1447       assert(StackAdjust.isImm() && "Expecting immediate value.");
1448       // Adjust stack pointer.
1449       int StackAdj = StackAdjust.getImm();
1450       int Delta = StackAdj - MaxTCRetDelta;
1451       assert((Delta >= 0) && "Delta must be positive");
1452       if (MaxTCRetDelta>0)
1453         FrameSize += (StackAdj +Delta);
1454       else
1455         FrameSize += StackAdj;
1456     }
1457   }
1458 
1459   // Frames of 32KB & larger require special handling because they cannot be
1460   // indexed into with a simple LD/LWZ immediate offset operand.
1461   bool isLargeFrame = !isInt<16>(FrameSize);
1462 
1463   // On targets without red zone, the SP needs to be restored last, so that
1464   // all live contents of the stack frame are upwards of the SP. This means
1465   // that we cannot restore SP just now, since there may be more registers
1466   // to restore from the stack frame (e.g. R31). If the frame size is not
1467   // a simple immediate value, we will need a spare register to hold the
1468   // restored SP. If the frame size is known and small, we can simply adjust
1469   // the offsets of the registers to be restored, and still use SP to restore
1470   // them. In such case, the final update of SP will be to add the frame
1471   // size to it.
1472   // To simplify the code, set RBReg to the base register used to restore
1473   // values from the stack, and set SPAdd to the value that needs to be added
1474   // to the SP at the end. The default values are as if red zone was present.
1475   unsigned RBReg = SPReg;
1476   unsigned SPAdd = 0;
1477 
1478   // Check if we can move the stack update instruction up the epilogue
1479   // past the callee saves. This will allow the move to LR instruction
1480   // to be executed before the restores of the callee saves which means
1481   // that the callee saves can hide the latency from the MTLR instrcution.
1482   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1483   if (stackUpdateCanBeMoved(MF)) {
1484     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1485     for (CalleeSavedInfo CSI : Info) {
1486       int FrIdx = CSI.getFrameIdx();
1487       // If the frame index is not negative the callee saved info belongs to a
1488       // stack object that is not a fixed stack object. We ignore non-fixed
1489       // stack objects because we won't move the update of the stack pointer
1490       // past them.
1491       if (FrIdx >= 0)
1492         continue;
1493 
1494       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1495         StackUpdateLoc--;
1496       else {
1497         // Abort the operation as we can't update all CSR restores.
1498         StackUpdateLoc = MBBI;
1499         break;
1500       }
1501     }
1502   }
1503 
1504   if (FrameSize) {
1505     // In the prologue, the loaded (or persistent) stack pointer value is
1506     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1507     // zone add this offset back now.
1508 
1509     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1510     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1511     // call which invalidates the stack pointer value in SP(0). So we use the
1512     // value of R31 in this case.
1513     if (FI->hasFastCall()) {
1514       assert(HasFP && "Expecting a valid frame pointer.");
1515       if (!HasRedZone)
1516         RBReg = FPReg;
1517       if (!isLargeFrame) {
1518         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1519           .addReg(FPReg).addImm(FrameSize);
1520       } else {
1521         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1522           .addImm(FrameSize >> 16);
1523         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1524           .addReg(ScratchReg, RegState::Kill)
1525           .addImm(FrameSize & 0xFFFF);
1526         BuildMI(MBB, MBBI, dl, AddInst)
1527           .addReg(RBReg)
1528           .addReg(FPReg)
1529           .addReg(ScratchReg);
1530       }
1531     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1532       if (HasRedZone) {
1533         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1534           .addReg(SPReg)
1535           .addImm(FrameSize);
1536       } else {
1537         // Make sure that adding FrameSize will not overflow the max offset
1538         // size.
1539         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1540                "Local offsets should be negative");
1541         SPAdd = FrameSize;
1542         FPOffset += FrameSize;
1543         BPOffset += FrameSize;
1544         PBPOffset += FrameSize;
1545       }
1546     } else {
1547       // We don't want to use ScratchReg as a base register, because it
1548       // could happen to be R0. Use FP instead, but make sure to preserve it.
1549       if (!HasRedZone) {
1550         // If FP is not saved, copy it to ScratchReg.
1551         if (!HasFP)
1552           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1553             .addReg(FPReg)
1554             .addReg(FPReg);
1555         RBReg = FPReg;
1556       }
1557       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1558         .addImm(0)
1559         .addReg(SPReg);
1560     }
1561   }
1562   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1563   // If there is no red zone, ScratchReg may be needed for holding a useful
1564   // value (although not the base register). Make sure it is not overwritten
1565   // too early.
1566 
1567   // If we need to restore both the LR and the CR and we only have one
1568   // available scratch register, we must do them one at a time.
1569   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1570     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1571     // is live here.
1572     assert(HasRedZone && "Expecting red zone");
1573     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1574       .addImm(CRSaveOffset)
1575       .addReg(SPReg);
1576     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1577       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1578         .addReg(TempReg, getKillRegState(i == e-1));
1579   }
1580 
1581   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1582   // LR is stored in the caller's stack frame. ScratchReg will be needed
1583   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1584   // a base register anyway, because it may happen to be R0.
1585   bool LoadedLR = false;
1586   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1587     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1588       .addImm(LROffset+SPAdd)
1589       .addReg(RBReg);
1590     LoadedLR = true;
1591   }
1592 
1593   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1594     assert(RBReg == SPReg && "Should be using SP as a base register");
1595     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1596       .addImm(CRSaveOffset)
1597       .addReg(RBReg);
1598   }
1599 
1600   if (HasFP) {
1601     // If there is red zone, restore FP directly, since SP has already been
1602     // restored. Otherwise, restore the value of FP into ScratchReg.
1603     if (HasRedZone || RBReg == SPReg)
1604       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1605         .addImm(FPOffset)
1606         .addReg(SPReg);
1607     else
1608       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1609         .addImm(FPOffset)
1610         .addReg(RBReg);
1611   }
1612 
1613   if (FI->usesPICBase())
1614     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1615       .addImm(PBPOffset)
1616       .addReg(RBReg);
1617 
1618   if (HasBP)
1619     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1620       .addImm(BPOffset)
1621       .addReg(RBReg);
1622 
1623   // There is nothing more to be loaded from the stack, so now we can
1624   // restore SP: SP = RBReg + SPAdd.
1625   if (RBReg != SPReg || SPAdd != 0) {
1626     assert(!HasRedZone && "This should not happen with red zone");
1627     // If SPAdd is 0, generate a copy.
1628     if (SPAdd == 0)
1629       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1630         .addReg(RBReg)
1631         .addReg(RBReg);
1632     else
1633       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1634         .addReg(RBReg)
1635         .addImm(SPAdd);
1636 
1637     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1638     if (RBReg == FPReg)
1639       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1640         .addReg(ScratchReg)
1641         .addReg(ScratchReg);
1642 
1643     // Now load the LR from the caller's stack frame.
1644     if (MustSaveLR && !LoadedLR)
1645       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1646         .addImm(LROffset)
1647         .addReg(SPReg);
1648   }
1649 
1650   if (MustSaveCR &&
1651       !(SingleScratchReg && MustSaveLR))
1652     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1653       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1654         .addReg(TempReg, getKillRegState(i == e-1));
1655 
1656   if (MustSaveLR)
1657     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1658 
1659   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1660   // call optimization
1661   if (IsReturnBlock) {
1662     unsigned RetOpcode = MBBI->getOpcode();
1663     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1664         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1665         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1666       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1667       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1668 
1669       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1670         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1671           .addReg(SPReg).addImm(CallerAllocatedAmt);
1672       } else {
1673         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1674           .addImm(CallerAllocatedAmt >> 16);
1675         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1676           .addReg(ScratchReg, RegState::Kill)
1677           .addImm(CallerAllocatedAmt & 0xFFFF);
1678         BuildMI(MBB, MBBI, dl, AddInst)
1679           .addReg(SPReg)
1680           .addReg(FPReg)
1681           .addReg(ScratchReg);
1682       }
1683     } else {
1684       createTailCallBranchInstr(MBB);
1685     }
1686   }
1687 }
1688 
1689 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1690   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1691 
1692   // If we got this far a first terminator should exist.
1693   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1694 
1695   DebugLoc dl = MBBI->getDebugLoc();
1696   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1697 
1698   // Create branch instruction for pseudo tail call return instruction.
1699   // The TCRETURNdi variants are direct calls. Valid targets for those are
1700   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1701   // since we can tail call external functions with PC-Rel (i.e. we don't need
1702   // to worry about different TOC pointers). Some of the external functions will
1703   // be MO_GlobalAddress while others like memcpy for example, are going to
1704   // be MO_ExternalSymbol.
1705   unsigned RetOpcode = MBBI->getOpcode();
1706   if (RetOpcode == PPC::TCRETURNdi) {
1707     MBBI = MBB.getLastNonDebugInstr();
1708     MachineOperand &JumpTarget = MBBI->getOperand(0);
1709     if (JumpTarget.isGlobal())
1710       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1711         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1712     else if (JumpTarget.isSymbol())
1713       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1714         addExternalSymbol(JumpTarget.getSymbolName());
1715     else
1716       llvm_unreachable("Expecting Global or External Symbol");
1717   } else if (RetOpcode == PPC::TCRETURNri) {
1718     MBBI = MBB.getLastNonDebugInstr();
1719     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1720     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1721   } else if (RetOpcode == PPC::TCRETURNai) {
1722     MBBI = MBB.getLastNonDebugInstr();
1723     MachineOperand &JumpTarget = MBBI->getOperand(0);
1724     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1725   } else if (RetOpcode == PPC::TCRETURNdi8) {
1726     MBBI = MBB.getLastNonDebugInstr();
1727     MachineOperand &JumpTarget = MBBI->getOperand(0);
1728     if (JumpTarget.isGlobal())
1729       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1730         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1731     else if (JumpTarget.isSymbol())
1732       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1733         addExternalSymbol(JumpTarget.getSymbolName());
1734     else
1735       llvm_unreachable("Expecting Global or External Symbol");
1736   } else if (RetOpcode == PPC::TCRETURNri8) {
1737     MBBI = MBB.getLastNonDebugInstr();
1738     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1739     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1740   } else if (RetOpcode == PPC::TCRETURNai8) {
1741     MBBI = MBB.getLastNonDebugInstr();
1742     MachineOperand &JumpTarget = MBBI->getOperand(0);
1743     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1744   }
1745 }
1746 
1747 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1748                                             BitVector &SavedRegs,
1749                                             RegScavenger *RS) const {
1750   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1751 
1752   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1753 
1754   //  Save and clear the LR state.
1755   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1756   unsigned LR = RegInfo->getRARegister();
1757   FI->setMustSaveLR(MustSaveLR(MF, LR));
1758   SavedRegs.reset(LR);
1759 
1760   //  Save R31 if necessary
1761   int FPSI = FI->getFramePointerSaveIndex();
1762   const bool isPPC64 = Subtarget.isPPC64();
1763   MachineFrameInfo &MFI = MF.getFrameInfo();
1764 
1765   // If the frame pointer save index hasn't been defined yet.
1766   if (!FPSI && needsFP(MF)) {
1767     // Find out what the fix offset of the frame pointer save area.
1768     int FPOffset = getFramePointerSaveOffset();
1769     // Allocate the frame index for frame pointer save area.
1770     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1771     // Save the result.
1772     FI->setFramePointerSaveIndex(FPSI);
1773   }
1774 
1775   int BPSI = FI->getBasePointerSaveIndex();
1776   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1777     int BPOffset = getBasePointerSaveOffset();
1778     // Allocate the frame index for the base pointer save area.
1779     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1780     // Save the result.
1781     FI->setBasePointerSaveIndex(BPSI);
1782   }
1783 
1784   // Reserve stack space for the PIC Base register (R30).
1785   // Only used in SVR4 32-bit.
1786   if (FI->usesPICBase()) {
1787     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1788     FI->setPICBasePointerSaveIndex(PBPSI);
1789   }
1790 
1791   // Make sure we don't explicitly spill r31, because, for example, we have
1792   // some inline asm which explicitly clobbers it, when we otherwise have a
1793   // frame pointer and are using r31's spill slot for the prologue/epilogue
1794   // code. Same goes for the base pointer and the PIC base register.
1795   if (needsFP(MF))
1796     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1797   if (RegInfo->hasBasePointer(MF))
1798     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1799   if (FI->usesPICBase())
1800     SavedRegs.reset(PPC::R30);
1801 
1802   // Reserve stack space to move the linkage area to in case of a tail call.
1803   int TCSPDelta = 0;
1804   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1805       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1806     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1807   }
1808 
1809   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
1810   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
1811   // object at the offset of the CR-save slot in the linkage area. The actual
1812   // save and restore of the condition register will be created as part of the
1813   // prologue and epilogue insertion, but the FixedStack object is needed to
1814   // keep the CalleSavedInfo valid.
1815   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1816        SavedRegs.test(PPC::CR4))) {
1817     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1818     const int64_t SpillOffset =
1819         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
1820     int FrameIdx =
1821         MFI.CreateFixedObject(SpillSize, SpillOffset,
1822                               /* IsImmutable */ true, /* IsAliased */ false);
1823     FI->setCRSpillFrameIndex(FrameIdx);
1824   }
1825 }
1826 
1827 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1828                                                        RegScavenger *RS) const {
1829   // Get callee saved register information.
1830   MachineFrameInfo &MFI = MF.getFrameInfo();
1831   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1832 
1833   // If the function is shrink-wrapped, and if the function has a tail call, the
1834   // tail call might not be in the new RestoreBlock, so real branch instruction
1835   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1836   // RestoreBlock. So we handle this case here.
1837   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1838     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1839     for (MachineBasicBlock &MBB : MF) {
1840       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1841         createTailCallBranchInstr(MBB);
1842     }
1843   }
1844 
1845   // Early exit if no callee saved registers are modified!
1846   if (CSI.empty() && !needsFP(MF)) {
1847     addScavengingSpillSlot(MF, RS);
1848     return;
1849   }
1850 
1851   unsigned MinGPR = PPC::R31;
1852   unsigned MinG8R = PPC::X31;
1853   unsigned MinFPR = PPC::F31;
1854   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1855 
1856   bool HasGPSaveArea = false;
1857   bool HasG8SaveArea = false;
1858   bool HasFPSaveArea = false;
1859   bool HasVRSAVESaveArea = false;
1860   bool HasVRSaveArea = false;
1861 
1862   SmallVector<CalleeSavedInfo, 18> GPRegs;
1863   SmallVector<CalleeSavedInfo, 18> G8Regs;
1864   SmallVector<CalleeSavedInfo, 18> FPRegs;
1865   SmallVector<CalleeSavedInfo, 18> VRegs;
1866 
1867   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1868     unsigned Reg = CSI[i].getReg();
1869     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1870             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1871            "Not expecting to try to spill R2 in a function that must save TOC");
1872     if (PPC::GPRCRegClass.contains(Reg)) {
1873       HasGPSaveArea = true;
1874 
1875       GPRegs.push_back(CSI[i]);
1876 
1877       if (Reg < MinGPR) {
1878         MinGPR = Reg;
1879       }
1880     } else if (PPC::G8RCRegClass.contains(Reg)) {
1881       HasG8SaveArea = true;
1882 
1883       G8Regs.push_back(CSI[i]);
1884 
1885       if (Reg < MinG8R) {
1886         MinG8R = Reg;
1887       }
1888     } else if (PPC::F8RCRegClass.contains(Reg)) {
1889       HasFPSaveArea = true;
1890 
1891       FPRegs.push_back(CSI[i]);
1892 
1893       if (Reg < MinFPR) {
1894         MinFPR = Reg;
1895       }
1896     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1897                PPC::CRRCRegClass.contains(Reg)) {
1898       ; // do nothing, as we already know whether CRs are spilled
1899     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1900       HasVRSAVESaveArea = true;
1901     } else if (PPC::VRRCRegClass.contains(Reg) ||
1902                PPC::SPERCRegClass.contains(Reg)) {
1903       // Altivec and SPE are mutually exclusive, but have the same stack
1904       // alignment requirements, so overload the save area for both cases.
1905       HasVRSaveArea = true;
1906 
1907       VRegs.push_back(CSI[i]);
1908 
1909       if (Reg < MinVR) {
1910         MinVR = Reg;
1911       }
1912     } else {
1913       llvm_unreachable("Unknown RegisterClass!");
1914     }
1915   }
1916 
1917   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1918   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1919 
1920   int64_t LowerBound = 0;
1921 
1922   // Take into account stack space reserved for tail calls.
1923   int TCSPDelta = 0;
1924   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1925       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1926     LowerBound = TCSPDelta;
1927   }
1928 
1929   // The Floating-point register save area is right below the back chain word
1930   // of the previous stack frame.
1931   if (HasFPSaveArea) {
1932     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1933       int FI = FPRegs[i].getFrameIdx();
1934 
1935       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1936     }
1937 
1938     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1939   }
1940 
1941   // Check whether the frame pointer register is allocated. If so, make sure it
1942   // is spilled to the correct offset.
1943   if (needsFP(MF)) {
1944     int FI = PFI->getFramePointerSaveIndex();
1945     assert(FI && "No Frame Pointer Save Slot!");
1946     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1947     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1948     HasGPSaveArea = true;
1949   }
1950 
1951   if (PFI->usesPICBase()) {
1952     int FI = PFI->getPICBasePointerSaveIndex();
1953     assert(FI && "No PIC Base Pointer Save Slot!");
1954     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1955 
1956     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1957     HasGPSaveArea = true;
1958   }
1959 
1960   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1961   if (RegInfo->hasBasePointer(MF)) {
1962     int FI = PFI->getBasePointerSaveIndex();
1963     assert(FI && "No Base Pointer Save Slot!");
1964     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1965 
1966     Register BP = RegInfo->getBaseRegister(MF);
1967     if (PPC::G8RCRegClass.contains(BP)) {
1968       MinG8R = std::min<unsigned>(MinG8R, BP);
1969       HasG8SaveArea = true;
1970     } else if (PPC::GPRCRegClass.contains(BP)) {
1971       MinGPR = std::min<unsigned>(MinGPR, BP);
1972       HasGPSaveArea = true;
1973     }
1974   }
1975 
1976   // General register save area starts right below the Floating-point
1977   // register save area.
1978   if (HasGPSaveArea || HasG8SaveArea) {
1979     // Move general register save area spill slots down, taking into account
1980     // the size of the Floating-point register save area.
1981     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1982       if (!GPRegs[i].isSpilledToReg()) {
1983         int FI = GPRegs[i].getFrameIdx();
1984         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1985       }
1986     }
1987 
1988     // Move general register save area spill slots down, taking into account
1989     // the size of the Floating-point register save area.
1990     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1991       if (!G8Regs[i].isSpilledToReg()) {
1992         int FI = G8Regs[i].getFrameIdx();
1993         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1994       }
1995     }
1996 
1997     unsigned MinReg =
1998       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1999                          TRI->getEncodingValue(MinG8R));
2000 
2001     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2002     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2003   }
2004 
2005   // For 32-bit only, the CR save area is below the general register
2006   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2007   // to the stack pointer and hence does not need an adjustment here.
2008   // Only CR2 (the first nonvolatile spilled) has an associated frame
2009   // index so that we have a single uniform save area.
2010   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2011     // Adjust the frame index of the CR spill slot.
2012     for (const auto &CSInfo : CSI) {
2013       if (CSInfo.getReg() == PPC::CR2) {
2014         int FI = CSInfo.getFrameIdx();
2015         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2016         break;
2017       }
2018     }
2019 
2020     LowerBound -= 4; // The CR save area is always 4 bytes long.
2021   }
2022 
2023   if (HasVRSAVESaveArea) {
2024     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2025     //             which have the VRSAVE register class?
2026     // Adjust the frame index of the VRSAVE spill slot.
2027     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2028       unsigned Reg = CSI[i].getReg();
2029 
2030       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2031         int FI = CSI[i].getFrameIdx();
2032 
2033         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2034       }
2035     }
2036 
2037     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2038   }
2039 
2040   // Both Altivec and SPE have the same alignment and padding requirements
2041   // within the stack frame.
2042   if (HasVRSaveArea) {
2043     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2044     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2045     // we are using negative number here (the stack grows downward). We should
2046     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2047     // is the alignment size ( n = 16 here) and y is the size after aligning.
2048     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2049     LowerBound &= ~(15);
2050 
2051     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2052       int FI = VRegs[i].getFrameIdx();
2053 
2054       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2055     }
2056   }
2057 
2058   addScavengingSpillSlot(MF, RS);
2059 }
2060 
2061 void
2062 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2063                                          RegScavenger *RS) const {
2064   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2065   // a large stack, which will require scavenging a register to materialize a
2066   // large offset.
2067 
2068   // We need to have a scavenger spill slot for spills if the frame size is
2069   // large. In case there is no free register for large-offset addressing,
2070   // this slot is used for the necessary emergency spill. Also, we need the
2071   // slot for dynamic stack allocations.
2072 
2073   // The scavenger might be invoked if the frame offset does not fit into
2074   // the 16-bit immediate. We don't know the complete frame size here
2075   // because we've not yet computed callee-saved register spills or the
2076   // needed alignment padding.
2077   unsigned StackSize = determineFrameLayout(MF, true);
2078   MachineFrameInfo &MFI = MF.getFrameInfo();
2079   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2080       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2081     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2082     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2083     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2084     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2085     unsigned Size = TRI.getSpillSize(RC);
2086     Align Alignment = TRI.getSpillAlign(RC);
2087     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2088 
2089     // Might we have over-aligned allocas?
2090     bool HasAlVars =
2091         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2092 
2093     // These kinds of spills might need two registers.
2094     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2095       RS->addScavengingFrameIndex(
2096           MFI.CreateStackObject(Size, Alignment, false));
2097   }
2098 }
2099 
2100 // This function checks if a callee saved gpr can be spilled to a volatile
2101 // vector register. This occurs for leaf functions when the option
2102 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2103 // which were not spilled to vectors, return false so the target independent
2104 // code can handle them by assigning a FrameIdx to a stack slot.
2105 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2106     MachineFunction &MF, const TargetRegisterInfo *TRI,
2107     std::vector<CalleeSavedInfo> &CSI) const {
2108 
2109   if (CSI.empty())
2110     return true; // Early exit if no callee saved registers are modified!
2111 
2112   // Early exit if cannot spill gprs to volatile vector registers.
2113   MachineFrameInfo &MFI = MF.getFrameInfo();
2114   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2115     return false;
2116 
2117   // Build a BitVector of VSRs that can be used for spilling GPRs.
2118   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2119   BitVector BVCalleeSaved(TRI->getNumRegs());
2120   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2121   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2122   for (unsigned i = 0; CSRegs[i]; ++i)
2123     BVCalleeSaved.set(CSRegs[i]);
2124 
2125   for (unsigned Reg : BVAllocatable.set_bits()) {
2126     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2127     // used in the function.
2128     if (BVCalleeSaved[Reg] ||
2129         (!PPC::F8RCRegClass.contains(Reg) &&
2130          !PPC::VFRCRegClass.contains(Reg)) ||
2131         (MF.getRegInfo().isPhysRegUsed(Reg)))
2132       BVAllocatable.reset(Reg);
2133   }
2134 
2135   bool AllSpilledToReg = true;
2136   for (auto &CS : CSI) {
2137     if (BVAllocatable.none())
2138       return false;
2139 
2140     unsigned Reg = CS.getReg();
2141     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2142       AllSpilledToReg = false;
2143       continue;
2144     }
2145 
2146     unsigned VolatileVFReg = BVAllocatable.find_first();
2147     if (VolatileVFReg < BVAllocatable.size()) {
2148       CS.setDstReg(VolatileVFReg);
2149       BVAllocatable.reset(VolatileVFReg);
2150     } else {
2151       AllSpilledToReg = false;
2152     }
2153   }
2154   return AllSpilledToReg;
2155 }
2156 
2157 bool PPCFrameLowering::spillCalleeSavedRegisters(
2158     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2159     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2160 
2161   MachineFunction *MF = MBB.getParent();
2162   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2163   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2164   bool MustSaveTOC = FI->mustSaveTOC();
2165   DebugLoc DL;
2166   bool CRSpilled = false;
2167   MachineInstrBuilder CRMIB;
2168 
2169   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2170     unsigned Reg = CSI[i].getReg();
2171     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2172     if (Reg == PPC::VRSAVE)
2173       continue;
2174 
2175     // CR2 through CR4 are the nonvolatile CR fields.
2176     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2177 
2178     // Add the callee-saved register as live-in; it's killed at the spill.
2179     // Do not do this for callee-saved registers that are live-in to the
2180     // function because they will already be marked live-in and this will be
2181     // adding it for a second time. It is an error to add the same register
2182     // to the set more than once.
2183     const MachineRegisterInfo &MRI = MF->getRegInfo();
2184     bool IsLiveIn = MRI.isLiveIn(Reg);
2185     if (!IsLiveIn)
2186        MBB.addLiveIn(Reg);
2187 
2188     if (CRSpilled && IsCRField) {
2189       CRMIB.addReg(Reg, RegState::ImplicitKill);
2190       continue;
2191     }
2192 
2193     // The actual spill will happen in the prologue.
2194     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2195       continue;
2196 
2197     // Insert the spill to the stack frame.
2198     if (IsCRField) {
2199       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2200       if (!Subtarget.is32BitELFABI()) {
2201         // The actual spill will happen at the start of the prologue.
2202         FuncInfo->addMustSaveCR(Reg);
2203       } else {
2204         CRSpilled = true;
2205         FuncInfo->setSpillsCR();
2206 
2207         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2208         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2209         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2210                   .addReg(Reg, RegState::ImplicitKill);
2211 
2212         MBB.insert(MI, CRMIB);
2213         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2214                                          .addReg(PPC::R12,
2215                                                  getKillRegState(true)),
2216                                          CSI[i].getFrameIdx()));
2217       }
2218     } else {
2219       if (CSI[i].isSpilledToReg()) {
2220         NumPESpillVSR++;
2221         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2222           .addReg(Reg, getKillRegState(true));
2223       } else {
2224         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2225         // Use !IsLiveIn for the kill flag.
2226         // We do not want to kill registers that are live in this function
2227         // before their use because they will become undefined registers.
2228         // Functions without NoUnwind need to preserve the order of elements in
2229         // saved vector registers.
2230         if (Subtarget.needsSwapsForVSXMemOps() &&
2231             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2232           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2233                                        CSI[i].getFrameIdx(), RC, TRI);
2234         else
2235           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2236                                   RC, TRI);
2237       }
2238     }
2239   }
2240   return true;
2241 }
2242 
2243 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2244                        bool CR4Spilled, MachineBasicBlock &MBB,
2245                        MachineBasicBlock::iterator MI,
2246                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2247 
2248   MachineFunction *MF = MBB.getParent();
2249   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2250   DebugLoc DL;
2251   unsigned MoveReg = PPC::R12;
2252 
2253   // 32-bit:  FP-relative
2254   MBB.insert(MI,
2255              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2256                                CSI[CSIIndex].getFrameIdx()));
2257 
2258   unsigned RestoreOp = PPC::MTOCRF;
2259   if (CR2Spilled)
2260     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2261                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2262 
2263   if (CR3Spilled)
2264     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2265                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2266 
2267   if (CR4Spilled)
2268     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2269                .addReg(MoveReg, getKillRegState(true)));
2270 }
2271 
2272 MachineBasicBlock::iterator PPCFrameLowering::
2273 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2274                               MachineBasicBlock::iterator I) const {
2275   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2276   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2277       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2278     // Add (actually subtract) back the amount the callee popped on return.
2279     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2280       bool is64Bit = Subtarget.isPPC64();
2281       CalleeAmt *= -1;
2282       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2283       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2284       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2285       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2286       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2287       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2288       const DebugLoc &dl = I->getDebugLoc();
2289 
2290       if (isInt<16>(CalleeAmt)) {
2291         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2292           .addReg(StackReg, RegState::Kill)
2293           .addImm(CalleeAmt);
2294       } else {
2295         MachineBasicBlock::iterator MBBI = I;
2296         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2297           .addImm(CalleeAmt >> 16);
2298         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2299           .addReg(TmpReg, RegState::Kill)
2300           .addImm(CalleeAmt & 0xFFFF);
2301         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2302           .addReg(StackReg, RegState::Kill)
2303           .addReg(TmpReg);
2304       }
2305     }
2306   }
2307   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2308   return MBB.erase(I);
2309 }
2310 
2311 static bool isCalleeSavedCR(unsigned Reg) {
2312   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2313 }
2314 
2315 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2316     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2317     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2318   MachineFunction *MF = MBB.getParent();
2319   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2320   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2321   bool MustSaveTOC = FI->mustSaveTOC();
2322   bool CR2Spilled = false;
2323   bool CR3Spilled = false;
2324   bool CR4Spilled = false;
2325   unsigned CSIIndex = 0;
2326 
2327   // Initialize insertion-point logic; we will be restoring in reverse
2328   // order of spill.
2329   MachineBasicBlock::iterator I = MI, BeforeI = I;
2330   bool AtStart = I == MBB.begin();
2331 
2332   if (!AtStart)
2333     --BeforeI;
2334 
2335   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2336     unsigned Reg = CSI[i].getReg();
2337 
2338     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2339     if (Reg == PPC::VRSAVE)
2340       continue;
2341 
2342     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2343       continue;
2344 
2345     // Restore of callee saved condition register field is handled during
2346     // epilogue insertion.
2347     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2348       continue;
2349 
2350     if (Reg == PPC::CR2) {
2351       CR2Spilled = true;
2352       // The spill slot is associated only with CR2, which is the
2353       // first nonvolatile spilled.  Save it here.
2354       CSIIndex = i;
2355       continue;
2356     } else if (Reg == PPC::CR3) {
2357       CR3Spilled = true;
2358       continue;
2359     } else if (Reg == PPC::CR4) {
2360       CR4Spilled = true;
2361       continue;
2362     } else {
2363       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2364       // least one CR register, restore all spilled CRs together.
2365       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2366         bool is31 = needsFP(*MF);
2367         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2368                    CSIIndex);
2369         CR2Spilled = CR3Spilled = CR4Spilled = false;
2370       }
2371 
2372       if (CSI[i].isSpilledToReg()) {
2373         DebugLoc DL;
2374         NumPEReloadVSR++;
2375         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2376             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2377       } else {
2378        // Default behavior for non-CR saves.
2379         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2380 
2381         // Functions without NoUnwind need to preserve the order of elements in
2382         // saved vector registers.
2383         if (Subtarget.needsSwapsForVSXMemOps() &&
2384             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2385           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2386                                         TRI);
2387         else
2388           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2389 
2390         assert(I != MBB.begin() &&
2391                "loadRegFromStackSlot didn't insert any code!");
2392       }
2393     }
2394 
2395     // Insert in reverse order.
2396     if (AtStart)
2397       I = MBB.begin();
2398     else {
2399       I = BeforeI;
2400       ++I;
2401     }
2402   }
2403 
2404   // If we haven't yet spilled the CRs, do so now.
2405   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2406     assert(Subtarget.is32BitELFABI() &&
2407            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2408     bool is31 = needsFP(*MF);
2409     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2410   }
2411 
2412   return true;
2413 }
2414 
2415 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2416   return TOCSaveOffset;
2417 }
2418 
2419 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2420   return FramePointerSaveOffset;
2421 }
2422 
2423 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2424   return BasePointerSaveOffset;
2425 }
2426 
2427 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2428   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2429     return false;
2430   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2431           MF.getSubtarget<PPCSubtarget>().isPPC64());
2432 }
2433