xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision dfb717da1f794c235b81a985a57dc238c82318e6)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MCTargetDesc/PPCPredicates.h"
14 #include "PPCFrameLowering.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Target/TargetOptions.h"
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "framelowering"
33 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
34 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
35 STATISTIC(NumPrologProbed, "Number of prologues probed");
36 
37 static cl::opt<bool>
38 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
39                      cl::desc("Enable spills in prologue to vector registers."),
40                      cl::init(false), cl::Hidden);
41 
42 /// VRRegNo - Map from a numbered VR register to its enum value.
43 ///
44 static const MCPhysReg VRRegNo[] = {
45  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
46  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
47  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
48  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
49 };
50 
51 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
52   if (STI.isAIXABI())
53     return STI.isPPC64() ? 16 : 8;
54   // SVR4 ABI:
55   return STI.isPPC64() ? 16 : 4;
56 }
57 
58 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
59   if (STI.isAIXABI())
60     return STI.isPPC64() ? 40 : 20;
61   return STI.isELFv2ABI() ? 24 : 40;
62 }
63 
64 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
65   // First slot in the general register save area.
66   return STI.isPPC64() ? -8U : -4U;
67 }
68 
69 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
70   if (STI.isAIXABI() || STI.isPPC64())
71     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
72 
73   // 32-bit SVR4 ABI:
74   return 8;
75 }
76 
77 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
78   // Third slot in the general purpose register save area.
79   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
80     return -12U;
81 
82   // Second slot in the general purpose register save area.
83   return STI.isPPC64() ? -16U : -8U;
84 }
85 
86 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
87   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
88 }
89 
90 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
91     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
92                           STI.getPlatformStackAlignment(), 0),
93       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
94       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
95       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
96       LinkageSize(computeLinkageSize(Subtarget)),
97       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
98       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
99 
100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
102     unsigned &NumEntries) const {
103 
104 // Floating-point register save area offsets.
105 #define CALLEE_SAVED_FPRS \
106       {PPC::F31, -8},     \
107       {PPC::F30, -16},    \
108       {PPC::F29, -24},    \
109       {PPC::F28, -32},    \
110       {PPC::F27, -40},    \
111       {PPC::F26, -48},    \
112       {PPC::F25, -56},    \
113       {PPC::F24, -64},    \
114       {PPC::F23, -72},    \
115       {PPC::F22, -80},    \
116       {PPC::F21, -88},    \
117       {PPC::F20, -96},    \
118       {PPC::F19, -104},   \
119       {PPC::F18, -112},   \
120       {PPC::F17, -120},   \
121       {PPC::F16, -128},   \
122       {PPC::F15, -136},   \
123       {PPC::F14, -144}
124 
125 // 32-bit general purpose register save area offsets shared by ELF and
126 // AIX. AIX has an extra CSR with r13.
127 #define CALLEE_SAVED_GPRS32 \
128       {PPC::R31, -4},       \
129       {PPC::R30, -8},       \
130       {PPC::R29, -12},      \
131       {PPC::R28, -16},      \
132       {PPC::R27, -20},      \
133       {PPC::R26, -24},      \
134       {PPC::R25, -28},      \
135       {PPC::R24, -32},      \
136       {PPC::R23, -36},      \
137       {PPC::R22, -40},      \
138       {PPC::R21, -44},      \
139       {PPC::R20, -48},      \
140       {PPC::R19, -52},      \
141       {PPC::R18, -56},      \
142       {PPC::R17, -60},      \
143       {PPC::R16, -64},      \
144       {PPC::R15, -68},      \
145       {PPC::R14, -72}
146 
147 // 64-bit general purpose register save area offsets.
148 #define CALLEE_SAVED_GPRS64 \
149       {PPC::X31, -8},       \
150       {PPC::X30, -16},      \
151       {PPC::X29, -24},      \
152       {PPC::X28, -32},      \
153       {PPC::X27, -40},      \
154       {PPC::X26, -48},      \
155       {PPC::X25, -56},      \
156       {PPC::X24, -64},      \
157       {PPC::X23, -72},      \
158       {PPC::X22, -80},      \
159       {PPC::X21, -88},      \
160       {PPC::X20, -96},      \
161       {PPC::X19, -104},     \
162       {PPC::X18, -112},     \
163       {PPC::X17, -120},     \
164       {PPC::X16, -128},     \
165       {PPC::X15, -136},     \
166       {PPC::X14, -144}
167 
168 // Vector register save area offsets.
169 #define CALLEE_SAVED_VRS \
170       {PPC::V31, -16},   \
171       {PPC::V30, -32},   \
172       {PPC::V29, -48},   \
173       {PPC::V28, -64},   \
174       {PPC::V27, -80},   \
175       {PPC::V26, -96},   \
176       {PPC::V25, -112},  \
177       {PPC::V24, -128},  \
178       {PPC::V23, -144},  \
179       {PPC::V22, -160},  \
180       {PPC::V21, -176},  \
181       {PPC::V20, -192}
182 
183   // Note that the offsets here overlap, but this is fixed up in
184   // processFunctionBeforeFrameFinalized.
185 
186   static const SpillSlot ELFOffsets32[] = {
187       CALLEE_SAVED_FPRS,
188       CALLEE_SAVED_GPRS32,
189 
190       // CR save area offset.  We map each of the nonvolatile CR fields
191       // to the slot for CR2, which is the first of the nonvolatile CR
192       // fields to be assigned, so that we only allocate one save slot.
193       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
194       {PPC::CR2, -4},
195 
196       // VRSAVE save area offset.
197       {PPC::VRSAVE, -4},
198 
199       CALLEE_SAVED_VRS,
200 
201       // SPE register save area (overlaps Vector save area).
202       {PPC::S31, -8},
203       {PPC::S30, -16},
204       {PPC::S29, -24},
205       {PPC::S28, -32},
206       {PPC::S27, -40},
207       {PPC::S26, -48},
208       {PPC::S25, -56},
209       {PPC::S24, -64},
210       {PPC::S23, -72},
211       {PPC::S22, -80},
212       {PPC::S21, -88},
213       {PPC::S20, -96},
214       {PPC::S19, -104},
215       {PPC::S18, -112},
216       {PPC::S17, -120},
217       {PPC::S16, -128},
218       {PPC::S15, -136},
219       {PPC::S14, -144}};
220 
221   static const SpillSlot ELFOffsets64[] = {
222       CALLEE_SAVED_FPRS,
223       CALLEE_SAVED_GPRS64,
224 
225       // VRSAVE save area offset.
226       {PPC::VRSAVE, -4},
227       CALLEE_SAVED_VRS
228   };
229 
230   static const SpillSlot AIXOffsets32[] = {
231       CALLEE_SAVED_FPRS,
232       CALLEE_SAVED_GPRS32,
233       // Add AIX's extra CSR.
234       {PPC::R13, -76},
235       // TODO: Update when we add vector support for AIX.
236   };
237 
238   static const SpillSlot AIXOffsets64[] = {
239       CALLEE_SAVED_FPRS,
240       CALLEE_SAVED_GPRS64,
241       // TODO: Update when we add vector support for AIX.
242   };
243 
244   if (Subtarget.is64BitELFABI()) {
245     NumEntries = array_lengthof(ELFOffsets64);
246     return ELFOffsets64;
247   }
248 
249   if (Subtarget.is32BitELFABI()) {
250     NumEntries = array_lengthof(ELFOffsets32);
251     return ELFOffsets32;
252   }
253 
254   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
255 
256   if (Subtarget.isPPC64()) {
257     NumEntries = array_lengthof(AIXOffsets64);
258     return AIXOffsets64;
259   }
260 
261   NumEntries = array_lengthof(AIXOffsets32);
262   return AIXOffsets32;
263 }
264 
265 static bool spillsCR(const MachineFunction &MF) {
266   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
267   return FuncInfo->isCRSpilled();
268 }
269 
270 static bool hasSpills(const MachineFunction &MF) {
271   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
272   return FuncInfo->hasSpills();
273 }
274 
275 static bool hasNonRISpills(const MachineFunction &MF) {
276   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
277   return FuncInfo->hasNonRISpills();
278 }
279 
280 /// MustSaveLR - Return true if this function requires that we save the LR
281 /// register onto the stack in the prolog and restore it in the epilog of the
282 /// function.
283 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
284   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
285 
286   // We need a save/restore of LR if there is any def of LR (which is
287   // defined by calls, including the PIC setup sequence), or if there is
288   // some use of the LR stack slot (e.g. for builtin_return_address).
289   // (LR comes in 32 and 64 bit versions.)
290   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
291   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
292 }
293 
294 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
295 /// call frame size. Update the MachineFunction object with the stack size.
296 unsigned
297 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
298                                                 bool UseEstimate) const {
299   unsigned NewMaxCallFrameSize = 0;
300   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
301                                             &NewMaxCallFrameSize);
302   MF.getFrameInfo().setStackSize(FrameSize);
303   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
304   return FrameSize;
305 }
306 
307 /// determineFrameLayout - Determine the size of the frame and maximum call
308 /// frame size.
309 unsigned
310 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
311                                        bool UseEstimate,
312                                        unsigned *NewMaxCallFrameSize) const {
313   const MachineFrameInfo &MFI = MF.getFrameInfo();
314   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
315 
316   // Get the number of bytes to allocate from the FrameInfo
317   unsigned FrameSize =
318     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
319 
320   // Get stack alignments. The frame must be aligned to the greatest of these:
321   Align TargetAlign = getStackAlign(); // alignment required per the ABI
322   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
323   Align Alignment = std::max(TargetAlign, MaxAlign);
324 
325   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
326 
327   unsigned LR = RegInfo->getRARegister();
328   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
329   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
330                        !MFI.adjustsStack() &&       // No calls.
331                        !MustSaveLR(MF, LR) &&       // No need to save LR.
332                        !FI->mustSaveTOC() &&        // No need to save TOC.
333                        !RegInfo->hasBasePointer(MF); // No special alignment.
334 
335   // Note: for PPC32 SVR4ABI, we can still generate stackless
336   // code if all local vars are reg-allocated.
337   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
338 
339   // Check whether we can skip adjusting the stack pointer (by using red zone)
340   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
341     // No need for frame
342     return 0;
343   }
344 
345   // Get the maximum call frame size of all the calls.
346   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
347 
348   // Maximum call frame needs to be at least big enough for linkage area.
349   unsigned minCallFrameSize = getLinkageSize();
350   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
351 
352   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
353   // that allocations will be aligned.
354   if (MFI.hasVarSizedObjects())
355     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
356 
357   // Update the new max call frame size if the caller passes in a valid pointer.
358   if (NewMaxCallFrameSize)
359     *NewMaxCallFrameSize = maxCallFrameSize;
360 
361   // Include call frame size in total.
362   FrameSize += maxCallFrameSize;
363 
364   // Make sure the frame is aligned.
365   FrameSize = alignTo(FrameSize, Alignment);
366 
367   return FrameSize;
368 }
369 
370 // hasFP - Return true if the specified function actually has a dedicated frame
371 // pointer register.
372 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
373   const MachineFrameInfo &MFI = MF.getFrameInfo();
374   // FIXME: This is pretty much broken by design: hasFP() might be called really
375   // early, before the stack layout was calculated and thus hasFP() might return
376   // true or false here depending on the time of call.
377   return (MFI.getStackSize()) && needsFP(MF);
378 }
379 
380 // needsFP - Return true if the specified function should have a dedicated frame
381 // pointer register.  This is true if the function has variable sized allocas or
382 // if frame pointer elimination is disabled.
383 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
384   const MachineFrameInfo &MFI = MF.getFrameInfo();
385 
386   // Naked functions have no stack frame pushed, so we don't have a frame
387   // pointer.
388   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
389     return false;
390 
391   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
392     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
393     (MF.getTarget().Options.GuaranteedTailCallOpt &&
394      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
395 }
396 
397 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
398   bool is31 = needsFP(MF);
399   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
400   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
401 
402   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
403   bool HasBP = RegInfo->hasBasePointer(MF);
404   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
405   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
406 
407   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
408        BI != BE; ++BI)
409     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
410       --MBBI;
411       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
412         MachineOperand &MO = MBBI->getOperand(I);
413         if (!MO.isReg())
414           continue;
415 
416         switch (MO.getReg()) {
417         case PPC::FP:
418           MO.setReg(FPReg);
419           break;
420         case PPC::FP8:
421           MO.setReg(FP8Reg);
422           break;
423         case PPC::BP:
424           MO.setReg(BPReg);
425           break;
426         case PPC::BP8:
427           MO.setReg(BP8Reg);
428           break;
429 
430         }
431       }
432     }
433 }
434 
435 /*  This function will do the following:
436     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
437       respectively (defaults recommended by the ABI) and return true
438     - If MBB is not an entry block, initialize the register scavenger and look
439       for available registers.
440     - If the defaults (R0/R12) are available, return true
441     - If TwoUniqueRegsRequired is set to true, it looks for two unique
442       registers. Otherwise, look for a single available register.
443       - If the required registers are found, set SR1 and SR2 and return true.
444       - If the required registers are not found, set SR2 or both SR1 and SR2 to
445         PPC::NoRegister and return false.
446 
447     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
448     is not set, this function will attempt to find two different registers, but
449     still return true if only one register is available (and set SR1 == SR2).
450 */
451 bool
452 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
453                                       bool UseAtEnd,
454                                       bool TwoUniqueRegsRequired,
455                                       Register *SR1,
456                                       Register *SR2) const {
457   RegScavenger RS;
458   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
459   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
460 
461   // Set the defaults for the two scratch registers.
462   if (SR1)
463     *SR1 = R0;
464 
465   if (SR2) {
466     assert (SR1 && "Asking for the second scratch register but not the first?");
467     *SR2 = R12;
468   }
469 
470   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
471   if ((UseAtEnd && MBB->isReturnBlock()) ||
472       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
473     return true;
474 
475   RS.enterBasicBlock(*MBB);
476 
477   if (UseAtEnd && !MBB->empty()) {
478     // The scratch register will be used at the end of the block, so must
479     // consider all registers used within the block
480 
481     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
482     // If no terminator, back iterator up to previous instruction.
483     if (MBBI == MBB->end())
484       MBBI = std::prev(MBBI);
485 
486     if (MBBI != MBB->begin())
487       RS.forward(MBBI);
488   }
489 
490   // If the two registers are available, we're all good.
491   // Note that we only return here if both R0 and R12 are available because
492   // although the function may not require two unique registers, it may benefit
493   // from having two so we should try to provide them.
494   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
495     return true;
496 
497   // Get the list of callee-saved registers for the target.
498   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
499   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
500 
501   // Get all the available registers in the block.
502   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
503                                      &PPC::GPRCRegClass);
504 
505   // We shouldn't use callee-saved registers as scratch registers as they may be
506   // available when looking for a candidate block for shrink wrapping but not
507   // available when the actual prologue/epilogue is being emitted because they
508   // were added as live-in to the prologue block by PrologueEpilogueInserter.
509   for (int i = 0; CSRegs[i]; ++i)
510     BV.reset(CSRegs[i]);
511 
512   // Set the first scratch register to the first available one.
513   if (SR1) {
514     int FirstScratchReg = BV.find_first();
515     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
516   }
517 
518   // If there is another one available, set the second scratch register to that.
519   // Otherwise, set it to either PPC::NoRegister if this function requires two
520   // or to whatever SR1 is set to if this function doesn't require two.
521   if (SR2) {
522     int SecondScratchReg = BV.find_next(*SR1);
523     if (SecondScratchReg != -1)
524       *SR2 = SecondScratchReg;
525     else
526       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
527   }
528 
529   // Now that we've done our best to provide both registers, double check
530   // whether we were unable to provide enough.
531   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
532     return false;
533 
534   return true;
535 }
536 
537 // We need a scratch register for spilling LR and for spilling CR. By default,
538 // we use two scratch registers to hide latency. However, if only one scratch
539 // register is available, we can adjust for that by not overlapping the spill
540 // code. However, if we need to realign the stack (i.e. have a base pointer)
541 // and the stack frame is large, we need two scratch registers.
542 bool
543 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
544   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
545   MachineFunction &MF = *(MBB->getParent());
546   bool HasBP = RegInfo->hasBasePointer(MF);
547   unsigned FrameSize = determineFrameLayout(MF);
548   int NegFrameSize = -FrameSize;
549   bool IsLargeFrame = !isInt<16>(NegFrameSize);
550   MachineFrameInfo &MFI = MF.getFrameInfo();
551   Align MaxAlign = MFI.getMaxAlign();
552   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
553 
554   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
555 }
556 
557 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
558   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
559 
560   return findScratchRegister(TmpMBB, false,
561                              twoUniqueScratchRegsRequired(TmpMBB));
562 }
563 
564 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
565   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
566 
567   return findScratchRegister(TmpMBB, true);
568 }
569 
570 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
571   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
572   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
573 
574   // Abort if there is no register info or function info.
575   if (!RegInfo || !FI)
576     return false;
577 
578   // Only move the stack update on ELFv2 ABI and PPC64.
579   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
580     return false;
581 
582   // Check the frame size first and return false if it does not fit the
583   // requirements.
584   // We need a non-zero frame size as well as a frame that will fit in the red
585   // zone. This is because by moving the stack pointer update we are now storing
586   // to the red zone until the stack pointer is updated. If we get an interrupt
587   // inside the prologue but before the stack update we now have a number of
588   // stores to the red zone and those stores must all fit.
589   MachineFrameInfo &MFI = MF.getFrameInfo();
590   unsigned FrameSize = MFI.getStackSize();
591   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
592     return false;
593 
594   // Frame pointers and base pointers complicate matters so don't do anything
595   // if we have them. For example having a frame pointer will sometimes require
596   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
597   // difficult.
598   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
599     return false;
600 
601   // Calls to fast_cc functions use different rules for passing parameters on
602   // the stack from the ABI and using PIC base in the function imposes
603   // similar restrictions to using the base pointer. It is not generally safe
604   // to move the stack pointer update in these situations.
605   if (FI->hasFastCall() || FI->usesPICBase())
606     return false;
607 
608   // Finally we can move the stack update if we do not require register
609   // scavenging. Register scavenging can introduce more spills and so
610   // may make the frame size larger than we have computed.
611   return !RegInfo->requiresFrameIndexScavenging(MF);
612 }
613 
614 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
615                                     MachineBasicBlock &MBB) const {
616   MachineBasicBlock::iterator MBBI = MBB.begin();
617   MachineFrameInfo &MFI = MF.getFrameInfo();
618   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
619   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
620   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
621 
622   MachineModuleInfo &MMI = MF.getMMI();
623   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
624   DebugLoc dl;
625   // AIX assembler does not support cfi directives.
626   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
627 
628   // Get processor type.
629   bool isPPC64 = Subtarget.isPPC64();
630   // Get the ABI.
631   bool isSVR4ABI = Subtarget.isSVR4ABI();
632   bool isAIXABI = Subtarget.isAIXABI();
633   bool isELFv2ABI = Subtarget.isELFv2ABI();
634   assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
635 
636   // Work out frame sizes.
637   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
638   int NegFrameSize = -FrameSize;
639   if (!isInt<32>(NegFrameSize))
640     llvm_unreachable("Unhandled stack size!");
641 
642   if (MFI.isFrameAddressTaken())
643     replaceFPWithRealFP(MF);
644 
645   // Check if the link register (LR) must be saved.
646   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
647   bool MustSaveLR = FI->mustSaveLR();
648   bool MustSaveTOC = FI->mustSaveTOC();
649   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
650   bool MustSaveCR = !MustSaveCRs.empty();
651   // Do we have a frame pointer and/or base pointer for this function?
652   bool HasFP = hasFP(MF);
653   bool HasBP = RegInfo->hasBasePointer(MF);
654   bool HasRedZone = isPPC64 || !isSVR4ABI;
655 
656   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
657   Register BPReg = RegInfo->getBaseRegister(MF);
658   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
659   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
660   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
661   Register ScratchReg;
662   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
663   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
664   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
665                                                 : PPC::MFLR );
666   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
667                                                  : PPC::STW );
668   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
669                                                      : PPC::STWU );
670   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
671                                                         : PPC::STWUX);
672   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
673                                                           : PPC::LIS );
674   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
675                                                  : PPC::ORI );
676   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
677                                               : PPC::OR );
678   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
679                                                             : PPC::SUBFC);
680   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
681                                                                : PPC::SUBFIC);
682   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
683                                                            : PPC::MFCR);
684   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
685 
686   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
687   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
688   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
689   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
690   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
691          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
692 
693   // Using the same bool variable as below to suppress compiler warnings.
694   // Stack probe requires two scratch registers, one for old sp, one for large
695   // frame and large probe size.
696   bool SingleScratchReg = findScratchRegister(
697       &MBB, false,
698       twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
699       &ScratchReg, &TempReg);
700   assert(SingleScratchReg &&
701          "Required number of registers not available in this block");
702 
703   SingleScratchReg = ScratchReg == TempReg;
704 
705   int LROffset = getReturnSaveOffset();
706 
707   int FPOffset = 0;
708   if (HasFP) {
709     MachineFrameInfo &MFI = MF.getFrameInfo();
710     int FPIndex = FI->getFramePointerSaveIndex();
711     assert(FPIndex && "No Frame Pointer Save Slot!");
712     FPOffset = MFI.getObjectOffset(FPIndex);
713   }
714 
715   int BPOffset = 0;
716   if (HasBP) {
717     MachineFrameInfo &MFI = MF.getFrameInfo();
718     int BPIndex = FI->getBasePointerSaveIndex();
719     assert(BPIndex && "No Base Pointer Save Slot!");
720     BPOffset = MFI.getObjectOffset(BPIndex);
721   }
722 
723   int PBPOffset = 0;
724   if (FI->usesPICBase()) {
725     MachineFrameInfo &MFI = MF.getFrameInfo();
726     int PBPIndex = FI->getPICBasePointerSaveIndex();
727     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
728     PBPOffset = MFI.getObjectOffset(PBPIndex);
729   }
730 
731   // Get stack alignments.
732   Align MaxAlign = MFI.getMaxAlign();
733   if (HasBP && MaxAlign > 1)
734     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
735 
736   // Frames of 32KB & larger require special handling because they cannot be
737   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
738   bool isLargeFrame = !isInt<16>(NegFrameSize);
739 
740   // Check if we can move the stack update instruction (stdu) down the prologue
741   // past the callee saves. Hopefully this will avoid the situation where the
742   // saves are waiting for the update on the store with update to complete.
743   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
744   bool MovingStackUpdateDown = false;
745 
746   // Check if we can move the stack update.
747   if (stackUpdateCanBeMoved(MF)) {
748     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
749     for (CalleeSavedInfo CSI : Info) {
750       int FrIdx = CSI.getFrameIdx();
751       // If the frame index is not negative the callee saved info belongs to a
752       // stack object that is not a fixed stack object. We ignore non-fixed
753       // stack objects because we won't move the stack update pointer past them.
754       if (FrIdx >= 0)
755         continue;
756 
757       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
758         StackUpdateLoc++;
759         MovingStackUpdateDown = true;
760       } else {
761         // We need all of the Frame Indices to meet these conditions.
762         // If they do not, abort the whole operation.
763         StackUpdateLoc = MBBI;
764         MovingStackUpdateDown = false;
765         break;
766       }
767     }
768 
769     // If the operation was not aborted then update the object offset.
770     if (MovingStackUpdateDown) {
771       for (CalleeSavedInfo CSI : Info) {
772         int FrIdx = CSI.getFrameIdx();
773         if (FrIdx < 0)
774           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
775       }
776     }
777   }
778 
779   // Where in the prologue we move the CR fields depends on how many scratch
780   // registers we have, and if we need to save the link register or not. This
781   // lambda is to avoid duplicating the logic in 2 places.
782   auto BuildMoveFromCR = [&]() {
783     if (isELFv2ABI && MustSaveCRs.size() == 1) {
784     // In the ELFv2 ABI, we are not required to save all CR fields.
785     // If only one CR field is clobbered, it is more efficient to use
    // mfocrf to selectively save just that field, because mfocrf has shorter
    // latency compared to mfcr.
788       assert(isPPC64 && "V2 ABI is 64-bit only.");
789       MachineInstrBuilder MIB =
790           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
791       MIB.addReg(MustSaveCRs[0], RegState::Kill);
792     } else {
793       MachineInstrBuilder MIB =
794           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
795       for (unsigned CRfield : MustSaveCRs)
796         MIB.addReg(CRfield, RegState::ImplicitKill);
797     }
798   };
799 
800   // If we need to spill the CR and the LR but we don't have two separate
801   // registers available, we must spill them one at a time
802   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
803     BuildMoveFromCR();
804     BuildMI(MBB, MBBI, dl, StoreWordInst)
805         .addReg(TempReg, getKillRegState(true))
806         .addImm(CRSaveOffset)
807         .addReg(SPReg);
808   }
809 
810   if (MustSaveLR)
811     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
812 
813   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
814     BuildMoveFromCR();
815 
816   if (HasRedZone) {
817     if (HasFP)
818       BuildMI(MBB, MBBI, dl, StoreInst)
819         .addReg(FPReg)
820         .addImm(FPOffset)
821         .addReg(SPReg);
822     if (FI->usesPICBase())
823       BuildMI(MBB, MBBI, dl, StoreInst)
824         .addReg(PPC::R30)
825         .addImm(PBPOffset)
826         .addReg(SPReg);
827     if (HasBP)
828       BuildMI(MBB, MBBI, dl, StoreInst)
829         .addReg(BPReg)
830         .addImm(BPOffset)
831         .addReg(SPReg);
832   }
833 
834   if (MustSaveLR)
835     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
836       .addReg(ScratchReg, getKillRegState(true))
837       .addImm(LROffset)
838       .addReg(SPReg);
839 
840   if (MustSaveCR &&
841       !(SingleScratchReg && MustSaveLR)) {
842     assert(HasRedZone && "A red zone is always available on PPC64");
843     BuildMI(MBB, MBBI, dl, StoreWordInst)
844       .addReg(TempReg, getKillRegState(true))
845       .addImm(CRSaveOffset)
846       .addReg(SPReg);
847   }
848 
849   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
850   if (!FrameSize)
851     return;
852 
853   // Adjust stack pointer: r1 += NegFrameSize.
854   // If there is a preferred stack alignment, align R1 now
855 
856   if (HasBP && HasRedZone) {
857     // Save a copy of r1 as the base pointer.
858     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
859       .addReg(SPReg)
860       .addReg(SPReg);
861   }
862 
863   // Have we generated a STUX instruction to claim stack frame? If so,
864   // the negated frame size will be placed in ScratchReg.
865   bool HasSTUX = false;
866 
867   // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
868   // pointer is always stored at SP, we will get a free probe due to an essential
869   // STU(X) instruction.
870   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
871     // To be consistent with other targets, a pseudo instruction is emitted and
872     // will be later expanded in `inlineStackProbe`.
873     BuildMI(MBB, MBBI, dl,
874             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
875                             : PPC::PROBED_STACKALLOC_32))
876         .addDef(ScratchReg)
877         .addDef(TempReg) // TempReg stores the old sp.
878         .addImm(NegFrameSize);
879     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
880     // update the ScratchReg to meet the assumption that ScratchReg contains
881     // the NegFrameSize. This solution is rather tricky.
882     if (!HasRedZone) {
883       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
884           .addReg(TempReg)
885           .addReg(SPReg);
886       HasSTUX = true;
887     }
888   } else {
889     // This condition must be kept in sync with canUseAsPrologue.
890     if (HasBP && MaxAlign > 1) {
891       if (isPPC64)
892         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
893             .addReg(SPReg)
894             .addImm(0)
895             .addImm(64 - Log2(MaxAlign));
896       else // PPC32...
897         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
898             .addReg(SPReg)
899             .addImm(0)
900             .addImm(32 - Log2(MaxAlign))
901             .addImm(31);
902       if (!isLargeFrame) {
903         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
904             .addReg(ScratchReg, RegState::Kill)
905             .addImm(NegFrameSize);
906       } else {
907         assert(!SingleScratchReg && "Only a single scratch reg available");
908         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
909             .addImm(NegFrameSize >> 16);
910         BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
911             .addReg(TempReg, RegState::Kill)
912             .addImm(NegFrameSize & 0xFFFF);
913         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
914             .addReg(ScratchReg, RegState::Kill)
915             .addReg(TempReg, RegState::Kill);
916       }
917 
918       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
919           .addReg(SPReg, RegState::Kill)
920           .addReg(SPReg)
921           .addReg(ScratchReg);
922       HasSTUX = true;
923 
924     } else if (!isLargeFrame) {
925       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
926           .addReg(SPReg)
927           .addImm(NegFrameSize)
928           .addReg(SPReg);
929 
930     } else {
931       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
932           .addImm(NegFrameSize >> 16);
933       BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
934           .addReg(ScratchReg, RegState::Kill)
935           .addImm(NegFrameSize & 0xFFFF);
936       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
937           .addReg(SPReg, RegState::Kill)
938           .addReg(SPReg)
939           .addReg(ScratchReg);
940       HasSTUX = true;
941     }
942   }
943 
944   // Save the TOC register after the stack pointer update if a prologue TOC
945   // save is required for the function.
946   if (MustSaveTOC) {
947     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
948     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
949       .addReg(TOCReg, getKillRegState(true))
950       .addImm(TOCSaveOffset)
951       .addReg(SPReg);
952   }
953 
954   if (!HasRedZone) {
955     assert(!isPPC64 && "A red zone is always available on PPC64");
956     if (HasSTUX) {
957       // The negated frame size is in ScratchReg, and the SPReg has been
958       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
959       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
960       // the stack frame (i.e. the old SP), ideally, we would put the old
961       // SP into a register and use it as the base for the stores. The
962       // problem is that the only available register may be ScratchReg,
963       // which could be R0, and R0 cannot be used as a base address.
964 
965       // First, set ScratchReg to the old SP. This may need to be modified
966       // later.
967       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
968         .addReg(ScratchReg, RegState::Kill)
969         .addReg(SPReg);
970 
971       if (ScratchReg == PPC::R0) {
972         // R0 cannot be used as a base register, but it can be used as an
973         // index in a store-indexed.
974         int LastOffset = 0;
975         if (HasFP)  {
976           // R0 += (FPOffset-LastOffset).
977           // Need addic, since addi treats R0 as 0.
978           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
979             .addReg(ScratchReg)
980             .addImm(FPOffset-LastOffset);
981           LastOffset = FPOffset;
982           // Store FP into *R0.
983           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
984             .addReg(FPReg, RegState::Kill)  // Save FP.
985             .addReg(PPC::ZERO)
986             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
987         }
988         if (FI->usesPICBase()) {
989           // R0 += (PBPOffset-LastOffset).
990           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
991             .addReg(ScratchReg)
992             .addImm(PBPOffset-LastOffset);
993           LastOffset = PBPOffset;
994           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
995             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
996             .addReg(PPC::ZERO)
997             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
998         }
999         if (HasBP) {
1000           // R0 += (BPOffset-LastOffset).
1001           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1002             .addReg(ScratchReg)
1003             .addImm(BPOffset-LastOffset);
1004           LastOffset = BPOffset;
1005           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1006             .addReg(BPReg, RegState::Kill)  // Save BP.
1007             .addReg(PPC::ZERO)
1008             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1009           // BP = R0-LastOffset
1010           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1011             .addReg(ScratchReg, RegState::Kill)
1012             .addImm(-LastOffset);
1013         }
1014       } else {
1015         // ScratchReg is not R0, so use it as the base register. It is
1016         // already set to the old SP, so we can use the offsets directly.
1017 
1018         // Now that the stack frame has been allocated, save all the necessary
1019         // registers using ScratchReg as the base address.
1020         if (HasFP)
1021           BuildMI(MBB, MBBI, dl, StoreInst)
1022             .addReg(FPReg)
1023             .addImm(FPOffset)
1024             .addReg(ScratchReg);
1025         if (FI->usesPICBase())
1026           BuildMI(MBB, MBBI, dl, StoreInst)
1027             .addReg(PPC::R30)
1028             .addImm(PBPOffset)
1029             .addReg(ScratchReg);
1030         if (HasBP) {
1031           BuildMI(MBB, MBBI, dl, StoreInst)
1032             .addReg(BPReg)
1033             .addImm(BPOffset)
1034             .addReg(ScratchReg);
1035           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1036             .addReg(ScratchReg, RegState::Kill)
1037             .addReg(ScratchReg);
1038         }
1039       }
1040     } else {
1041       // The frame size is a known 16-bit constant (fitting in the immediate
1042       // field of STWU). To be here we have to be compiling for PPC32.
1043       // Since the SPReg has been decreased by FrameSize, add it back to each
1044       // offset.
1045       if (HasFP)
1046         BuildMI(MBB, MBBI, dl, StoreInst)
1047           .addReg(FPReg)
1048           .addImm(FrameSize + FPOffset)
1049           .addReg(SPReg);
1050       if (FI->usesPICBase())
1051         BuildMI(MBB, MBBI, dl, StoreInst)
1052           .addReg(PPC::R30)
1053           .addImm(FrameSize + PBPOffset)
1054           .addReg(SPReg);
1055       if (HasBP) {
1056         BuildMI(MBB, MBBI, dl, StoreInst)
1057           .addReg(BPReg)
1058           .addImm(FrameSize + BPOffset)
1059           .addReg(SPReg);
1060         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1061           .addReg(SPReg)
1062           .addImm(FrameSize);
1063       }
1064     }
1065   }
1066 
1067   // Add Call Frame Information for the instructions we generated above.
1068   if (needsCFI) {
1069     unsigned CFIIndex;
1070 
1071     if (HasBP) {
1072       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1073       // because if the stack needed aligning then CFA won't be at a fixed
1074       // offset from FP/SP.
1075       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1076       CFIIndex = MF.addFrameInst(
1077           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1078     } else {
1079       // Adjust the definition of CFA to account for the change in SP.
1080       assert(NegFrameSize);
1081       CFIIndex = MF.addFrameInst(
1082           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1083     }
1084     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1085         .addCFIIndex(CFIIndex);
1086 
1087     if (HasFP) {
1088       // Describe where FP was saved, at a fixed offset from CFA.
1089       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1090       CFIIndex = MF.addFrameInst(
1091           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1092       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1093           .addCFIIndex(CFIIndex);
1094     }
1095 
1096     if (FI->usesPICBase()) {
      // Describe where the PIC base pointer (R30) was saved, at a fixed
      // offset from CFA.
1098       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1099       CFIIndex = MF.addFrameInst(
1100           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1101       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1102           .addCFIIndex(CFIIndex);
1103     }
1104 
1105     if (HasBP) {
1106       // Describe where BP was saved, at a fixed offset from CFA.
1107       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1108       CFIIndex = MF.addFrameInst(
1109           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1110       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1111           .addCFIIndex(CFIIndex);
1112     }
1113 
1114     if (MustSaveLR) {
1115       // Describe where LR was saved, at a fixed offset from CFA.
1116       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1117       CFIIndex = MF.addFrameInst(
1118           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1119       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1120           .addCFIIndex(CFIIndex);
1121     }
1122   }
1123 
1124   // If there is a frame pointer, copy R1 into R31
1125   if (HasFP) {
1126     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1127       .addReg(SPReg)
1128       .addReg(SPReg);
1129 
1130     if (!HasBP && needsCFI) {
1131       // Change the definition of CFA from SP+offset to FP+offset, because SP
1132       // will change at every alloca.
1133       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1134       unsigned CFIIndex = MF.addFrameInst(
1135           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1136 
1137       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1138           .addCFIIndex(CFIIndex);
1139     }
1140   }
1141 
1142   if (needsCFI) {
1143     // Describe where callee saved registers were saved, at fixed offsets from
1144     // CFA.
1145     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1146     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1147       unsigned Reg = CSI[I].getReg();
1148       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1149 
1150       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1151       // subregisters of CR2. We just need to emit a move of CR2.
1152       if (PPC::CRBITRCRegClass.contains(Reg))
1153         continue;
1154 
1155       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1156         continue;
1157 
1158       // For SVR4, don't emit a move for the CR spill slot if we haven't
1159       // spilled CRs.
1160       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1161           && !MustSaveCR)
1162         continue;
1163 
1164       // For 64-bit SVR4 when we have spilled CRs, the spill location
1165       // is SP+8, not a frame-relative slot.
1166       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1167         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1168         // the whole CR word.  In the ELFv2 ABI, every CR that was
1169         // actually saved gets its own CFI record.
1170         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1171         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1172             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1173         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1174             .addCFIIndex(CFIIndex);
1175         continue;
1176       }
1177 
1178       if (CSI[I].isSpilledToReg()) {
1179         unsigned SpilledReg = CSI[I].getDstReg();
1180         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1181             nullptr, MRI->getDwarfRegNum(Reg, true),
1182             MRI->getDwarfRegNum(SpilledReg, true)));
1183         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1184           .addCFIIndex(CFIRegister);
1185       } else {
1186         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1187         // We have changed the object offset above but we do not want to change
1188         // the actual offsets in the CFI instruction so we have to undo the
1189         // offset change here.
1190         if (MovingStackUpdateDown)
1191           Offset -= NegFrameSize;
1192 
1193         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1194             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1195         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1196             .addCFIIndex(CFIIndex);
1197       }
1198     }
1199   }
1200 }
1201 
1202 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1203                                         MachineBasicBlock &PrologMBB) const {
1204   // TODO: Generate CFI instructions.
1205   bool isPPC64 = Subtarget.isPPC64();
1206   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1207   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1208   MachineFrameInfo &MFI = MF.getFrameInfo();
1209   MachineModuleInfo &MMI = MF.getMMI();
1210   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1211   // AIX assembler does not support cfi directives.
1212   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1213   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1214     int Opc = MI.getOpcode();
1215     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1216   });
1217   if (StackAllocMIPos == PrologMBB.end())
1218     return;
1219   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1220   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1221   MachineInstr &MI = *StackAllocMIPos;
1222   int64_t NegFrameSize = MI.getOperand(2).getImm();
1223   int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
1224   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1225   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1226   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1227   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1228   Register ScratchReg = MI.getOperand(0).getReg();
1229   Register FPReg = MI.getOperand(1).getReg();
1230   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1231   bool HasBP = RegInfo->hasBasePointer(MF);
1232   Align MaxAlign = MFI.getMaxAlign();
1233   // Initialize current frame pointer.
1234   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1235   BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1236   // Subroutines to generate .cfi_* directives.
1237   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1238                             MachineBasicBlock::iterator MBBI, Register Reg) {
1239     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1240     unsigned CFIIndex = MF.addFrameInst(
1241         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1242     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1243         .addCFIIndex(CFIIndex);
1244   };
1245   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1246                          MachineBasicBlock::iterator MBBI, Register Reg,
1247                          int Offset) {
1248     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1249     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1250         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1251     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1252         .addCFIIndex(CFIIndex);
1253   };
1254   // Subroutine to determine if we can use the Imm as part of d-form.
1255   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1256   // Subroutine to materialize the Imm into TempReg.
1257   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1258                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1259                             Register &TempReg) {
1260     assert(isInt<32>(Imm) && "Unhandled imm");
1261     if (isInt<16>(Imm))
1262       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1263           .addImm(Imm);
1264     else {
1265       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1266           .addImm(Imm >> 16);
1267       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1268           .addReg(TempReg)
1269           .addImm(Imm & 0xFFFF);
1270     }
1271   };
1272   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1273   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1274                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1275                               Register NegSizeReg, bool UseDForm) {
1276     if (UseDForm)
1277       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1278           .addReg(FPReg)
1279           .addImm(NegSize)
1280           .addReg(SPReg);
1281     else
1282       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1283           .addReg(FPReg)
1284           .addReg(SPReg)
1285           .addReg(NegSizeReg);
1286   };
1287   // Use FPReg to calculate CFA.
1288   if (needsCFI)
1289     buildDefCFA(PrologMBB, {MI}, FPReg, 0);
1290   // For case HasBP && MaxAlign > 1, we have to align the SP by performing
1291   // SP = SP - SP % MaxAlign.
1292   if (HasBP && MaxAlign > 1) {
1293     if (isPPC64)
1294       BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1295           .addReg(FPReg)
1296           .addImm(0)
1297           .addImm(64 - Log2(MaxAlign));
1298     else
1299       BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1300           .addReg(FPReg)
1301           .addImm(0)
1302           .addImm(32 - Log2(MaxAlign))
1303           .addImm(31);
1304     BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC),
1305             SPReg)
1306         .addReg(ScratchReg)
1307         .addReg(SPReg);
1308   }
1309   // Probe residual part.
1310   if (NegResidualSize) {
1311     bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1312     if (!ResidualUseDForm)
1313       MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
1314     allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
1315                      ResidualUseDForm);
1316   }
1317   bool UseDForm = CanUseDForm(NegProbeSize);
1318   // If number of blocks is small, just probe them directly.
1319   if (NumBlocks < 3) {
1320     if (!UseDForm)
1321       MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
1322     for (int i = 0; i < NumBlocks; ++i)
1323       allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
1324     if (needsCFI) {
1325       // Restore using SPReg to calculate CFA.
1326       buildDefCFAReg(PrologMBB, {MI}, SPReg);
1327     }
1328   } else {
1329     // Since CTR is a volatile register and current shrinkwrap implementation
1330     // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1331     // CTR loop to probe.
1332     // Calculate trip count and stores it in CTRReg.
1333     MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
1334     BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1335         .addReg(ScratchReg, RegState::Kill);
1336     if (!UseDForm)
1337       MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
1338     // Create MBBs of the loop.
1339     MachineFunction::iterator MBBInsertPoint =
1340         std::next(PrologMBB.getIterator());
1341     MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1342     MF.insert(MBBInsertPoint, LoopMBB);
1343     MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1344     MF.insert(MBBInsertPoint, ExitMBB);
1345     // Synthesize the loop body.
1346     allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1347                      UseDForm);
1348     BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1349         .addMBB(LoopMBB);
1350     LoopMBB->addSuccessor(ExitMBB);
1351     LoopMBB->addSuccessor(LoopMBB);
1352     // Synthesize the exit MBB.
1353     ExitMBB->splice(ExitMBB->end(), &PrologMBB,
1354                     std::next(MachineBasicBlock::iterator(MI)),
1355                     PrologMBB.end());
1356     ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
1357     PrologMBB.addSuccessor(LoopMBB);
1358     if (needsCFI) {
1359       // Restore using SPReg to calculate CFA.
1360       buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1361     }
1362     // Update liveins.
1363     recomputeLiveIns(*LoopMBB);
1364     recomputeLiveIns(*ExitMBB);
1365   }
1366   ++NumPrologProbed;
1367   MI.eraseFromParent();
1368 }
1369 
1370 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1371                                     MachineBasicBlock &MBB) const {
1372   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1373   DebugLoc dl;
1374 
1375   if (MBBI != MBB.end())
1376     dl = MBBI->getDebugLoc();
1377 
1378   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1379   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1380 
1381   // Get alignment info so we know how to restore the SP.
1382   const MachineFrameInfo &MFI = MF.getFrameInfo();
1383 
1384   // Get the number of bytes allocated from the FrameInfo.
1385   int FrameSize = MFI.getStackSize();
1386 
1387   // Get processor type.
1388   bool isPPC64 = Subtarget.isPPC64();
1389 
1390   // Check if the link register (LR) has been saved.
1391   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1392   bool MustSaveLR = FI->mustSaveLR();
1393   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1394   bool MustSaveCR = !MustSaveCRs.empty();
1395   // Do we have a frame pointer and/or base pointer for this function?
1396   bool HasFP = hasFP(MF);
1397   bool HasBP = RegInfo->hasBasePointer(MF);
1398   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1399 
1400   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1401   Register BPReg = RegInfo->getBaseRegister(MF);
1402   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1403   Register ScratchReg;
1404   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1405   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1406                                                  : PPC::MTLR );
1407   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1408                                                  : PPC::LWZ );
1409   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1410                                                            : PPC::LIS );
1411   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1412                                               : PPC::OR );
1413   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1414                                                   : PPC::ORI );
1415   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1416                                                    : PPC::ADDI );
1417   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1418                                                 : PPC::ADD4 );
1419   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1420                                                      : PPC::LWZ);
1421   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1422                                                      : PPC::MTOCRF);
1423   int LROffset = getReturnSaveOffset();
1424 
1425   int FPOffset = 0;
1426 
1427   // Using the same bool variable as below to suppress compiler warnings.
1428   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1429                                               &TempReg);
1430   assert(SingleScratchReg &&
1431          "Could not find an available scratch register");
1432 
1433   SingleScratchReg = ScratchReg == TempReg;
1434 
1435   if (HasFP) {
1436     int FPIndex = FI->getFramePointerSaveIndex();
1437     assert(FPIndex && "No Frame Pointer Save Slot!");
1438     FPOffset = MFI.getObjectOffset(FPIndex);
1439   }
1440 
1441   int BPOffset = 0;
1442   if (HasBP) {
1443       int BPIndex = FI->getBasePointerSaveIndex();
1444       assert(BPIndex && "No Base Pointer Save Slot!");
1445       BPOffset = MFI.getObjectOffset(BPIndex);
1446   }
1447 
1448   int PBPOffset = 0;
1449   if (FI->usesPICBase()) {
1450     int PBPIndex = FI->getPICBasePointerSaveIndex();
1451     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1452     PBPOffset = MFI.getObjectOffset(PBPIndex);
1453   }
1454 
1455   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1456 
1457   if (IsReturnBlock) {
1458     unsigned RetOpcode = MBBI->getOpcode();
1459     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1460                       RetOpcode == PPC::TCRETURNdi ||
1461                       RetOpcode == PPC::TCRETURNai ||
1462                       RetOpcode == PPC::TCRETURNri8 ||
1463                       RetOpcode == PPC::TCRETURNdi8 ||
1464                       RetOpcode == PPC::TCRETURNai8;
1465 
1466     if (UsesTCRet) {
1467       int MaxTCRetDelta = FI->getTailCallSPDelta();
1468       MachineOperand &StackAdjust = MBBI->getOperand(1);
1469       assert(StackAdjust.isImm() && "Expecting immediate value.");
1470       // Adjust stack pointer.
1471       int StackAdj = StackAdjust.getImm();
1472       int Delta = StackAdj - MaxTCRetDelta;
1473       assert((Delta >= 0) && "Delta must be positive");
1474       if (MaxTCRetDelta>0)
1475         FrameSize += (StackAdj +Delta);
1476       else
1477         FrameSize += StackAdj;
1478     }
1479   }
1480 
1481   // Frames of 32KB & larger require special handling because they cannot be
1482   // indexed into with a simple LD/LWZ immediate offset operand.
1483   bool isLargeFrame = !isInt<16>(FrameSize);
1484 
1485   // On targets without red zone, the SP needs to be restored last, so that
1486   // all live contents of the stack frame are upwards of the SP. This means
1487   // that we cannot restore SP just now, since there may be more registers
1488   // to restore from the stack frame (e.g. R31). If the frame size is not
1489   // a simple immediate value, we will need a spare register to hold the
1490   // restored SP. If the frame size is known and small, we can simply adjust
1491   // the offsets of the registers to be restored, and still use SP to restore
1492   // them. In such case, the final update of SP will be to add the frame
1493   // size to it.
1494   // To simplify the code, set RBReg to the base register used to restore
1495   // values from the stack, and set SPAdd to the value that needs to be added
1496   // to the SP at the end. The default values are as if red zone was present.
1497   unsigned RBReg = SPReg;
1498   unsigned SPAdd = 0;
1499 
1500   // Check if we can move the stack update instruction up the epilogue
1501   // past the callee saves. This will allow the move to LR instruction
1502   // to be executed before the restores of the callee saves which means
1503   // that the callee saves can hide the latency from the MTLR instruction.
1504   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1505   if (stackUpdateCanBeMoved(MF)) {
1506     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1507     for (CalleeSavedInfo CSI : Info) {
1508       int FrIdx = CSI.getFrameIdx();
1509       // If the frame index is not negative the callee saved info belongs to a
1510       // stack object that is not a fixed stack object. We ignore non-fixed
1511       // stack objects because we won't move the update of the stack pointer
1512       // past them.
1513       if (FrIdx >= 0)
1514         continue;
1515 
1516       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1517         StackUpdateLoc--;
1518       else {
1519         // Abort the operation as we can't update all CSR restores.
1520         StackUpdateLoc = MBBI;
1521         break;
1522       }
1523     }
1524   }
1525 
1526   if (FrameSize) {
1527     // In the prologue, the loaded (or persistent) stack pointer value is
1528     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1529     // zone add this offset back now.
1530 
1531     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1532     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1533     // call which invalidates the stack pointer value in SP(0). So we use the
1534     // value of R31 in this case.
1535     if (FI->hasFastCall()) {
1536       assert(HasFP && "Expecting a valid frame pointer.");
1537       if (!HasRedZone)
1538         RBReg = FPReg;
1539       if (!isLargeFrame) {
1540         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1541           .addReg(FPReg).addImm(FrameSize);
1542       } else {
1543         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1544           .addImm(FrameSize >> 16);
1545         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1546           .addReg(ScratchReg, RegState::Kill)
1547           .addImm(FrameSize & 0xFFFF);
1548         BuildMI(MBB, MBBI, dl, AddInst)
1549           .addReg(RBReg)
1550           .addReg(FPReg)
1551           .addReg(ScratchReg);
1552       }
1553     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1554       if (HasRedZone) {
1555         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1556           .addReg(SPReg)
1557           .addImm(FrameSize);
1558       } else {
1559         // Make sure that adding FrameSize will not overflow the max offset
1560         // size.
1561         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1562                "Local offsets should be negative");
1563         SPAdd = FrameSize;
1564         FPOffset += FrameSize;
1565         BPOffset += FrameSize;
1566         PBPOffset += FrameSize;
1567       }
1568     } else {
1569       // We don't want to use ScratchReg as a base register, because it
1570       // could happen to be R0. Use FP instead, but make sure to preserve it.
1571       if (!HasRedZone) {
1572         // If FP is not saved, copy it to ScratchReg.
1573         if (!HasFP)
1574           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1575             .addReg(FPReg)
1576             .addReg(FPReg);
1577         RBReg = FPReg;
1578       }
1579       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1580         .addImm(0)
1581         .addReg(SPReg);
1582     }
1583   }
1584   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1585   // If there is no red zone, ScratchReg may be needed for holding a useful
1586   // value (although not the base register). Make sure it is not overwritten
1587   // too early.
1588 
1589   // If we need to restore both the LR and the CR and we only have one
1590   // available scratch register, we must do them one at a time.
1591   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1592     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1593     // is live here.
1594     assert(HasRedZone && "Expecting red zone");
1595     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1596       .addImm(CRSaveOffset)
1597       .addReg(SPReg);
1598     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1599       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1600         .addReg(TempReg, getKillRegState(i == e-1));
1601   }
1602 
1603   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1604   // LR is stored in the caller's stack frame. ScratchReg will be needed
1605   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1606   // a base register anyway, because it may happen to be R0.
1607   bool LoadedLR = false;
1608   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1609     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1610       .addImm(LROffset+SPAdd)
1611       .addReg(RBReg);
1612     LoadedLR = true;
1613   }
1614 
1615   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1616     assert(RBReg == SPReg && "Should be using SP as a base register");
1617     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1618       .addImm(CRSaveOffset)
1619       .addReg(RBReg);
1620   }
1621 
1622   if (HasFP) {
1623     // If there is red zone, restore FP directly, since SP has already been
1624     // restored. Otherwise, restore the value of FP into ScratchReg.
1625     if (HasRedZone || RBReg == SPReg)
1626       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1627         .addImm(FPOffset)
1628         .addReg(SPReg);
1629     else
1630       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1631         .addImm(FPOffset)
1632         .addReg(RBReg);
1633   }
1634 
1635   if (FI->usesPICBase())
1636     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1637       .addImm(PBPOffset)
1638       .addReg(RBReg);
1639 
1640   if (HasBP)
1641     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1642       .addImm(BPOffset)
1643       .addReg(RBReg);
1644 
1645   // There is nothing more to be loaded from the stack, so now we can
1646   // restore SP: SP = RBReg + SPAdd.
1647   if (RBReg != SPReg || SPAdd != 0) {
1648     assert(!HasRedZone && "This should not happen with red zone");
1649     // If SPAdd is 0, generate a copy.
1650     if (SPAdd == 0)
1651       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1652         .addReg(RBReg)
1653         .addReg(RBReg);
1654     else
1655       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1656         .addReg(RBReg)
1657         .addImm(SPAdd);
1658 
1659     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1660     if (RBReg == FPReg)
1661       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1662         .addReg(ScratchReg)
1663         .addReg(ScratchReg);
1664 
1665     // Now load the LR from the caller's stack frame.
1666     if (MustSaveLR && !LoadedLR)
1667       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1668         .addImm(LROffset)
1669         .addReg(SPReg);
1670   }
1671 
1672   if (MustSaveCR &&
1673       !(SingleScratchReg && MustSaveLR))
1674     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1675       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1676         .addReg(TempReg, getKillRegState(i == e-1));
1677 
1678   if (MustSaveLR)
1679     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1680 
1681   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1682   // call optimization
1683   if (IsReturnBlock) {
1684     unsigned RetOpcode = MBBI->getOpcode();
1685     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1686         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1687         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1688       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1689       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1690 
1691       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1692         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1693           .addReg(SPReg).addImm(CallerAllocatedAmt);
1694       } else {
1695         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1696           .addImm(CallerAllocatedAmt >> 16);
1697         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1698           .addReg(ScratchReg, RegState::Kill)
1699           .addImm(CallerAllocatedAmt & 0xFFFF);
1700         BuildMI(MBB, MBBI, dl, AddInst)
1701           .addReg(SPReg)
1702           .addReg(FPReg)
1703           .addReg(ScratchReg);
1704       }
1705     } else {
1706       createTailCallBranchInstr(MBB);
1707     }
1708   }
1709 }
1710 
1711 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1712   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1713 
1714   // If we got this far a first terminator should exist.
1715   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1716 
1717   DebugLoc dl = MBBI->getDebugLoc();
1718   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1719 
1720   // Create branch instruction for pseudo tail call return instruction.
1721   // The TCRETURNdi variants are direct calls. Valid targets for those are
1722   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1723   // since we can tail call external functions with PC-Rel (i.e. we don't need
1724   // to worry about different TOC pointers). Some of the external functions will
1725   // be MO_GlobalAddress while others like memcpy for example, are going to
1726   // be MO_ExternalSymbol.
1727   unsigned RetOpcode = MBBI->getOpcode();
1728   if (RetOpcode == PPC::TCRETURNdi) {
1729     MBBI = MBB.getLastNonDebugInstr();
1730     MachineOperand &JumpTarget = MBBI->getOperand(0);
1731     if (JumpTarget.isGlobal())
1732       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1733         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1734     else if (JumpTarget.isSymbol())
1735       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1736         addExternalSymbol(JumpTarget.getSymbolName());
1737     else
1738       llvm_unreachable("Expecting Global or External Symbol");
1739   } else if (RetOpcode == PPC::TCRETURNri) {
1740     MBBI = MBB.getLastNonDebugInstr();
1741     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1742     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1743   } else if (RetOpcode == PPC::TCRETURNai) {
1744     MBBI = MBB.getLastNonDebugInstr();
1745     MachineOperand &JumpTarget = MBBI->getOperand(0);
1746     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1747   } else if (RetOpcode == PPC::TCRETURNdi8) {
1748     MBBI = MBB.getLastNonDebugInstr();
1749     MachineOperand &JumpTarget = MBBI->getOperand(0);
1750     if (JumpTarget.isGlobal())
1751       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1752         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1753     else if (JumpTarget.isSymbol())
1754       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1755         addExternalSymbol(JumpTarget.getSymbolName());
1756     else
1757       llvm_unreachable("Expecting Global or External Symbol");
1758   } else if (RetOpcode == PPC::TCRETURNri8) {
1759     MBBI = MBB.getLastNonDebugInstr();
1760     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1761     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1762   } else if (RetOpcode == PPC::TCRETURNai8) {
1763     MBBI = MBB.getLastNonDebugInstr();
1764     MachineOperand &JumpTarget = MBBI->getOperand(0);
1765     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1766   }
1767 }
1768 
1769 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1770                                             BitVector &SavedRegs,
1771                                             RegScavenger *RS) const {
1772   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1773 
1774   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1775 
1776   //  Save and clear the LR state.
1777   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1778   unsigned LR = RegInfo->getRARegister();
1779   FI->setMustSaveLR(MustSaveLR(MF, LR));
1780   SavedRegs.reset(LR);
1781 
1782   //  Save R31 if necessary
1783   int FPSI = FI->getFramePointerSaveIndex();
1784   const bool isPPC64 = Subtarget.isPPC64();
1785   MachineFrameInfo &MFI = MF.getFrameInfo();
1786 
1787   // If the frame pointer save index hasn't been defined yet.
1788   if (!FPSI && needsFP(MF)) {
1789     // Find out what the fix offset of the frame pointer save area.
1790     int FPOffset = getFramePointerSaveOffset();
1791     // Allocate the frame index for frame pointer save area.
1792     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1793     // Save the result.
1794     FI->setFramePointerSaveIndex(FPSI);
1795   }
1796 
1797   int BPSI = FI->getBasePointerSaveIndex();
1798   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1799     int BPOffset = getBasePointerSaveOffset();
1800     // Allocate the frame index for the base pointer save area.
1801     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1802     // Save the result.
1803     FI->setBasePointerSaveIndex(BPSI);
1804   }
1805 
1806   // Reserve stack space for the PIC Base register (R30).
1807   // Only used in SVR4 32-bit.
1808   if (FI->usesPICBase()) {
1809     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1810     FI->setPICBasePointerSaveIndex(PBPSI);
1811   }
1812 
1813   // Make sure we don't explicitly spill r31, because, for example, we have
1814   // some inline asm which explicitly clobbers it, when we otherwise have a
1815   // frame pointer and are using r31's spill slot for the prologue/epilogue
1816   // code. Same goes for the base pointer and the PIC base register.
1817   if (needsFP(MF))
1818     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1819   if (RegInfo->hasBasePointer(MF))
1820     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1821   if (FI->usesPICBase())
1822     SavedRegs.reset(PPC::R30);
1823 
1824   // Reserve stack space to move the linkage area to in case of a tail call.
1825   int TCSPDelta = 0;
1826   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1827       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1828     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1829   }
1830 
1831   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
1832   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
1833   // object at the offset of the CR-save slot in the linkage area. The actual
1834   // save and restore of the condition register will be created as part of the
1835   // prologue and epilogue insertion, but the FixedStack object is needed to
1836   // keep the CalleSavedInfo valid.
1837   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1838        SavedRegs.test(PPC::CR4))) {
1839     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1840     const int64_t SpillOffset =
1841         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
1842     int FrameIdx =
1843         MFI.CreateFixedObject(SpillSize, SpillOffset,
1844                               /* IsImmutable */ true, /* IsAliased */ false);
1845     FI->setCRSpillFrameIndex(FrameIdx);
1846   }
1847 }
1848 
1849 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1850                                                        RegScavenger *RS) const {
1851   // Get callee saved register information.
1852   MachineFrameInfo &MFI = MF.getFrameInfo();
1853   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1854 
1855   // If the function is shrink-wrapped, and if the function has a tail call, the
1856   // tail call might not be in the new RestoreBlock, so real branch instruction
1857   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1858   // RestoreBlock. So we handle this case here.
1859   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1860     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1861     for (MachineBasicBlock &MBB : MF) {
1862       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1863         createTailCallBranchInstr(MBB);
1864     }
1865   }
1866 
1867   // Early exit if no callee saved registers are modified!
1868   if (CSI.empty() && !needsFP(MF)) {
1869     addScavengingSpillSlot(MF, RS);
1870     return;
1871   }
1872 
1873   unsigned MinGPR = PPC::R31;
1874   unsigned MinG8R = PPC::X31;
1875   unsigned MinFPR = PPC::F31;
1876   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1877 
1878   bool HasGPSaveArea = false;
1879   bool HasG8SaveArea = false;
1880   bool HasFPSaveArea = false;
1881   bool HasVRSaveArea = false;
1882 
1883   SmallVector<CalleeSavedInfo, 18> GPRegs;
1884   SmallVector<CalleeSavedInfo, 18> G8Regs;
1885   SmallVector<CalleeSavedInfo, 18> FPRegs;
1886   SmallVector<CalleeSavedInfo, 18> VRegs;
1887 
1888   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1889     unsigned Reg = CSI[i].getReg();
1890     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1891             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1892            "Not expecting to try to spill R2 in a function that must save TOC");
1893     if (PPC::GPRCRegClass.contains(Reg)) {
1894       HasGPSaveArea = true;
1895 
1896       GPRegs.push_back(CSI[i]);
1897 
1898       if (Reg < MinGPR) {
1899         MinGPR = Reg;
1900       }
1901     } else if (PPC::G8RCRegClass.contains(Reg)) {
1902       HasG8SaveArea = true;
1903 
1904       G8Regs.push_back(CSI[i]);
1905 
1906       if (Reg < MinG8R) {
1907         MinG8R = Reg;
1908       }
1909     } else if (PPC::F8RCRegClass.contains(Reg)) {
1910       HasFPSaveArea = true;
1911 
1912       FPRegs.push_back(CSI[i]);
1913 
1914       if (Reg < MinFPR) {
1915         MinFPR = Reg;
1916       }
1917     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1918                PPC::CRRCRegClass.contains(Reg)) {
1919       ; // do nothing, as we already know whether CRs are spilled
1920     } else if (PPC::VRRCRegClass.contains(Reg) ||
1921                PPC::SPERCRegClass.contains(Reg)) {
1922       // Altivec and SPE are mutually exclusive, but have the same stack
1923       // alignment requirements, so overload the save area for both cases.
1924       HasVRSaveArea = true;
1925 
1926       VRegs.push_back(CSI[i]);
1927 
1928       if (Reg < MinVR) {
1929         MinVR = Reg;
1930       }
1931     } else {
1932       llvm_unreachable("Unknown RegisterClass!");
1933     }
1934   }
1935 
1936   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1937   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1938 
1939   int64_t LowerBound = 0;
1940 
1941   // Take into account stack space reserved for tail calls.
1942   int TCSPDelta = 0;
1943   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1944       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1945     LowerBound = TCSPDelta;
1946   }
1947 
1948   // The Floating-point register save area is right below the back chain word
1949   // of the previous stack frame.
1950   if (HasFPSaveArea) {
1951     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1952       int FI = FPRegs[i].getFrameIdx();
1953 
1954       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1955     }
1956 
1957     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1958   }
1959 
1960   // Check whether the frame pointer register is allocated. If so, make sure it
1961   // is spilled to the correct offset.
1962   if (needsFP(MF)) {
1963     int FI = PFI->getFramePointerSaveIndex();
1964     assert(FI && "No Frame Pointer Save Slot!");
1965     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1966     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1967     HasGPSaveArea = true;
1968   }
1969 
1970   if (PFI->usesPICBase()) {
1971     int FI = PFI->getPICBasePointerSaveIndex();
1972     assert(FI && "No PIC Base Pointer Save Slot!");
1973     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1974 
1975     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1976     HasGPSaveArea = true;
1977   }
1978 
1979   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1980   if (RegInfo->hasBasePointer(MF)) {
1981     int FI = PFI->getBasePointerSaveIndex();
1982     assert(FI && "No Base Pointer Save Slot!");
1983     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1984 
1985     Register BP = RegInfo->getBaseRegister(MF);
1986     if (PPC::G8RCRegClass.contains(BP)) {
1987       MinG8R = std::min<unsigned>(MinG8R, BP);
1988       HasG8SaveArea = true;
1989     } else if (PPC::GPRCRegClass.contains(BP)) {
1990       MinGPR = std::min<unsigned>(MinGPR, BP);
1991       HasGPSaveArea = true;
1992     }
1993   }
1994 
1995   // General register save area starts right below the Floating-point
1996   // register save area.
1997   if (HasGPSaveArea || HasG8SaveArea) {
1998     // Move general register save area spill slots down, taking into account
1999     // the size of the Floating-point register save area.
2000     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2001       if (!GPRegs[i].isSpilledToReg()) {
2002         int FI = GPRegs[i].getFrameIdx();
2003         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2004       }
2005     }
2006 
2007     // Move general register save area spill slots down, taking into account
2008     // the size of the Floating-point register save area.
2009     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2010       if (!G8Regs[i].isSpilledToReg()) {
2011         int FI = G8Regs[i].getFrameIdx();
2012         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2013       }
2014     }
2015 
2016     unsigned MinReg =
2017       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2018                          TRI->getEncodingValue(MinG8R));
2019 
2020     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2021     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2022   }
2023 
2024   // For 32-bit only, the CR save area is below the general register
2025   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2026   // to the stack pointer and hence does not need an adjustment here.
2027   // Only CR2 (the first nonvolatile spilled) has an associated frame
2028   // index so that we have a single uniform save area.
2029   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2030     // Adjust the frame index of the CR spill slot.
2031     for (const auto &CSInfo : CSI) {
2032       if (CSInfo.getReg() == PPC::CR2) {
2033         int FI = CSInfo.getFrameIdx();
2034         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2035         break;
2036       }
2037     }
2038 
2039     LowerBound -= 4; // The CR save area is always 4 bytes long.
2040   }
2041 
2042   // Both Altivec and SPE have the same alignment and padding requirements
2043   // within the stack frame.
2044   if (HasVRSaveArea) {
2045     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2046     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2047     // we are using negative number here (the stack grows downward). We should
2048     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2049     // is the alignment size ( n = 16 here) and y is the size after aligning.
2050     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2051     LowerBound &= ~(15);
2052 
2053     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2054       int FI = VRegs[i].getFrameIdx();
2055 
2056       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2057     }
2058   }
2059 
2060   addScavengingSpillSlot(MF, RS);
2061 }
2062 
2063 void
2064 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2065                                          RegScavenger *RS) const {
2066   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2067   // a large stack, which will require scavenging a register to materialize a
2068   // large offset.
2069 
2070   // We need to have a scavenger spill slot for spills if the frame size is
2071   // large. In case there is no free register for large-offset addressing,
2072   // this slot is used for the necessary emergency spill. Also, we need the
2073   // slot for dynamic stack allocations.
2074 
2075   // The scavenger might be invoked if the frame offset does not fit into
2076   // the 16-bit immediate. We don't know the complete frame size here
2077   // because we've not yet computed callee-saved register spills or the
2078   // needed alignment padding.
2079   unsigned StackSize = determineFrameLayout(MF, true);
2080   MachineFrameInfo &MFI = MF.getFrameInfo();
2081   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2082       (hasSpills(MF) && !isInt<16>(StackSize))) {
2083     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2084     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2085     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2086     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2087     unsigned Size = TRI.getSpillSize(RC);
2088     Align Alignment = TRI.getSpillAlign(RC);
2089     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2090 
2091     // Might we have over-aligned allocas?
2092     bool HasAlVars =
2093         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2094 
2095     // These kinds of spills might need two registers.
2096     if (spillsCR(MF) || HasAlVars)
2097       RS->addScavengingFrameIndex(
2098           MFI.CreateStackObject(Size, Alignment, false));
2099   }
2100 }
2101 
2102 // This function checks if a callee saved gpr can be spilled to a volatile
2103 // vector register. This occurs for leaf functions when the option
2104 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2105 // which were not spilled to vectors, return false so the target independent
2106 // code can handle them by assigning a FrameIdx to a stack slot.
2107 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2108     MachineFunction &MF, const TargetRegisterInfo *TRI,
2109     std::vector<CalleeSavedInfo> &CSI) const {
2110 
2111   if (CSI.empty())
2112     return true; // Early exit if no callee saved registers are modified!
2113 
2114   // Early exit if cannot spill gprs to volatile vector registers.
2115   MachineFrameInfo &MFI = MF.getFrameInfo();
2116   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2117     return false;
2118 
2119   // Build a BitVector of VSRs that can be used for spilling GPRs.
2120   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2121   BitVector BVCalleeSaved(TRI->getNumRegs());
2122   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2123   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2124   for (unsigned i = 0; CSRegs[i]; ++i)
2125     BVCalleeSaved.set(CSRegs[i]);
2126 
2127   for (unsigned Reg : BVAllocatable.set_bits()) {
2128     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2129     // used in the function.
2130     if (BVCalleeSaved[Reg] ||
2131         (!PPC::F8RCRegClass.contains(Reg) &&
2132          !PPC::VFRCRegClass.contains(Reg)) ||
2133         (MF.getRegInfo().isPhysRegUsed(Reg)))
2134       BVAllocatable.reset(Reg);
2135   }
2136 
2137   bool AllSpilledToReg = true;
2138   for (auto &CS : CSI) {
2139     if (BVAllocatable.none())
2140       return false;
2141 
2142     unsigned Reg = CS.getReg();
2143     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2144       AllSpilledToReg = false;
2145       continue;
2146     }
2147 
2148     unsigned VolatileVFReg = BVAllocatable.find_first();
2149     if (VolatileVFReg < BVAllocatable.size()) {
2150       CS.setDstReg(VolatileVFReg);
2151       BVAllocatable.reset(VolatileVFReg);
2152     } else {
2153       AllSpilledToReg = false;
2154     }
2155   }
2156   return AllSpilledToReg;
2157 }
2158 
2159 bool PPCFrameLowering::spillCalleeSavedRegisters(
2160     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2161     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2162 
2163   MachineFunction *MF = MBB.getParent();
2164   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2165   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2166   bool MustSaveTOC = FI->mustSaveTOC();
2167   DebugLoc DL;
2168   bool CRSpilled = false;
2169   MachineInstrBuilder CRMIB;
2170 
2171   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2172     unsigned Reg = CSI[i].getReg();
2173 
2174     // CR2 through CR4 are the nonvolatile CR fields.
2175     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2176 
2177     // Add the callee-saved register as live-in; it's killed at the spill.
2178     // Do not do this for callee-saved registers that are live-in to the
2179     // function because they will already be marked live-in and this will be
2180     // adding it for a second time. It is an error to add the same register
2181     // to the set more than once.
2182     const MachineRegisterInfo &MRI = MF->getRegInfo();
2183     bool IsLiveIn = MRI.isLiveIn(Reg);
2184     if (!IsLiveIn)
2185        MBB.addLiveIn(Reg);
2186 
2187     if (CRSpilled && IsCRField) {
2188       CRMIB.addReg(Reg, RegState::ImplicitKill);
2189       continue;
2190     }
2191 
2192     // The actual spill will happen in the prologue.
2193     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2194       continue;
2195 
2196     // Insert the spill to the stack frame.
2197     if (IsCRField) {
2198       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2199       if (!Subtarget.is32BitELFABI()) {
2200         // The actual spill will happen at the start of the prologue.
2201         FuncInfo->addMustSaveCR(Reg);
2202       } else {
2203         CRSpilled = true;
2204         FuncInfo->setSpillsCR();
2205 
2206         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2207         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2208         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2209                   .addReg(Reg, RegState::ImplicitKill);
2210 
2211         MBB.insert(MI, CRMIB);
2212         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2213                                          .addReg(PPC::R12,
2214                                                  getKillRegState(true)),
2215                                          CSI[i].getFrameIdx()));
2216       }
2217     } else {
2218       if (CSI[i].isSpilledToReg()) {
2219         NumPESpillVSR++;
2220         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2221           .addReg(Reg, getKillRegState(true));
2222       } else {
2223         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2224         // Use !IsLiveIn for the kill flag.
2225         // We do not want to kill registers that are live in this function
2226         // before their use because they will become undefined registers.
2227         // Functions without NoUnwind need to preserve the order of elements in
2228         // saved vector registers.
2229         if (Subtarget.needsSwapsForVSXMemOps() &&
2230             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2231           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2232                                        CSI[i].getFrameIdx(), RC, TRI);
2233         else
2234           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2235                                   RC, TRI);
2236       }
2237     }
2238   }
2239   return true;
2240 }
2241 
2242 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2243                        bool CR4Spilled, MachineBasicBlock &MBB,
2244                        MachineBasicBlock::iterator MI,
2245                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2246 
2247   MachineFunction *MF = MBB.getParent();
2248   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2249   DebugLoc DL;
2250   unsigned MoveReg = PPC::R12;
2251 
2252   // 32-bit:  FP-relative
2253   MBB.insert(MI,
2254              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2255                                CSI[CSIIndex].getFrameIdx()));
2256 
2257   unsigned RestoreOp = PPC::MTOCRF;
2258   if (CR2Spilled)
2259     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2260                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2261 
2262   if (CR3Spilled)
2263     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2264                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2265 
2266   if (CR4Spilled)
2267     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2268                .addReg(MoveReg, getKillRegState(true)));
2269 }
2270 
2271 MachineBasicBlock::iterator PPCFrameLowering::
2272 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2273                               MachineBasicBlock::iterator I) const {
2274   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2275   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2276       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2277     // Add (actually subtract) back the amount the callee popped on return.
2278     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2279       bool is64Bit = Subtarget.isPPC64();
2280       CalleeAmt *= -1;
2281       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2282       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2283       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2284       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2285       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2286       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2287       const DebugLoc &dl = I->getDebugLoc();
2288 
2289       if (isInt<16>(CalleeAmt)) {
2290         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2291           .addReg(StackReg, RegState::Kill)
2292           .addImm(CalleeAmt);
2293       } else {
2294         MachineBasicBlock::iterator MBBI = I;
2295         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2296           .addImm(CalleeAmt >> 16);
2297         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2298           .addReg(TmpReg, RegState::Kill)
2299           .addImm(CalleeAmt & 0xFFFF);
2300         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2301           .addReg(StackReg, RegState::Kill)
2302           .addReg(TmpReg);
2303       }
2304     }
2305   }
2306   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2307   return MBB.erase(I);
2308 }
2309 
2310 static bool isCalleeSavedCR(unsigned Reg) {
2311   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2312 }
2313 
2314 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2315     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2316     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2317   MachineFunction *MF = MBB.getParent();
2318   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2319   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2320   bool MustSaveTOC = FI->mustSaveTOC();
2321   bool CR2Spilled = false;
2322   bool CR3Spilled = false;
2323   bool CR4Spilled = false;
2324   unsigned CSIIndex = 0;
2325 
2326   // Initialize insertion-point logic; we will be restoring in reverse
2327   // order of spill.
2328   MachineBasicBlock::iterator I = MI, BeforeI = I;
2329   bool AtStart = I == MBB.begin();
2330 
2331   if (!AtStart)
2332     --BeforeI;
2333 
2334   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2335     unsigned Reg = CSI[i].getReg();
2336 
2337     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2338       continue;
2339 
2340     // Restore of callee saved condition register field is handled during
2341     // epilogue insertion.
2342     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2343       continue;
2344 
2345     if (Reg == PPC::CR2) {
2346       CR2Spilled = true;
2347       // The spill slot is associated only with CR2, which is the
2348       // first nonvolatile spilled.  Save it here.
2349       CSIIndex = i;
2350       continue;
2351     } else if (Reg == PPC::CR3) {
2352       CR3Spilled = true;
2353       continue;
2354     } else if (Reg == PPC::CR4) {
2355       CR4Spilled = true;
2356       continue;
2357     } else {
2358       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2359       // least one CR register, restore all spilled CRs together.
2360       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2361         bool is31 = needsFP(*MF);
2362         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2363                    CSIIndex);
2364         CR2Spilled = CR3Spilled = CR4Spilled = false;
2365       }
2366 
2367       if (CSI[i].isSpilledToReg()) {
2368         DebugLoc DL;
2369         NumPEReloadVSR++;
2370         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2371             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2372       } else {
2373        // Default behavior for non-CR saves.
2374         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2375 
2376         // Functions without NoUnwind need to preserve the order of elements in
2377         // saved vector registers.
2378         if (Subtarget.needsSwapsForVSXMemOps() &&
2379             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2380           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2381                                         TRI);
2382         else
2383           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2384 
2385         assert(I != MBB.begin() &&
2386                "loadRegFromStackSlot didn't insert any code!");
2387       }
2388     }
2389 
2390     // Insert in reverse order.
2391     if (AtStart)
2392       I = MBB.begin();
2393     else {
2394       I = BeforeI;
2395       ++I;
2396     }
2397   }
2398 
2399   // If we haven't yet spilled the CRs, do so now.
2400   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2401     assert(Subtarget.is32BitELFABI() &&
2402            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2403     bool is31 = needsFP(*MF);
2404     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2405   }
2406 
2407   return true;
2408 }
2409 
2410 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2411   return TOCSaveOffset;
2412 }
2413 
2414 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2415   return FramePointerSaveOffset;
2416 }
2417 
2418 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2419   return BasePointerSaveOffset;
2420 }
2421 
2422 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2423   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2424     return false;
2425   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2426           MF.getSubtarget<PPCSubtarget>().isPPC64());
2427 }
2428