xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision e1c03ddc9b03b820b421d8b3bca6a94e4d1a4675)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterScavenging.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/Target/TargetOptions.h"
30 
31 using namespace llvm;
32 
// Debug/statistics category name used by the counters declared below.
#define DEBUG_TYPE "framelowering"
// Counters for this file; each description string states what is counted.
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

// Hidden boolean command-line option "ppc-enable-pe-vector-spills".
// It is initialised to false, so prologue spills to vector registers are
// disabled unless the option is given explicitly.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
42 
43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44   if (STI.isAIXABI())
45     return STI.isPPC64() ? 16 : 8;
46   // SVR4 ABI:
47   return STI.isPPC64() ? 16 : 4;
48 }
49 
50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51   if (STI.isAIXABI())
52     return STI.isPPC64() ? 40 : 20;
53   return STI.isELFv2ABI() ? 24 : 40;
54 }
55 
56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57   // First slot in the general register save area.
58   return STI.isPPC64() ? -8U : -4U;
59 }
60 
61 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62   if (STI.isAIXABI() || STI.isPPC64())
63     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64 
65   // 32-bit SVR4 ABI:
66   return 8;
67 }
68 
69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70   // Third slot in the general purpose register save area.
71   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72     return -12U;
73 
74   // Second slot in the general purpose register save area.
75   return STI.isPPC64() ? -16U : -8U;
76 }
77 
78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80 }
81 
82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84                           STI.getPlatformStackAlignment(), 0),
85       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88       LinkageSize(computeLinkageSize(Subtarget)),
89       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
90       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91 
92 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
93 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
94     unsigned &NumEntries) const {
95 
96 // Floating-point register save area offsets.
97 #define CALLEE_SAVED_FPRS \
98       {PPC::F31, -8},     \
99       {PPC::F30, -16},    \
100       {PPC::F29, -24},    \
101       {PPC::F28, -32},    \
102       {PPC::F27, -40},    \
103       {PPC::F26, -48},    \
104       {PPC::F25, -56},    \
105       {PPC::F24, -64},    \
106       {PPC::F23, -72},    \
107       {PPC::F22, -80},    \
108       {PPC::F21, -88},    \
109       {PPC::F20, -96},    \
110       {PPC::F19, -104},   \
111       {PPC::F18, -112},   \
112       {PPC::F17, -120},   \
113       {PPC::F16, -128},   \
114       {PPC::F15, -136},   \
115       {PPC::F14, -144}
116 
117 // 32-bit general purpose register save area offsets shared by ELF and
118 // AIX. AIX has an extra CSR with r13.
119 #define CALLEE_SAVED_GPRS32 \
120       {PPC::R31, -4},       \
121       {PPC::R30, -8},       \
122       {PPC::R29, -12},      \
123       {PPC::R28, -16},      \
124       {PPC::R27, -20},      \
125       {PPC::R26, -24},      \
126       {PPC::R25, -28},      \
127       {PPC::R24, -32},      \
128       {PPC::R23, -36},      \
129       {PPC::R22, -40},      \
130       {PPC::R21, -44},      \
131       {PPC::R20, -48},      \
132       {PPC::R19, -52},      \
133       {PPC::R18, -56},      \
134       {PPC::R17, -60},      \
135       {PPC::R16, -64},      \
136       {PPC::R15, -68},      \
137       {PPC::R14, -72}
138 
139 // 64-bit general purpose register save area offsets.
140 #define CALLEE_SAVED_GPRS64 \
141       {PPC::X31, -8},       \
142       {PPC::X30, -16},      \
143       {PPC::X29, -24},      \
144       {PPC::X28, -32},      \
145       {PPC::X27, -40},      \
146       {PPC::X26, -48},      \
147       {PPC::X25, -56},      \
148       {PPC::X24, -64},      \
149       {PPC::X23, -72},      \
150       {PPC::X22, -80},      \
151       {PPC::X21, -88},      \
152       {PPC::X20, -96},      \
153       {PPC::X19, -104},     \
154       {PPC::X18, -112},     \
155       {PPC::X17, -120},     \
156       {PPC::X16, -128},     \
157       {PPC::X15, -136},     \
158       {PPC::X14, -144}
159 
160 // Vector register save area offsets.
161 #define CALLEE_SAVED_VRS \
162       {PPC::V31, -16},   \
163       {PPC::V30, -32},   \
164       {PPC::V29, -48},   \
165       {PPC::V28, -64},   \
166       {PPC::V27, -80},   \
167       {PPC::V26, -96},   \
168       {PPC::V25, -112},  \
169       {PPC::V24, -128},  \
170       {PPC::V23, -144},  \
171       {PPC::V22, -160},  \
172       {PPC::V21, -176},  \
173       {PPC::V20, -192}
174 
175   // Note that the offsets here overlap, but this is fixed up in
176   // processFunctionBeforeFrameFinalized.
177 
178   static const SpillSlot ELFOffsets32[] = {
179       CALLEE_SAVED_FPRS,
180       CALLEE_SAVED_GPRS32,
181 
182       // CR save area offset.  We map each of the nonvolatile CR fields
183       // to the slot for CR2, which is the first of the nonvolatile CR
184       // fields to be assigned, so that we only allocate one save slot.
185       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
186       {PPC::CR2, -4},
187 
188       // VRSAVE save area offset.
189       {PPC::VRSAVE, -4},
190 
191       CALLEE_SAVED_VRS,
192 
193       // SPE register save area (overlaps Vector save area).
194       {PPC::S31, -8},
195       {PPC::S30, -16},
196       {PPC::S29, -24},
197       {PPC::S28, -32},
198       {PPC::S27, -40},
199       {PPC::S26, -48},
200       {PPC::S25, -56},
201       {PPC::S24, -64},
202       {PPC::S23, -72},
203       {PPC::S22, -80},
204       {PPC::S21, -88},
205       {PPC::S20, -96},
206       {PPC::S19, -104},
207       {PPC::S18, -112},
208       {PPC::S17, -120},
209       {PPC::S16, -128},
210       {PPC::S15, -136},
211       {PPC::S14, -144}};
212 
213   static const SpillSlot ELFOffsets64[] = {
214       CALLEE_SAVED_FPRS,
215       CALLEE_SAVED_GPRS64,
216 
217       // VRSAVE save area offset.
218       {PPC::VRSAVE, -4},
219       CALLEE_SAVED_VRS
220   };
221 
222   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
223                                            CALLEE_SAVED_GPRS32,
224                                            // Add AIX's extra CSR.
225                                            {PPC::R13, -76},
226                                            CALLEE_SAVED_VRS};
227 
228   static const SpillSlot AIXOffsets64[] = {
229       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
230 
231   if (Subtarget.is64BitELFABI()) {
232     NumEntries = std::size(ELFOffsets64);
233     return ELFOffsets64;
234   }
235 
236   if (Subtarget.is32BitELFABI()) {
237     NumEntries = std::size(ELFOffsets32);
238     return ELFOffsets32;
239   }
240 
241   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
242 
243   if (Subtarget.isPPC64()) {
244     NumEntries = std::size(AIXOffsets64);
245     return AIXOffsets64;
246   }
247 
248   NumEntries = std::size(AIXOffsets32);
249   return AIXOffsets32;
250 }
251 
252 static bool spillsCR(const MachineFunction &MF) {
253   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254   return FuncInfo->isCRSpilled();
255 }
256 
257 static bool hasSpills(const MachineFunction &MF) {
258   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259   return FuncInfo->hasSpills();
260 }
261 
262 static bool hasNonRISpills(const MachineFunction &MF) {
263   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264   return FuncInfo->hasNonRISpills();
265 }
266 
267 /// MustSaveLR - Return true if this function requires that we save the LR
268 /// register onto the stack in the prolog and restore it in the epilog of the
269 /// function.
270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272 
273   // We need a save/restore of LR if there is any def of LR (which is
274   // defined by calls, including the PIC setup sequence), or if there is
275   // some use of the LR stack slot (e.g. for builtin_return_address).
276   // (LR comes in 32 and 64 bit versions.)
277   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279 }
280 
281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282 /// call frame size. Update the MachineFunction object with the stack size.
283 uint64_t
284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285                                                 bool UseEstimate) const {
286   unsigned NewMaxCallFrameSize = 0;
287   uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288                                             &NewMaxCallFrameSize);
289   MF.getFrameInfo().setStackSize(FrameSize);
290   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291   return FrameSize;
292 }
293 
294 /// determineFrameLayout - Determine the size of the frame and maximum call
295 /// frame size.
296 uint64_t
297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298                                        bool UseEstimate,
299                                        unsigned *NewMaxCallFrameSize) const {
300   const MachineFrameInfo &MFI = MF.getFrameInfo();
301   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302 
303   // Get the number of bytes to allocate from the FrameInfo
304   uint64_t FrameSize =
305     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306 
307   // Get stack alignments. The frame must be aligned to the greatest of these:
308   Align TargetAlign = getStackAlign(); // alignment required per the ABI
309   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
310   Align Alignment = std::max(TargetAlign, MaxAlign);
311 
312   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313 
314   unsigned LR = RegInfo->getRARegister();
315   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317                        !MFI.adjustsStack() &&       // No calls.
318                        !MustSaveLR(MF, LR) &&       // No need to save LR.
319                        !FI->mustSaveTOC() &&        // No need to save TOC.
320                        !RegInfo->hasBasePointer(MF) && // No special alignment.
321                        !MFI.isFrameAddressTaken();
322 
323   // Note: for PPC32 SVR4ABI, we can still generate stackless
324   // code if all local vars are reg-allocated.
325   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
326 
327   // Check whether we can skip adjusting the stack pointer (by using red zone)
328   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
329     // No need for frame
330     return 0;
331   }
332 
333   // Get the maximum call frame size of all the calls.
334   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
335 
336   // Maximum call frame needs to be at least big enough for linkage area.
337   unsigned minCallFrameSize = getLinkageSize();
338   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
339 
340   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
341   // that allocations will be aligned.
342   if (MFI.hasVarSizedObjects())
343     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
344 
345   // Update the new max call frame size if the caller passes in a valid pointer.
346   if (NewMaxCallFrameSize)
347     *NewMaxCallFrameSize = maxCallFrameSize;
348 
349   // Include call frame size in total.
350   FrameSize += maxCallFrameSize;
351 
352   // Make sure the frame is aligned.
353   FrameSize = alignTo(FrameSize, Alignment);
354 
355   return FrameSize;
356 }
357 
358 // hasFP - Return true if the specified function actually has a dedicated frame
359 // pointer register.
360 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
361   const MachineFrameInfo &MFI = MF.getFrameInfo();
362   // FIXME: This is pretty much broken by design: hasFP() might be called really
363   // early, before the stack layout was calculated and thus hasFP() might return
364   // true or false here depending on the time of call.
365   return (MFI.getStackSize()) && needsFP(MF);
366 }
367 
368 // needsFP - Return true if the specified function should have a dedicated frame
369 // pointer register.  This is true if the function has variable sized allocas or
370 // if frame pointer elimination is disabled.
371 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
372   const MachineFrameInfo &MFI = MF.getFrameInfo();
373 
374   // Naked functions have no stack frame pushed, so we don't have a frame
375   // pointer.
376   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
377     return false;
378 
379   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
380          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
381          MF.exposesReturnsTwice() ||
382          (MF.getTarget().Options.GuaranteedTailCallOpt &&
383           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
384 }
385 
386 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
387   // When there is dynamic alloca in this function, we can not use the frame
388   // pointer X31/R31 for the frameaddress lowering. In this case, only X1/R1
389   // always points to the backchain.
390   bool is31 = needsFP(MF) && !MF.getFrameInfo().hasVarSizedObjects();
391   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
392   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
393 
394   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
395   bool HasBP = RegInfo->hasBasePointer(MF);
396   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
397   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
398 
399   for (MachineBasicBlock &MBB : MF)
400     for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
401       --MBBI;
402       for (MachineOperand &MO : MBBI->operands()) {
403         if (!MO.isReg())
404           continue;
405 
406         switch (MO.getReg()) {
407         case PPC::FP:
408           MO.setReg(FPReg);
409           break;
410         case PPC::FP8:
411           MO.setReg(FP8Reg);
412           break;
413         case PPC::BP:
414           MO.setReg(BPReg);
415           break;
416         case PPC::BP8:
417           MO.setReg(BP8Reg);
418           break;
419 
420         }
421       }
422     }
423 }
424 
425 /*  This function will do the following:
426     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
427       respectively (defaults recommended by the ABI) and return true
428     - If MBB is not an entry block, initialize the register scavenger and look
429       for available registers.
430     - If the defaults (R0/R12) are available, return true
431     - If TwoUniqueRegsRequired is set to true, it looks for two unique
432       registers. Otherwise, look for a single available register.
433       - If the required registers are found, set SR1 and SR2 and return true.
434       - If the required registers are not found, set SR2 or both SR1 and SR2 to
435         PPC::NoRegister and return false.
436 
437     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
438     is not set, this function will attempt to find two different registers, but
439     still return true if only one register is available (and set SR1 == SR2).
440 */
441 bool
442 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
443                                       bool UseAtEnd,
444                                       bool TwoUniqueRegsRequired,
445                                       Register *SR1,
446                                       Register *SR2) const {
447   RegScavenger RS;
448   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
449   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
450 
451   // Set the defaults for the two scratch registers.
452   if (SR1)
453     *SR1 = R0;
454 
455   if (SR2) {
456     assert (SR1 && "Asking for the second scratch register but not the first?");
457     *SR2 = R12;
458   }
459 
460   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
461   if ((UseAtEnd && MBB->isReturnBlock()) ||
462       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
463     return true;
464 
465   if (UseAtEnd) {
466     // The scratch register will be used before the first terminator (or at the
467     // end of the block if there are no terminators).
468     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
469     if (MBBI == MBB->begin()) {
470       RS.enterBasicBlock(*MBB);
471     } else {
472       RS.enterBasicBlockEnd(*MBB);
473       RS.backward(MBBI);
474     }
475   } else {
476     // The scratch register will be used at the start of the block.
477     RS.enterBasicBlock(*MBB);
478   }
479 
480   // If the two registers are available, we're all good.
481   // Note that we only return here if both R0 and R12 are available because
482   // although the function may not require two unique registers, it may benefit
483   // from having two so we should try to provide them.
484   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
485     return true;
486 
487   // Get the list of callee-saved registers for the target.
488   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
489   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
490 
491   // Get all the available registers in the block.
492   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
493                                      &PPC::GPRCRegClass);
494 
495   // We shouldn't use callee-saved registers as scratch registers as they may be
496   // available when looking for a candidate block for shrink wrapping but not
497   // available when the actual prologue/epilogue is being emitted because they
498   // were added as live-in to the prologue block by PrologueEpilogueInserter.
499   for (int i = 0; CSRegs[i]; ++i)
500     BV.reset(CSRegs[i]);
501 
502   // Set the first scratch register to the first available one.
503   if (SR1) {
504     int FirstScratchReg = BV.find_first();
505     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
506   }
507 
508   // If there is another one available, set the second scratch register to that.
509   // Otherwise, set it to either PPC::NoRegister if this function requires two
510   // or to whatever SR1 is set to if this function doesn't require two.
511   if (SR2) {
512     int SecondScratchReg = BV.find_next(*SR1);
513     if (SecondScratchReg != -1)
514       *SR2 = SecondScratchReg;
515     else
516       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
517   }
518 
519   // Now that we've done our best to provide both registers, double check
520   // whether we were unable to provide enough.
521   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
522     return false;
523 
524   return true;
525 }
526 
527 // We need a scratch register for spilling LR and for spilling CR. By default,
528 // we use two scratch registers to hide latency. However, if only one scratch
529 // register is available, we can adjust for that by not overlapping the spill
530 // code. However, if we need to realign the stack (i.e. have a base pointer)
531 // and the stack frame is large, we need two scratch registers.
532 // Also, stack probe requires two scratch registers, one for old sp, one for
533 // large frame and large probe size.
534 bool
535 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
536   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
537   MachineFunction &MF = *(MBB->getParent());
538   bool HasBP = RegInfo->hasBasePointer(MF);
539   unsigned FrameSize = determineFrameLayout(MF);
540   int NegFrameSize = -FrameSize;
541   bool IsLargeFrame = !isInt<16>(NegFrameSize);
542   MachineFrameInfo &MFI = MF.getFrameInfo();
543   Align MaxAlign = MFI.getMaxAlign();
544   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
545   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
546 
547   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
548          TLI.hasInlineStackProbe(MF);
549 }
550 
551 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
552   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
553 
554   return findScratchRegister(TmpMBB, false,
555                              twoUniqueScratchRegsRequired(TmpMBB));
556 }
557 
558 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
559   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
560 
561   return findScratchRegister(TmpMBB, true);
562 }
563 
564 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
565   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
566   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
567 
568   // Abort if there is no register info or function info.
569   if (!RegInfo || !FI)
570     return false;
571 
572   // Only move the stack update on ELFv2 ABI and PPC64.
573   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
574     return false;
575 
576   // Check the frame size first and return false if it does not fit the
577   // requirements.
578   // We need a non-zero frame size as well as a frame that will fit in the red
579   // zone. This is because by moving the stack pointer update we are now storing
580   // to the red zone until the stack pointer is updated. If we get an interrupt
581   // inside the prologue but before the stack update we now have a number of
582   // stores to the red zone and those stores must all fit.
583   MachineFrameInfo &MFI = MF.getFrameInfo();
584   unsigned FrameSize = MFI.getStackSize();
585   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
586     return false;
587 
588   // Frame pointers and base pointers complicate matters so don't do anything
589   // if we have them. For example having a frame pointer will sometimes require
590   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
591   // difficult. Similar situation exists with setjmp.
592   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
593     return false;
594 
595   // Calls to fast_cc functions use different rules for passing parameters on
596   // the stack from the ABI and using PIC base in the function imposes
597   // similar restrictions to using the base pointer. It is not generally safe
598   // to move the stack pointer update in these situations.
599   if (FI->hasFastCall() || FI->usesPICBase())
600     return false;
601 
602   // Finally we can move the stack update if we do not require register
603   // scavenging. Register scavenging can introduce more spills and so
604   // may make the frame size larger than we have computed.
605   return !RegInfo->requiresFrameIndexScavenging(MF);
606 }
607 
608 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
609                                     MachineBasicBlock &MBB) const {
610   MachineBasicBlock::iterator MBBI = MBB.begin();
611   MachineFrameInfo &MFI = MF.getFrameInfo();
612   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
613   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
614   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
615 
616   MachineModuleInfo &MMI = MF.getMMI();
617   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
618   DebugLoc dl;
619   // AIX assembler does not support cfi directives.
620   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
621 
622   const bool HasFastMFLR = Subtarget.hasFastMFLR();
623 
624   // Get processor type.
625   bool isPPC64 = Subtarget.isPPC64();
626   // Get the ABI.
627   bool isSVR4ABI = Subtarget.isSVR4ABI();
628   bool isELFv2ABI = Subtarget.isELFv2ABI();
629   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
630 
631   // Work out frame sizes.
632   uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
633   int64_t NegFrameSize = -FrameSize;
634   if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
635     llvm_unreachable("Unhandled stack size!");
636 
637   if (MFI.isFrameAddressTaken())
638     replaceFPWithRealFP(MF);
639 
640   // Check if the link register (LR) must be saved.
641   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
642   bool MustSaveLR = FI->mustSaveLR();
643   bool MustSaveTOC = FI->mustSaveTOC();
644   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
645   bool MustSaveCR = !MustSaveCRs.empty();
646   // Do we have a frame pointer and/or base pointer for this function?
647   bool HasFP = hasFP(MF);
648   bool HasBP = RegInfo->hasBasePointer(MF);
649   bool HasRedZone = isPPC64 || !isSVR4ABI;
650   bool HasROPProtect = Subtarget.hasROPProtect();
651   bool HasPrivileged = Subtarget.hasPrivileged();
652 
653   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
654   Register BPReg = RegInfo->getBaseRegister(MF);
655   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
656   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
657   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
658   Register ScratchReg;
659   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
660   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
661   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
662                                                 : PPC::MFLR );
663   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
664                                                  : PPC::STW );
665   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
666                                                      : PPC::STWU );
667   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
668                                                         : PPC::STWUX);
669   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
670                                               : PPC::OR );
671   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
672                                                             : PPC::SUBFC);
673   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
674                                                                : PPC::SUBFIC);
675   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
676                                                            : PPC::MFCR);
677   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
678   const MCInstrDesc &HashST =
679       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
680                       : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
681 
682   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
683   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
684   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
685   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
686   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
687          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
688 
689   // Using the same bool variable as below to suppress compiler warnings.
690   bool SingleScratchReg = findScratchRegister(
691       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
692   assert(SingleScratchReg &&
693          "Required number of registers not available in this block");
694 
695   SingleScratchReg = ScratchReg == TempReg;
696 
697   int64_t LROffset = getReturnSaveOffset();
698 
699   int64_t FPOffset = 0;
700   if (HasFP) {
701     MachineFrameInfo &MFI = MF.getFrameInfo();
702     int FPIndex = FI->getFramePointerSaveIndex();
703     assert(FPIndex && "No Frame Pointer Save Slot!");
704     FPOffset = MFI.getObjectOffset(FPIndex);
705   }
706 
707   int64_t BPOffset = 0;
708   if (HasBP) {
709     MachineFrameInfo &MFI = MF.getFrameInfo();
710     int BPIndex = FI->getBasePointerSaveIndex();
711     assert(BPIndex && "No Base Pointer Save Slot!");
712     BPOffset = MFI.getObjectOffset(BPIndex);
713   }
714 
715   int64_t PBPOffset = 0;
716   if (FI->usesPICBase()) {
717     MachineFrameInfo &MFI = MF.getFrameInfo();
718     int PBPIndex = FI->getPICBasePointerSaveIndex();
719     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
720     PBPOffset = MFI.getObjectOffset(PBPIndex);
721   }
722 
723   // Get stack alignments.
724   Align MaxAlign = MFI.getMaxAlign();
725   if (HasBP && MaxAlign > 1)
726     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
727 
728   // Frames of 32KB & larger require special handling because they cannot be
729   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
730   bool isLargeFrame = !isInt<16>(NegFrameSize);
731 
732   // Check if we can move the stack update instruction (stdu) down the prologue
733   // past the callee saves. Hopefully this will avoid the situation where the
734   // saves are waiting for the update on the store with update to complete.
735   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
736   bool MovingStackUpdateDown = false;
737 
738   // Check if we can move the stack update.
739   if (stackUpdateCanBeMoved(MF)) {
740     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
741     for (CalleeSavedInfo CSI : Info) {
742       // If the callee saved register is spilled to a register instead of the
743       // stack then the spill no longer uses the stack pointer.
744       // This can lead to two consequences:
745       // 1) We no longer need to update the stack because the function does not
746       //    spill any callee saved registers to stack.
747       // 2) We have a situation where we still have to update the stack pointer
748       //    even though some registers are spilled to other registers. In
749       //    this case the current code moves the stack update to an incorrect
750       //    position.
751       // In either case we should abort moving the stack update operation.
752       if (CSI.isSpilledToReg()) {
753         StackUpdateLoc = MBBI;
754         MovingStackUpdateDown = false;
755         break;
756       }
757 
758       int FrIdx = CSI.getFrameIdx();
759       // If the frame index is not negative the callee saved info belongs to a
760       // stack object that is not a fixed stack object. We ignore non-fixed
761       // stack objects because we won't move the stack update pointer past them.
762       if (FrIdx >= 0)
763         continue;
764 
765       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
766         StackUpdateLoc++;
767         MovingStackUpdateDown = true;
768       } else {
769         // We need all of the Frame Indices to meet these conditions.
770         // If they do not, abort the whole operation.
771         StackUpdateLoc = MBBI;
772         MovingStackUpdateDown = false;
773         break;
774       }
775     }
776 
777     // If the operation was not aborted then update the object offset.
778     if (MovingStackUpdateDown) {
779       for (CalleeSavedInfo CSI : Info) {
780         int FrIdx = CSI.getFrameIdx();
781         if (FrIdx < 0)
782           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
783       }
784     }
785   }
786 
787   // Where in the prologue we move the CR fields depends on how many scratch
788   // registers we have, and if we need to save the link register or not. This
789   // lambda is to avoid duplicating the logic in 2 places.
790   auto BuildMoveFromCR = [&]() {
791     if (isELFv2ABI && MustSaveCRs.size() == 1) {
792     // In the ELFv2 ABI, we are not required to save all CR fields.
793     // If only one CR field is clobbered, it is more efficient to use
794     // mfocrf to selectively save just that field, because mfocrf has shorter
795     // latency than mfcr.
796       assert(isPPC64 && "V2 ABI is 64-bit only.");
797       MachineInstrBuilder MIB =
798           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
799       MIB.addReg(MustSaveCRs[0], RegState::Kill);
800     } else {
801       MachineInstrBuilder MIB =
802           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
803       for (unsigned CRfield : MustSaveCRs)
804         MIB.addReg(CRfield, RegState::ImplicitKill);
805     }
806   };
807 
808   // If we need to spill the CR and the LR but we don't have two separate
809   // registers available, we must spill them one at a time
810   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
811     BuildMoveFromCR();
812     BuildMI(MBB, MBBI, dl, StoreWordInst)
813         .addReg(TempReg, getKillRegState(true))
814         .addImm(CRSaveOffset)
815         .addReg(SPReg);
816   }
817 
818   if (MustSaveLR)
819     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
820 
821   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
822     BuildMoveFromCR();
823 
824   if (HasRedZone) {
825     if (HasFP)
826       BuildMI(MBB, MBBI, dl, StoreInst)
827         .addReg(FPReg)
828         .addImm(FPOffset)
829         .addReg(SPReg);
830     if (FI->usesPICBase())
831       BuildMI(MBB, MBBI, dl, StoreInst)
832         .addReg(PPC::R30)
833         .addImm(PBPOffset)
834         .addReg(SPReg);
835     if (HasBP)
836       BuildMI(MBB, MBBI, dl, StoreInst)
837         .addReg(BPReg)
838         .addImm(BPOffset)
839         .addReg(SPReg);
840   }
841 
842   // Generate the instruction to store the LR. In the case where ROP protection
843   // is required the register holding the LR should not be killed as it will be
844   // used by the hash store instruction.
845   auto SaveLR = [&](int64_t Offset) {
846     assert(MustSaveLR && "LR is not required to be saved!");
847     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
848         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
849         .addImm(Offset)
850         .addReg(SPReg);
851 
852     // Add the ROP protection Hash Store instruction.
853     // NOTE: This is technically a violation of the ABI. The hash can be saved
854     // up to 512 bytes into the Protected Zone. This can be outside of the
855     // initial 288 byte volatile program storage region in the Protected Zone.
856     // However, this restriction will be removed in an upcoming revision of the
857     // ABI.
858     if (HasROPProtect) {
859       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
860       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
861       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
862              "ROP hash save offset out of range.");
863       assert(((ImmOffset & 0x7) == 0) &&
864              "ROP hash save offset must be 8 byte aligned.");
865       BuildMI(MBB, StackUpdateLoc, dl, HashST)
866           .addReg(ScratchReg, getKillRegState(true))
867           .addImm(ImmOffset)
868           .addReg(SPReg);
869     }
870   };
871 
872   if (MustSaveLR && HasFastMFLR)
873       SaveLR(LROffset);
874 
875   if (MustSaveCR &&
876       !(SingleScratchReg && MustSaveLR)) {
877     assert(HasRedZone && "A red zone is always available on PPC64");
878     BuildMI(MBB, MBBI, dl, StoreWordInst)
879       .addReg(TempReg, getKillRegState(true))
880       .addImm(CRSaveOffset)
881       .addReg(SPReg);
882   }
883 
884   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
885   if (!FrameSize) {
886     if (MustSaveLR && !HasFastMFLR)
887       SaveLR(LROffset);
888     return;
889   }
890 
891   // Adjust stack pointer: r1 += NegFrameSize.
892   // If there is a preferred stack alignment, align R1 now
893 
894   if (HasBP && HasRedZone) {
895     // Save a copy of r1 as the base pointer.
896     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
897       .addReg(SPReg)
898       .addReg(SPReg);
899   }
900 
901   // Have we generated a STUX instruction to claim stack frame? If so,
902   // the negated frame size will be placed in ScratchReg.
903   bool HasSTUX =
904       (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
905       (HasBP && MaxAlign > 1) || isLargeFrame;
906 
907   // If we use STUX to update the stack pointer, we need the two scratch
908   // registers TempReg and ScratchReg, we have to save LR here which is stored
909   // in ScratchReg.
910   // If the offset can not be encoded into the store instruction, we also have
911   // to save LR here.
912   if (MustSaveLR && !HasFastMFLR &&
913       (HasSTUX || !isInt<16>(FrameSize + LROffset)))
914     SaveLR(LROffset);
915 
916   // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe from the
917   // essential STU(X) instruction, because the POWER ABI requires the backchain
918   // pointer to always be stored at SP.
919   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
920     // To be consistent with other targets, a pseudo instruction is emitted and
921     // will be later expanded in `inlineStackProbe`.
922     BuildMI(MBB, MBBI, dl,
923             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
924                             : PPC::PROBED_STACKALLOC_32))
925         .addDef(TempReg)
926         .addDef(ScratchReg) // ScratchReg stores the old sp.
927         .addImm(NegFrameSize);
928     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
929     // update the ScratchReg to meet the assumption that ScratchReg contains
930     // the NegFrameSize. This solution is rather tricky.
931     if (!HasRedZone) {
932       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
933           .addReg(ScratchReg)
934           .addReg(SPReg);
935     }
936   } else {
937     // This condition must be kept in sync with canUseAsPrologue.
938     if (HasBP && MaxAlign > 1) {
939       if (isPPC64)
940         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
941             .addReg(SPReg)
942             .addImm(0)
943             .addImm(64 - Log2(MaxAlign));
944       else // PPC32...
945         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
946             .addReg(SPReg)
947             .addImm(0)
948             .addImm(32 - Log2(MaxAlign))
949             .addImm(31);
950       if (!isLargeFrame) {
951         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
952             .addReg(ScratchReg, RegState::Kill)
953             .addImm(NegFrameSize);
954       } else {
955         assert(!SingleScratchReg && "Only a single scratch reg available");
956         TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
957         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
958             .addReg(ScratchReg, RegState::Kill)
959             .addReg(TempReg, RegState::Kill);
960       }
961 
962       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
963           .addReg(SPReg, RegState::Kill)
964           .addReg(SPReg)
965           .addReg(ScratchReg);
966     } else if (!isLargeFrame) {
967       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
968           .addReg(SPReg)
969           .addImm(NegFrameSize)
970           .addReg(SPReg);
971     } else {
972       TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
973       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
974           .addReg(SPReg, RegState::Kill)
975           .addReg(SPReg)
976           .addReg(ScratchReg);
977     }
978   }
979 
980   // Save the TOC register after the stack pointer update if a prologue TOC
981   // save is required for the function.
982   if (MustSaveTOC) {
983     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
984     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
985       .addReg(TOCReg, getKillRegState(true))
986       .addImm(TOCSaveOffset)
987       .addReg(SPReg);
988   }
989 
990   if (!HasRedZone) {
991     assert(!isPPC64 && "A red zone is always available on PPC64");
992     if (HasSTUX) {
993       // The negated frame size is in ScratchReg, and the SPReg has been
994       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
995       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
996       // the stack frame (i.e. the old SP), ideally, we would put the old
997       // SP into a register and use it as the base for the stores. The
998       // problem is that the only available register may be ScratchReg,
999       // which could be R0, and R0 cannot be used as a base address.
1000 
1001       // First, set ScratchReg to the old SP. This may need to be modified
1002       // later.
1003       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1004         .addReg(ScratchReg, RegState::Kill)
1005         .addReg(SPReg);
1006 
1007       if (ScratchReg == PPC::R0) {
1008         // R0 cannot be used as a base register, but it can be used as an
1009         // index in a store-indexed.
1010         int LastOffset = 0;
1011         if (HasFP)  {
1012           // R0 += (FPOffset-LastOffset).
1013           // Need addic, since addi treats R0 as 0.
1014           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1015             .addReg(ScratchReg)
1016             .addImm(FPOffset-LastOffset);
1017           LastOffset = FPOffset;
1018           // Store FP into *R0.
1019           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1020             .addReg(FPReg, RegState::Kill)  // Save FP.
1021             .addReg(PPC::ZERO)
1022             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1023         }
1024         if (FI->usesPICBase()) {
1025           // R0 += (PBPOffset-LastOffset).
1026           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1027             .addReg(ScratchReg)
1028             .addImm(PBPOffset-LastOffset);
1029           LastOffset = PBPOffset;
1030           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1031             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1032             .addReg(PPC::ZERO)
1033             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1034         }
1035         if (HasBP) {
1036           // R0 += (BPOffset-LastOffset).
1037           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1038             .addReg(ScratchReg)
1039             .addImm(BPOffset-LastOffset);
1040           LastOffset = BPOffset;
1041           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1042             .addReg(BPReg, RegState::Kill)  // Save BP.
1043             .addReg(PPC::ZERO)
1044             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1045           // BP = R0-LastOffset
1046           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1047             .addReg(ScratchReg, RegState::Kill)
1048             .addImm(-LastOffset);
1049         }
1050       } else {
1051         // ScratchReg is not R0, so use it as the base register. It is
1052         // already set to the old SP, so we can use the offsets directly.
1053 
1054         // Now that the stack frame has been allocated, save all the necessary
1055         // registers using ScratchReg as the base address.
1056         if (HasFP)
1057           BuildMI(MBB, MBBI, dl, StoreInst)
1058             .addReg(FPReg)
1059             .addImm(FPOffset)
1060             .addReg(ScratchReg);
1061         if (FI->usesPICBase())
1062           BuildMI(MBB, MBBI, dl, StoreInst)
1063             .addReg(PPC::R30)
1064             .addImm(PBPOffset)
1065             .addReg(ScratchReg);
1066         if (HasBP) {
1067           BuildMI(MBB, MBBI, dl, StoreInst)
1068             .addReg(BPReg)
1069             .addImm(BPOffset)
1070             .addReg(ScratchReg);
1071           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1072             .addReg(ScratchReg, RegState::Kill)
1073             .addReg(ScratchReg);
1074         }
1075       }
1076     } else {
1077       // The frame size is a known 16-bit constant (fitting in the immediate
1078       // field of STWU). To be here we have to be compiling for PPC32.
1079       // Since the SPReg has been decreased by FrameSize, add it back to each
1080       // offset.
1081       if (HasFP)
1082         BuildMI(MBB, MBBI, dl, StoreInst)
1083           .addReg(FPReg)
1084           .addImm(FrameSize + FPOffset)
1085           .addReg(SPReg);
1086       if (FI->usesPICBase())
1087         BuildMI(MBB, MBBI, dl, StoreInst)
1088           .addReg(PPC::R30)
1089           .addImm(FrameSize + PBPOffset)
1090           .addReg(SPReg);
1091       if (HasBP) {
1092         BuildMI(MBB, MBBI, dl, StoreInst)
1093           .addReg(BPReg)
1094           .addImm(FrameSize + BPOffset)
1095           .addReg(SPReg);
1096         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1097           .addReg(SPReg)
1098           .addImm(FrameSize);
1099       }
1100     }
1101   }
1102 
1103   // Save the LR now.
1104   if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1105     SaveLR(LROffset + FrameSize);
1106 
1107   // Add Call Frame Information for the instructions we generated above.
1108   if (needsCFI) {
1109     unsigned CFIIndex;
1110 
1111     if (HasBP) {
1112       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1113       // because if the stack needed aligning then CFA won't be at a fixed
1114       // offset from FP/SP.
1115       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1116       CFIIndex = MF.addFrameInst(
1117           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1118     } else {
1119       // Adjust the definition of CFA to account for the change in SP.
1120       assert(NegFrameSize);
1121       CFIIndex = MF.addFrameInst(
1122           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1123     }
1124     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1125         .addCFIIndex(CFIIndex);
1126 
1127     if (HasFP) {
1128       // Describe where FP was saved, at a fixed offset from CFA.
1129       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1130       CFIIndex = MF.addFrameInst(
1131           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1132       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1133           .addCFIIndex(CFIIndex);
1134     }
1135 
1136     if (FI->usesPICBase()) {
1137       // Describe where R30 (PIC base) was saved, at a fixed offset from CFA.
1138       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1139       CFIIndex = MF.addFrameInst(
1140           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1141       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1142           .addCFIIndex(CFIIndex);
1143     }
1144 
1145     if (HasBP) {
1146       // Describe where BP was saved, at a fixed offset from CFA.
1147       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1148       CFIIndex = MF.addFrameInst(
1149           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1150       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1151           .addCFIIndex(CFIIndex);
1152     }
1153 
1154     if (MustSaveLR) {
1155       // Describe where LR was saved, at a fixed offset from CFA.
1156       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1157       CFIIndex = MF.addFrameInst(
1158           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1159       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1160           .addCFIIndex(CFIIndex);
1161     }
1162   }
1163 
1164   // If there is a frame pointer, copy R1 into R31
1165   if (HasFP) {
1166     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1167       .addReg(SPReg)
1168       .addReg(SPReg);
1169 
1170     if (!HasBP && needsCFI) {
1171       // Change the definition of CFA from SP+offset to FP+offset, because SP
1172       // will change at every alloca.
1173       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1174       unsigned CFIIndex = MF.addFrameInst(
1175           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1176 
1177       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1178           .addCFIIndex(CFIIndex);
1179     }
1180   }
1181 
1182   if (needsCFI) {
1183     // Describe where callee saved registers were saved, at fixed offsets from
1184     // CFA.
1185     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1186     for (const CalleeSavedInfo &I : CSI) {
1187       Register Reg = I.getReg();
1188       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1189 
1190       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1191       // subregisters of CR2. We just need to emit a move of CR2.
1192       if (PPC::CRBITRCRegClass.contains(Reg))
1193         continue;
1194 
1195       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1196         continue;
1197 
1198       // For 64-bit SVR4 when we have spilled CRs, the spill location
1199       // is SP+8, not a frame-relative slot.
1200       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1201         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1202         // the whole CR word.  In the ELFv2 ABI, every CR that was
1203         // actually saved gets its own CFI record.
1204         Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1205         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1206             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1207         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1208             .addCFIIndex(CFIIndex);
1209         continue;
1210       }
1211 
1212       if (I.isSpilledToReg()) {
1213         unsigned SpilledReg = I.getDstReg();
1214         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1215             nullptr, MRI->getDwarfRegNum(Reg, true),
1216             MRI->getDwarfRegNum(SpilledReg, true)));
1217         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1218           .addCFIIndex(CFIRegister);
1219       } else {
1220         int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1221         // We have changed the object offset above but we do not want to change
1222         // the actual offsets in the CFI instruction so we have to undo the
1223         // offset change here.
1224         if (MovingStackUpdateDown)
1225           Offset -= NegFrameSize;
1226 
1227         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1228             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1229         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1230             .addCFIIndex(CFIIndex);
1231       }
1232     }
1233   }
1234 }
1235 
1236 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1237                                         MachineBasicBlock &PrologMBB) const {
1238   bool isPPC64 = Subtarget.isPPC64();
1239   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1240   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1241   MachineFrameInfo &MFI = MF.getFrameInfo();
1242   MachineModuleInfo &MMI = MF.getMMI();
1243   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1244   // AIX assembler does not support cfi directives.
1245   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1246   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1247     int Opc = MI.getOpcode();
1248     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1249   });
1250   if (StackAllocMIPos == PrologMBB.end())
1251     return;
1252   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1253   MachineBasicBlock *CurrentMBB = &PrologMBB;
1254   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1255   MachineInstr &MI = *StackAllocMIPos;
1256   int64_t NegFrameSize = MI.getOperand(2).getImm();
1257   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1258   int64_t NegProbeSize = -(int64_t)ProbeSize;
1259   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1260   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1261   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1262   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1263   Register ScratchReg = MI.getOperand(0).getReg();
1264   Register FPReg = MI.getOperand(1).getReg();
1265   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1266   bool HasBP = RegInfo->hasBasePointer(MF);
1267   Register BPReg = RegInfo->getBaseRegister(MF);
1268   Align MaxAlign = MFI.getMaxAlign();
1269   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1270   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1271   // Subroutines to generate .cfi_* directives.
1272   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1273                             MachineBasicBlock::iterator MBBI, Register Reg) {
1274     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1275     unsigned CFIIndex = MF.addFrameInst(
1276         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1277     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1278         .addCFIIndex(CFIIndex);
1279   };
1280   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1281                          MachineBasicBlock::iterator MBBI, Register Reg,
1282                          int Offset) {
1283     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1284     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1285         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1286     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1287         .addCFIIndex(CFIIndex);
1288   };
1289   // Subroutine to determine if we can use the Imm as part of d-form.
1290   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1291   // Subroutine to materialize the Imm into TempReg.
1292   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1293                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1294                             Register &TempReg) {
1295     assert(isInt<32>(Imm) && "Unhandled imm");
1296     if (isInt<16>(Imm))
1297       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1298           .addImm(Imm);
1299     else {
1300       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1301           .addImm(Imm >> 16);
1302       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1303           .addReg(TempReg)
1304           .addImm(Imm & 0xFFFF);
1305     }
1306   };
1307   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1308   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1309                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1310                               Register NegSizeReg, bool UseDForm,
1311                               Register StoreReg) {
1312     if (UseDForm)
1313       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1314           .addReg(StoreReg)
1315           .addImm(NegSize)
1316           .addReg(SPReg);
1317     else
1318       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1319           .addReg(StoreReg)
1320           .addReg(SPReg)
1321           .addReg(NegSizeReg);
1322   };
1323   // Used to probe stack when realignment is required.
1324   // Note that, according to ABI's requirement, *sp must always equal the
1325   // value of back-chain pointer, only st(w|d)u(x) can be used to update sp.
1326   // Following is pseudo code:
1327   // final_sp = (sp & align) + negframesize;
1328   // neg_gap = final_sp - sp;
1329   // while (neg_gap < negprobesize) {
1330   //   stdu fp, negprobesize(sp);
1331   //   neg_gap -= negprobesize;
1332   // }
1333   // stdux fp, sp, neg_gap
1334   //
1335   // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
1336   // before probe code, we don't need to save it, so we get one additional reg
1337   // that can be used to materialize the probesize if needed to use xform.
1338   // Otherwise, we can NOT materialize probesize, so we can only use Dform for
1339   // now.
1340   //
1341   // The allocations are:
1342   // if (HasBP && HasRedzone) {
1343   //   r0: materialize the probesize if needed so that we can use xform.
1344   //   r12: `neg_gap`
1345   // } else {
1346   //   r0: back-chain pointer
1347   //   r12: `neg_gap`.
1348   // }
1349   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1350                                  MachineBasicBlock::iterator MBBI,
1351                                  Register ScratchReg, Register TempReg) {
1352     assert(HasBP && "The function is supposed to have base pointer when its "
1353                     "stack is realigned.");
1354     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1355 
1356     // FIXME: We can eliminate this limitation if we get more information about
1357     // which parts of the red zone are already used. The used red zone can be
1358     // treated as probed, but there might be `holes' in the probed red zone,
1359     // which could complicate the implementation.
1360     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1361            "Probe size should be larger or equal to the size of red-zone so "
1362            "that red-zone is not clobbered by probing.");
1363 
1364     Register &FinalStackPtr = TempReg;
1365     // FIXME: We only support NegProbeSize materializable by DForm currently.
1366     // When HasBP && HasRedzone, we can use xform if we have an additional idle
1367     // register.
1368     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1369     assert(isInt<16>(NegProbeSize) &&
1370            "NegProbeSize should be materializable by DForm");
1371     Register CRReg = PPC::CR0;
1372     // Layout of output assembly kinda like:
1373     // bb.0:
1374     //   ...
1375     //   sub $scratchreg, $finalsp, r1
1376     //   cmpdi $scratchreg, <negprobesize>
1377     //   bge bb.2
1378     // bb.1:
1379     //   stdu <backchain>, <negprobesize>(r1)
1380     //   sub $scratchreg, $scratchreg, negprobesize
1381     //   cmpdi $scratchreg, <negprobesize>
1382     //   blt bb.1
1383     // bb.2:
1384     //   stdux <backchain>, r1, $scratchreg
1385     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1386     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1387     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1388     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1389     MF.insert(MBBInsertPoint, ProbeExitMBB);
1390     // bb.2
1391     {
1392       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1393       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1394                        BackChainPointer);
1395       if (HasRedZone)
1396         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1397         // to TempReg to satisfy it.
1398         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1399             .addReg(BPReg)
1400             .addReg(BPReg);
1401       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1402       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1403     }
1404     // bb.0
1405     {
1406       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1407           .addReg(SPReg)
1408           .addReg(FinalStackPtr);
1409       if (!HasRedZone)
1410         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1411       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1412           .addReg(ScratchReg)
1413           .addImm(NegProbeSize);
1414       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1415           .addImm(PPC::PRED_GE)
1416           .addReg(CRReg)
1417           .addMBB(ProbeExitMBB);
1418       MBB.addSuccessor(ProbeLoopBodyMBB);
1419       MBB.addSuccessor(ProbeExitMBB);
1420     }
1421     // bb.1
1422     {
1423       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1424       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1425                        0, true /*UseDForm*/, BackChainPointer);
1426       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1427               ScratchReg)
1428           .addReg(ScratchReg)
1429           .addImm(-NegProbeSize);
1430       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1431               CRReg)
1432           .addReg(ScratchReg)
1433           .addImm(NegProbeSize);
1434       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1435           .addImm(PPC::PRED_LT)
1436           .addReg(CRReg)
1437           .addMBB(ProbeLoopBodyMBB);
1438       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1439       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1440     }
1441     // Update liveins.
1442     fullyRecomputeLiveIns({ProbeExitMBB, ProbeLoopBodyMBB});
1443     return ProbeExitMBB;
1444   };
1445   // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1446   // SP = SP - SP % MaxAlign, which makes the probe more like a dynamic probe,
1447   // since the offset subtracted from SP is determined by SP's runtime value.
1448   if (HasBP && MaxAlign > 1) {
1449     // Calculate final stack pointer.
1450     if (isPPC64)
1451       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1452           .addReg(SPReg)
1453           .addImm(0)
1454           .addImm(64 - Log2(MaxAlign));
1455     else
1456       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1457           .addReg(SPReg)
1458           .addImm(0)
1459           .addImm(32 - Log2(MaxAlign))
1460           .addImm(31);
1461     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1462             FPReg)
1463         .addReg(ScratchReg)
1464         .addReg(SPReg);
1465     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1466     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1467             FPReg)
1468         .addReg(ScratchReg)
1469         .addReg(FPReg);
1470     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1471     if (needsCFI)
1472       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1473   } else {
1474     // Initialize current frame pointer.
1475     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1476     // Use FPReg to calculate CFA.
1477     if (needsCFI)
1478       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1479     // Probe residual part.
1480     if (NegResidualSize) {
1481       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1482       if (!ResidualUseDForm)
1483         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1484       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1485                        ResidualUseDForm, FPReg);
1486     }
1487     bool UseDForm = CanUseDForm(NegProbeSize);
1488     // If number of blocks is small, just probe them directly.
1489     if (NumBlocks < 3) {
1490       if (!UseDForm)
1491         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1492       for (int i = 0; i < NumBlocks; ++i)
1493         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1494                          FPReg);
1495       if (needsCFI) {
1496         // Restore using SPReg to calculate CFA.
1497         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1498       }
1499     } else {
1500       // Since CTR is a volatile register and current shrinkwrap implementation
1501       // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1502       // CTR loop to probe.
1503       // Calculate trip count and stores it in CTRReg.
1504       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1505       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1506           .addReg(ScratchReg, RegState::Kill);
1507       if (!UseDForm)
1508         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1509       // Create MBBs of the loop.
1510       MachineFunction::iterator MBBInsertPoint =
1511           std::next(CurrentMBB->getIterator());
1512       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1513       MF.insert(MBBInsertPoint, LoopMBB);
1514       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1515       MF.insert(MBBInsertPoint, ExitMBB);
1516       // Synthesize the loop body.
1517       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1518                        UseDForm, FPReg);
1519       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1520           .addMBB(LoopMBB);
1521       LoopMBB->addSuccessor(ExitMBB);
1522       LoopMBB->addSuccessor(LoopMBB);
1523       // Synthesize the exit MBB.
1524       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1525                       std::next(MachineBasicBlock::iterator(MI)),
1526                       CurrentMBB->end());
1527       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1528       CurrentMBB->addSuccessor(LoopMBB);
1529       if (needsCFI) {
1530         // Restore using SPReg to calculate CFA.
1531         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1532       }
1533       // Update liveins.
1534       fullyRecomputeLiveIns({ExitMBB, LoopMBB});
1535     }
1536   }
1537   ++NumPrologProbed;
1538   MI.eraseFromParent();
1539 }
1540 
/// Emit the function epilogue into \p MBB.
///
/// Instructions are inserted before the block's first terminator (MBBI), or
/// before StackUpdateLoc when the stack-pointer update can be moved up past
/// the callee-save restores. The sequence restores the stack pointer, reloads
/// LR, CR, the frame pointer, the base pointer and the PIC base register from
/// their save slots as required, and finally, for return blocks, either pops
/// the caller-allocated area (fastcc with GuaranteedTailCallOpt and a
/// BLR/BLR8 return) or calls createTailCallBranchInstr().
///
/// \param MF  Function being lowered; supplies the frame info and the
///            PPCFunctionInfo consulted below.
/// \param MBB Block that receives the epilogue code.
1541 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1542                                     MachineBasicBlock &MBB) const {
1543   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1544   DebugLoc dl;
1545 
     // Reuse the terminator's debug location when the block has a terminator.
1546   if (MBBI != MBB.end())
1547     dl = MBBI->getDebugLoc();
1548 
1549   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1550   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1551 
1552   // Get alignment info so we know how to restore the SP.
1553   const MachineFrameInfo &MFI = MF.getFrameInfo();
1554 
1555   // Get the number of bytes allocated from the FrameInfo.
1556   int64_t FrameSize = MFI.getStackSize();
1557 
1558   // Get processor type.
1559   bool isPPC64 = Subtarget.isPPC64();
1560 
1561   // Check if the link register (LR) has been saved.
1562   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1563   bool MustSaveLR = FI->mustSaveLR();
1564   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1565   bool MustSaveCR = !MustSaveCRs.empty();
1566   // Do we have a frame pointer and/or base pointer for this function?
1567   bool HasFP = hasFP(MF);
1568   bool HasBP = RegInfo->hasBasePointer(MF);
1569   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1570   bool HasROPProtect = Subtarget.hasROPProtect();
1571   bool HasPrivileged = Subtarget.hasPrivileged();
1572 
     // Registers and opcodes below are picked per word size (64-bit vs 32-bit).
1573   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1574   Register BPReg = RegInfo->getBaseRegister(MF);
1575   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1576   Register ScratchReg;
1577   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1578   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1579                                                  : PPC::MTLR );
1580   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1581                                                  : PPC::LWZ );
1582   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1583                                                            : PPC::LIS );
1584   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1585                                               : PPC::OR );
1586   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1587                                                   : PPC::ORI );
1588   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1589                                                    : PPC::ADDI );
1590   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1591                                                 : PPC::ADD4 );
1592   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1593                                                      : PPC::LWZ);
1594   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1595                                                      : PPC::MTOCRF);
1596   const MCInstrDesc &HashChk =
1597       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1598                       : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1599   int64_t LROffset = getReturnSaveOffset();
1600 
1601   int64_t FPOffset = 0;
1602 
1603   // Using the same bool variable as below to suppress compiler warnings.
1604   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1605                                               &TempReg);
1606   assert(SingleScratchReg &&
1607          "Could not find an available scratch register");
1608 
     // From here on SingleScratchReg is true when ScratchReg and TempReg ended
     // up being the same register.
1609   SingleScratchReg = ScratchReg == TempReg;
1610 
     // Look up the save-slot offsets of FP, BP and the PIC base register; each
     // stays 0 when the corresponding register is not in use.
1611   if (HasFP) {
1612     int FPIndex = FI->getFramePointerSaveIndex();
1613     assert(FPIndex && "No Frame Pointer Save Slot!");
1614     FPOffset = MFI.getObjectOffset(FPIndex);
1615   }
1616 
1617   int64_t BPOffset = 0;
1618   if (HasBP) {
1619       int BPIndex = FI->getBasePointerSaveIndex();
1620       assert(BPIndex && "No Base Pointer Save Slot!");
1621       BPOffset = MFI.getObjectOffset(BPIndex);
1622   }
1623 
1624   int64_t PBPOffset = 0;
1625   if (FI->usesPICBase()) {
1626     int PBPIndex = FI->getPICBasePointerSaveIndex();
1627     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1628     PBPOffset = MFI.getObjectOffset(PBPIndex);
1629   }
1630 
1631   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1632 
     // For tail-call returns, fold the stack adjustment carried by the
     // TCRETURN* pseudo (operand 1) into FrameSize.
1633   if (IsReturnBlock) {
1634     unsigned RetOpcode = MBBI->getOpcode();
1635     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1636                       RetOpcode == PPC::TCRETURNdi ||
1637                       RetOpcode == PPC::TCRETURNai ||
1638                       RetOpcode == PPC::TCRETURNri8 ||
1639                       RetOpcode == PPC::TCRETURNdi8 ||
1640                       RetOpcode == PPC::TCRETURNai8;
1641 
1642     if (UsesTCRet) {
1643       int MaxTCRetDelta = FI->getTailCallSPDelta();
1644       MachineOperand &StackAdjust = MBBI->getOperand(1);
1645       assert(StackAdjust.isImm() && "Expecting immediate value.");
1646       // Adjust stack pointer.
1647       int StackAdj = StackAdjust.getImm();
1648       int Delta = StackAdj - MaxTCRetDelta;
1649       assert((Delta >= 0) && "Delta must be positive");
1650       if (MaxTCRetDelta>0)
1651         FrameSize += (StackAdj +Delta);
1652       else
1653         FrameSize += StackAdj;
1654     }
1655   }
1656 
1657   // Frames of 32KB & larger require special handling because they cannot be
1658   // indexed into with a simple LD/LWZ immediate offset operand.
1659   bool isLargeFrame = !isInt<16>(FrameSize);
1660 
1661   // On targets without red zone, the SP needs to be restored last, so that
1662   // all live contents of the stack frame are upwards of the SP. This means
1663   // that we cannot restore SP just now, since there may be more registers
1664   // to restore from the stack frame (e.g. R31). If the frame size is not
1665   // a simple immediate value, we will need a spare register to hold the
1666   // restored SP. If the frame size is known and small, we can simply adjust
1667   // the offsets of the registers to be restored, and still use SP to restore
1668   // them. In such case, the final update of SP will be to add the frame
1669   // size to it.
1670   // To simplify the code, set RBReg to the base register used to restore
1671   // values from the stack, and set SPAdd to the value that needs to be added
1672   // to the SP at the end. The default values are as if red zone was present.
1673   unsigned RBReg = SPReg;
1674   uint64_t SPAdd = 0;
1675 
1676   // Check if we can move the stack update instruction up the epilogue
1677   // past the callee saves. This will allow the move to LR instruction
1678   // to be executed before the restores of the callee saves which means
1679   // that the callee saves can hide the latency from the MTLR instruction.
1680   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1681   if (stackUpdateCanBeMoved(MF)) {
1682     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
       // NOTE(review): each CalleeSavedInfo is copied by value per iteration; a
       // const reference looks sufficient since CSI is only read here.
1683     for (CalleeSavedInfo CSI : Info) {
1684       // If the callee saved register is spilled to another register abort the
1685       // stack update movement.
1686       if (CSI.isSpilledToReg()) {
1687         StackUpdateLoc = MBBI;
1688         break;
1689       }
1690       int FrIdx = CSI.getFrameIdx();
1691       // If the frame index is not negative the callee saved info belongs to a
1692       // stack object that is not a fixed stack object. We ignore non-fixed
1693       // stack objects because we won't move the update of the stack pointer
1694       // past them.
1695       if (FrIdx >= 0)
1696         continue;
1697 
         // Step the insertion point back by one instruction for every
         // fixed-object callee save located at a negative offset.
1698       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1699         StackUpdateLoc--;
1700       else {
1701         // Abort the operation as we can't update all CSR restores.
1702         StackUpdateLoc = MBBI;
1703         break;
1704       }
1705     }
1706   }
1707 
1708   if (FrameSize) {
1709     // In the prologue, the loaded (or persistent) stack pointer value is
1710     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1711     // zone add this offset back now.
1712 
1713     // If the function has a base pointer, the stack pointer has been copied
1714     // to it so we can restore it by copying in the other direction.
1715     if (HasRedZone && HasBP) {
         // RBReg still holds its initial value SPReg here, so this is SP = BP.
1716       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1717         addReg(BPReg).
1718         addReg(BPReg);
1719     }
1720     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1721     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1722     // call which invalidates the stack pointer value in SP(0). So we use the
1723     // value of R31 in this case. Similar situation exists with setjmp.
1724     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1725       assert(HasFP && "Expecting a valid frame pointer.");
1726       if (!HasRedZone)
1727         RBReg = FPReg;
1728       if (!isLargeFrame) {
1729         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1730           .addReg(FPReg).addImm(FrameSize);
1731       } else {
1732         TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
           // NOTE(review): unlike the other BuildMI calls in this function, no
           // destination register is passed to BuildMI; RBReg is appended via
           // addReg() instead. Confirm the operand ends up marked as a def.
1733         BuildMI(MBB, MBBI, dl, AddInst)
1734           .addReg(RBReg)
1735           .addReg(FPReg)
1736           .addReg(ScratchReg);
1737       }
1738     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1739       if (HasRedZone) {
1740         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1741           .addReg(SPReg)
1742           .addImm(FrameSize);
1743       } else {
1744         // Make sure that adding FrameSize will not overflow the max offset
1745         // size.
1746         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1747                "Local offsets should be negative");
           // Keep SP untouched for now: bias the restore offsets by FrameSize
           // and remember the amount to add to SP at the very end.
1748         SPAdd = FrameSize;
1749         FPOffset += FrameSize;
1750         BPOffset += FrameSize;
1751         PBPOffset += FrameSize;
1752       }
1753     } else {
1754       // We don't want to use ScratchReg as a base register, because it
1755       // could happen to be R0. Use FP instead, but make sure to preserve it.
1756       if (!HasRedZone) {
1757         // If FP is not saved, copy it to ScratchReg.
1758         if (!HasFP)
1759           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1760             .addReg(FPReg)
1761             .addReg(FPReg);
1762         RBReg = FPReg;
1763       }
         // Load the word stored at 0(SP) into RBReg.
1764       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1765         .addImm(0)
1766         .addReg(SPReg);
1767     }
1768   }
1769   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1770   // If there is no red zone, ScratchReg may be needed for holding a useful
1771   // value (although not the base register). Make sure it is not overwritten
1772   // too early.
1773 
1774   // If we need to restore both the LR and the CR and we only have one
1775   // available scratch register, we must do them one at a time.
1776   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1777     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1778     // is live here.
1779     assert(HasRedZone && "Expecting red zone");
1780     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1781       .addImm(CRSaveOffset)
1782       .addReg(SPReg);
       // TempReg is marked killed on the last move-to-CR only.
1783     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1784       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1785         .addReg(TempReg, getKillRegState(i == e-1));
1786   }
1787 
1788   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1789   // LR is stored in the caller's stack frame. ScratchReg will be needed
1790   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1791   // a base register anyway, because it may happen to be R0.
1792   bool LoadedLR = false;
1793   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1794     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1795       .addImm(LROffset+SPAdd)
1796       .addReg(RBReg);
1797     LoadedLR = true;
1798   }
1799 
     // With two distinct scratch registers (or no LR to restore), load the
     // saved CR word now; the moves into the CR fields are emitted further down.
1800   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1801     assert(RBReg == SPReg && "Should be using SP as a base register");
1802     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1803       .addImm(CRSaveOffset)
1804       .addReg(RBReg);
1805   }
1806 
1807   if (HasFP) {
1808     // If there is red zone, restore FP directly, since SP has already been
1809     // restored. Otherwise, restore the value of FP into ScratchReg.
1810     if (HasRedZone || RBReg == SPReg)
1811       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1812         .addImm(FPOffset)
1813         .addReg(SPReg);
1814     else
1815       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1816         .addImm(FPOffset)
1817         .addReg(RBReg);
1818   }
1819 
     // Reload the PIC base register (R30) and the base pointer from their
     // save slots, addressed relative to RBReg.
1820   if (FI->usesPICBase())
1821     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1822       .addImm(PBPOffset)
1823       .addReg(RBReg);
1824 
1825   if (HasBP)
1826     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1827       .addImm(BPOffset)
1828       .addReg(RBReg);
1829 
1830   // There is nothing more to be loaded from the stack, so now we can
1831   // restore SP: SP = RBReg + SPAdd.
1832   if (RBReg != SPReg || SPAdd != 0) {
1833     assert(!HasRedZone && "This should not happen with red zone");
1834     // If SPAdd is 0, generate a copy.
1835     if (SPAdd == 0)
1836       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1837         .addReg(RBReg)
1838         .addReg(RBReg);
1839     else
1840       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1841         .addReg(RBReg)
1842         .addImm(SPAdd);
1843 
1844     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
       // FP served as the base register, so copy its value back from
       // ScratchReg.
1845     if (RBReg == FPReg)
1846       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1847         .addReg(ScratchReg)
1848         .addReg(ScratchReg);
1849 
1850     // Now load the LR from the caller's stack frame.
1851     if (MustSaveLR && !LoadedLR)
1852       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1853         .addImm(LROffset)
1854         .addReg(SPReg);
1855   }
1856 
     // Second half of the CR restore started above: move the loaded word into
     // each saved CR field, killing TempReg on the last move.
1857   if (MustSaveCR &&
1858       !(SingleScratchReg && MustSaveLR))
1859     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1860       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1861         .addReg(TempReg, getKillRegState(i == e-1));
1862 
1863   if (MustSaveLR) {
1864     // If ROP protection is required, an extra instruction is added to compute a
1865     // hash and then compare it to the hash stored in the prologue.
1866     if (HasROPProtect) {
1867       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1868       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1869       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1870              "ROP hash check location offset out of range.");
1871       assert(((ImmOffset & 0x7) == 0) &&
1872              "ROP hash check location offset must be 8 byte aligned.");
1873       BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1874           .addReg(ScratchReg)
1875           .addImm(ImmOffset)
1876           .addReg(SPReg);
1877     }
       // Move the reloaded return address from ScratchReg into LR.
1878     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1879   }
1880 
1881   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1882   // call optimization
1883   if (IsReturnBlock) {
1884     unsigned RetOpcode = MBBI->getOpcode();
1885     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1886         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1887         MF.getFunction().getCallingConv() == CallingConv::Fast) {
         // This FI shadows the outer FI, which was obtained from the same
         // MF.getInfo<PPCFunctionInfo>() call.
1888       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1889       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1890 
         // A CallerAllocatedAmt of zero fails this test as well and therefore
         // takes the LIS/ORI/ADD path below.
1891       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1892         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1893           .addReg(SPReg).addImm(CallerAllocatedAmt);
1894       } else {
           // Build the 32-bit amount in ScratchReg: high half, then low half.
1895         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1896           .addImm(CallerAllocatedAmt >> 16);
1897         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1898           .addReg(ScratchReg, RegState::Kill)
1899           .addImm(CallerAllocatedAmt & 0xFFFF);
           // NOTE(review): no destination register is passed to BuildMI here;
           // SPReg is appended via addReg(). Also, this path adds ScratchReg
           // to FPReg while the small-immediate path above adds to SPReg.
           // Confirm both are intended.
1900         BuildMI(MBB, MBBI, dl, AddInst)
1901           .addReg(SPReg)
1902           .addReg(FPReg)
1903           .addReg(ScratchReg);
1904       }
1905     } else {
         // Emit the real tail-call branch for TCRETURN* returns; the callee
         // does nothing for any other return opcode.
1906       createTailCallBranchInstr(MBB);
1907     }
1908   }
1909 }
1910 
1911 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1912   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1913 
1914   // If we got this far a first terminator should exist.
1915   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1916 
1917   DebugLoc dl = MBBI->getDebugLoc();
1918   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1919 
1920   // Create branch instruction for pseudo tail call return instruction.
1921   // The TCRETURNdi variants are direct calls. Valid targets for those are
1922   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1923   // since we can tail call external functions with PC-Rel (i.e. we don't need
1924   // to worry about different TOC pointers). Some of the external functions will
1925   // be MO_GlobalAddress while others like memcpy for example, are going to
1926   // be MO_ExternalSymbol.
1927   unsigned RetOpcode = MBBI->getOpcode();
1928   if (RetOpcode == PPC::TCRETURNdi) {
1929     MBBI = MBB.getLastNonDebugInstr();
1930     MachineOperand &JumpTarget = MBBI->getOperand(0);
1931     if (JumpTarget.isGlobal())
1932       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1933         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1934     else if (JumpTarget.isSymbol())
1935       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1936         addExternalSymbol(JumpTarget.getSymbolName());
1937     else
1938       llvm_unreachable("Expecting Global or External Symbol");
1939   } else if (RetOpcode == PPC::TCRETURNri) {
1940     MBBI = MBB.getLastNonDebugInstr();
1941     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1942     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1943   } else if (RetOpcode == PPC::TCRETURNai) {
1944     MBBI = MBB.getLastNonDebugInstr();
1945     MachineOperand &JumpTarget = MBBI->getOperand(0);
1946     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1947   } else if (RetOpcode == PPC::TCRETURNdi8) {
1948     MBBI = MBB.getLastNonDebugInstr();
1949     MachineOperand &JumpTarget = MBBI->getOperand(0);
1950     if (JumpTarget.isGlobal())
1951       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1952         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1953     else if (JumpTarget.isSymbol())
1954       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1955         addExternalSymbol(JumpTarget.getSymbolName());
1956     else
1957       llvm_unreachable("Expecting Global or External Symbol");
1958   } else if (RetOpcode == PPC::TCRETURNri8) {
1959     MBBI = MBB.getLastNonDebugInstr();
1960     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1961     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1962   } else if (RetOpcode == PPC::TCRETURNai8) {
1963     MBBI = MBB.getLastNonDebugInstr();
1964     MachineOperand &JumpTarget = MBBI->getOperand(0);
1965     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1966   }
1967 }
1968 
1969 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1970                                             BitVector &SavedRegs,
1971                                             RegScavenger *RS) const {
1972   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1973   if (Subtarget.isAIXABI())
1974     updateCalleeSaves(MF, SavedRegs);
1975 
1976   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1977 
1978   // Do not explicitly save the callee saved VSRp registers.
1979   // The individual VSR subregisters will be saved instead.
1980   SavedRegs.reset(PPC::VSRp26);
1981   SavedRegs.reset(PPC::VSRp27);
1982   SavedRegs.reset(PPC::VSRp28);
1983   SavedRegs.reset(PPC::VSRp29);
1984   SavedRegs.reset(PPC::VSRp30);
1985   SavedRegs.reset(PPC::VSRp31);
1986 
1987   //  Save and clear the LR state.
1988   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1989   unsigned LR = RegInfo->getRARegister();
1990   FI->setMustSaveLR(MustSaveLR(MF, LR));
1991   SavedRegs.reset(LR);
1992 
1993   //  Save R31 if necessary
1994   int FPSI = FI->getFramePointerSaveIndex();
1995   const bool isPPC64 = Subtarget.isPPC64();
1996   MachineFrameInfo &MFI = MF.getFrameInfo();
1997 
1998   // If the frame pointer save index hasn't been defined yet.
1999   if (!FPSI && needsFP(MF)) {
2000     // Find out what the fix offset of the frame pointer save area.
2001     int FPOffset = getFramePointerSaveOffset();
2002     // Allocate the frame index for frame pointer save area.
2003     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2004     // Save the result.
2005     FI->setFramePointerSaveIndex(FPSI);
2006   }
2007 
2008   int BPSI = FI->getBasePointerSaveIndex();
2009   if (!BPSI && RegInfo->hasBasePointer(MF)) {
2010     int BPOffset = getBasePointerSaveOffset();
2011     // Allocate the frame index for the base pointer save area.
2012     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2013     // Save the result.
2014     FI->setBasePointerSaveIndex(BPSI);
2015   }
2016 
2017   // Reserve stack space for the PIC Base register (R30).
2018   // Only used in SVR4 32-bit.
2019   if (FI->usesPICBase()) {
2020     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2021     FI->setPICBasePointerSaveIndex(PBPSI);
2022   }
2023 
2024   // Make sure we don't explicitly spill r31, because, for example, we have
2025   // some inline asm which explicitly clobbers it, when we otherwise have a
2026   // frame pointer and are using r31's spill slot for the prologue/epilogue
2027   // code. Same goes for the base pointer and the PIC base register.
2028   if (needsFP(MF))
2029     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2030   if (RegInfo->hasBasePointer(MF))
2031     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2032   if (FI->usesPICBase())
2033     SavedRegs.reset(PPC::R30);
2034 
2035   // Reserve stack space to move the linkage area to in case of a tail call.
2036   int TCSPDelta = 0;
2037   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2038       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2039     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2040   }
2041 
2042   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2043   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2044   // object at the offset of the CR-save slot in the linkage area. The actual
2045   // save and restore of the condition register will be created as part of the
2046   // prologue and epilogue insertion, but the FixedStack object is needed to
2047   // keep the CalleSavedInfo valid.
2048   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2049        SavedRegs.test(PPC::CR4))) {
2050     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2051     const int64_t SpillOffset =
2052         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2053     int FrameIdx =
2054         MFI.CreateFixedObject(SpillSize, SpillOffset,
2055                               /* IsImmutable */ true, /* IsAliased */ false);
2056     FI->setCRSpillFrameIndex(FrameIdx);
2057   }
2058 }
2059 
2060 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2061                                                        RegScavenger *RS) const {
2062   // Get callee saved register information.
2063   MachineFrameInfo &MFI = MF.getFrameInfo();
2064   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2065 
2066   // If the function is shrink-wrapped, and if the function has a tail call, the
2067   // tail call might not be in the new RestoreBlock, so real branch instruction
2068   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2069   // RestoreBlock. So we handle this case here.
2070   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2071     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2072     for (MachineBasicBlock &MBB : MF) {
2073       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2074         createTailCallBranchInstr(MBB);
2075     }
2076   }
2077 
2078   // Early exit if no callee saved registers are modified!
2079   if (CSI.empty() && !needsFP(MF)) {
2080     addScavengingSpillSlot(MF, RS);
2081     return;
2082   }
2083 
2084   unsigned MinGPR = PPC::R31;
2085   unsigned MinG8R = PPC::X31;
2086   unsigned MinFPR = PPC::F31;
2087   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2088 
2089   bool HasGPSaveArea = false;
2090   bool HasG8SaveArea = false;
2091   bool HasFPSaveArea = false;
2092   bool HasVRSaveArea = false;
2093 
2094   SmallVector<CalleeSavedInfo, 18> GPRegs;
2095   SmallVector<CalleeSavedInfo, 18> G8Regs;
2096   SmallVector<CalleeSavedInfo, 18> FPRegs;
2097   SmallVector<CalleeSavedInfo, 18> VRegs;
2098 
2099   for (const CalleeSavedInfo &I : CSI) {
2100     Register Reg = I.getReg();
2101     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2102             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2103            "Not expecting to try to spill R2 in a function that must save TOC");
2104     if (PPC::GPRCRegClass.contains(Reg)) {
2105       HasGPSaveArea = true;
2106 
2107       GPRegs.push_back(I);
2108 
2109       if (Reg < MinGPR) {
2110         MinGPR = Reg;
2111       }
2112     } else if (PPC::G8RCRegClass.contains(Reg)) {
2113       HasG8SaveArea = true;
2114 
2115       G8Regs.push_back(I);
2116 
2117       if (Reg < MinG8R) {
2118         MinG8R = Reg;
2119       }
2120     } else if (PPC::F8RCRegClass.contains(Reg)) {
2121       HasFPSaveArea = true;
2122 
2123       FPRegs.push_back(I);
2124 
2125       if (Reg < MinFPR) {
2126         MinFPR = Reg;
2127       }
2128     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2129                PPC::CRRCRegClass.contains(Reg)) {
2130       ; // do nothing, as we already know whether CRs are spilled
2131     } else if (PPC::VRRCRegClass.contains(Reg) ||
2132                PPC::SPERCRegClass.contains(Reg)) {
2133       // Altivec and SPE are mutually exclusive, but have the same stack
2134       // alignment requirements, so overload the save area for both cases.
2135       HasVRSaveArea = true;
2136 
2137       VRegs.push_back(I);
2138 
2139       if (Reg < MinVR) {
2140         MinVR = Reg;
2141       }
2142     } else {
2143       llvm_unreachable("Unknown RegisterClass!");
2144     }
2145   }
2146 
2147   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2148   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2149 
2150   int64_t LowerBound = 0;
2151 
2152   // Take into account stack space reserved for tail calls.
2153   int TCSPDelta = 0;
2154   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2155       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2156     LowerBound = TCSPDelta;
2157   }
2158 
2159   // The Floating-point register save area is right below the back chain word
2160   // of the previous stack frame.
2161   if (HasFPSaveArea) {
2162     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2163       int FI = FPRegs[i].getFrameIdx();
2164 
2165       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2166     }
2167 
2168     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2169   }
2170 
2171   // Check whether the frame pointer register is allocated. If so, make sure it
2172   // is spilled to the correct offset.
2173   if (needsFP(MF)) {
2174     int FI = PFI->getFramePointerSaveIndex();
2175     assert(FI && "No Frame Pointer Save Slot!");
2176     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2177     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2178     HasGPSaveArea = true;
2179   }
2180 
2181   if (PFI->usesPICBase()) {
2182     int FI = PFI->getPICBasePointerSaveIndex();
2183     assert(FI && "No PIC Base Pointer Save Slot!");
2184     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2185 
2186     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2187     HasGPSaveArea = true;
2188   }
2189 
2190   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2191   if (RegInfo->hasBasePointer(MF)) {
2192     int FI = PFI->getBasePointerSaveIndex();
2193     assert(FI && "No Base Pointer Save Slot!");
2194     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2195 
2196     Register BP = RegInfo->getBaseRegister(MF);
2197     if (PPC::G8RCRegClass.contains(BP)) {
2198       MinG8R = std::min<unsigned>(MinG8R, BP);
2199       HasG8SaveArea = true;
2200     } else if (PPC::GPRCRegClass.contains(BP)) {
2201       MinGPR = std::min<unsigned>(MinGPR, BP);
2202       HasGPSaveArea = true;
2203     }
2204   }
2205 
2206   // General register save area starts right below the Floating-point
2207   // register save area.
2208   if (HasGPSaveArea || HasG8SaveArea) {
2209     // Move general register save area spill slots down, taking into account
2210     // the size of the Floating-point register save area.
2211     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2212       if (!GPRegs[i].isSpilledToReg()) {
2213         int FI = GPRegs[i].getFrameIdx();
2214         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2215       }
2216     }
2217 
2218     // Move general register save area spill slots down, taking into account
2219     // the size of the Floating-point register save area.
2220     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2221       if (!G8Regs[i].isSpilledToReg()) {
2222         int FI = G8Regs[i].getFrameIdx();
2223         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2224       }
2225     }
2226 
2227     unsigned MinReg =
2228       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2229                          TRI->getEncodingValue(MinG8R));
2230 
2231     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2232     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2233   }
2234 
2235   // For 32-bit only, the CR save area is below the general register
2236   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2237   // to the stack pointer and hence does not need an adjustment here.
2238   // Only CR2 (the first nonvolatile spilled) has an associated frame
2239   // index so that we have a single uniform save area.
2240   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2241     // Adjust the frame index of the CR spill slot.
2242     for (const auto &CSInfo : CSI) {
2243       if (CSInfo.getReg() == PPC::CR2) {
2244         int FI = CSInfo.getFrameIdx();
2245         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2246         break;
2247       }
2248     }
2249 
2250     LowerBound -= 4; // The CR save area is always 4 bytes long.
2251   }
2252 
2253   // Both Altivec and SPE have the same alignment and padding requirements
2254   // within the stack frame.
2255   if (HasVRSaveArea) {
2256     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2257     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2258     // we are using negative number here (the stack grows downward). We should
2259     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2260     // is the alignment size ( n = 16 here) and y is the size after aligning.
2261     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2262     LowerBound &= ~(15);
2263 
2264     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2265       int FI = VRegs[i].getFrameIdx();
2266 
2267       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2268     }
2269   }
2270 
2271   addScavengingSpillSlot(MF, RS);
2272 }
2273 
2274 void
2275 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2276                                          RegScavenger *RS) const {
2277   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2278   // a large stack, which will require scavenging a register to materialize a
2279   // large offset.
2280 
2281   // We need to have a scavenger spill slot for spills if the frame size is
2282   // large. In case there is no free register for large-offset addressing,
2283   // this slot is used for the necessary emergency spill. Also, we need the
2284   // slot for dynamic stack allocations.
2285 
2286   // The scavenger might be invoked if the frame offset does not fit into
2287   // the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
2288   // We don't know the complete frame size here because we've not yet computed
2289   // callee-saved register spills or the needed alignment padding.
2290   unsigned StackSize = determineFrameLayout(MF, true);
2291   MachineFrameInfo &MFI = MF.getFrameInfo();
2292   bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
2293 
2294   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2295       (hasSpills(MF) && NeedSpills)) {
2296     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2297     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2298     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2299     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2300     unsigned Size = TRI.getSpillSize(RC);
2301     Align Alignment = TRI.getSpillAlign(RC);
2302     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2303 
2304     // Might we have over-aligned allocas?
2305     bool HasAlVars =
2306         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2307 
2308     // These kinds of spills might need two registers.
2309     if (spillsCR(MF) || HasAlVars)
2310       RS->addScavengingFrameIndex(
2311           MFI.CreateStackObject(Size, Alignment, false));
2312   }
2313 }
2314 
2315 // This function checks if a callee saved gpr can be spilled to a volatile
2316 // vector register. This occurs for leaf functions when the option
2317 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2318 // which were not spilled to vectors, return false so the target independent
2319 // code can handle them by assigning a FrameIdx to a stack slot.
2320 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2321     MachineFunction &MF, const TargetRegisterInfo *TRI,
2322     std::vector<CalleeSavedInfo> &CSI) const {
2323 
2324   if (CSI.empty())
2325     return true; // Early exit if no callee saved registers are modified!
2326 
2327   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2328   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2329   const MachineRegisterInfo &MRI = MF.getRegInfo();
2330 
2331   if (Subtarget.hasSPE()) {
2332     // In case of SPE we only have SuperRegs and CRs
2333     // in our CalleSaveInfo vector.
2334 
2335     for (auto &CalleeSaveReg : CSI) {
2336       MCPhysReg Reg = CalleeSaveReg.getReg();
2337       MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
2338       MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
2339 
2340       if ( // Check only for SuperRegs.
2341           Lower &&
2342           // Replace Reg if only lower-32 bits modified
2343           !MRI.isPhysRegModified(Higher))
2344         CalleeSaveReg = CalleeSavedInfo(Lower);
2345     }
2346   }
2347 
2348   // Early exit if cannot spill gprs to volatile vector registers.
2349   MachineFrameInfo &MFI = MF.getFrameInfo();
2350   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2351     return false;
2352 
2353   // Build a BitVector of VSRs that can be used for spilling GPRs.
2354   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2355   BitVector BVCalleeSaved(TRI->getNumRegs());
2356   for (unsigned i = 0; CSRegs[i]; ++i)
2357     BVCalleeSaved.set(CSRegs[i]);
2358 
2359   for (unsigned Reg : BVAllocatable.set_bits()) {
2360     // Set to 0 if the register is not a volatile VSX register, or if it is
2361     // used in the function.
2362     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2363         MRI.isPhysRegUsed(Reg))
2364       BVAllocatable.reset(Reg);
2365   }
2366 
2367   bool AllSpilledToReg = true;
2368   unsigned LastVSRUsedForSpill = 0;
2369   for (auto &CS : CSI) {
2370     if (BVAllocatable.none())
2371       return false;
2372 
2373     Register Reg = CS.getReg();
2374 
2375     if (!PPC::G8RCRegClass.contains(Reg)) {
2376       AllSpilledToReg = false;
2377       continue;
2378     }
2379 
2380     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2381     // into one VSR using the mtvsrdd instruction.
2382     if (LastVSRUsedForSpill != 0) {
2383       CS.setDstReg(LastVSRUsedForSpill);
2384       BVAllocatable.reset(LastVSRUsedForSpill);
2385       LastVSRUsedForSpill = 0;
2386       continue;
2387     }
2388 
2389     unsigned VolatileVFReg = BVAllocatable.find_first();
2390     if (VolatileVFReg < BVAllocatable.size()) {
2391       CS.setDstReg(VolatileVFReg);
2392       LastVSRUsedForSpill = VolatileVFReg;
2393     } else {
2394       AllSpilledToReg = false;
2395     }
2396   }
2397   return AllSpilledToReg;
2398 }
2399 
2400 bool PPCFrameLowering::spillCalleeSavedRegisters(
2401     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2402     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2403 
2404   MachineFunction *MF = MBB.getParent();
2405   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2406   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2407   bool MustSaveTOC = FI->mustSaveTOC();
2408   DebugLoc DL;
2409   bool CRSpilled = false;
2410   MachineInstrBuilder CRMIB;
2411   BitVector Spilled(TRI->getNumRegs());
2412 
2413   VSRContainingGPRs.clear();
2414 
2415   // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
2416   // or two GPRs, so we need table to record information for later save/restore.
2417   for (const CalleeSavedInfo &Info : CSI) {
2418     if (Info.isSpilledToReg()) {
2419       auto &SpilledVSR =
2420           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2421       assert(SpilledVSR.second == 0 &&
2422              "Can't spill more than two GPRs into VSR!");
2423       if (SpilledVSR.first == 0)
2424         SpilledVSR.first = Info.getReg();
2425       else
2426         SpilledVSR.second = Info.getReg();
2427     }
2428   }
2429 
2430   for (const CalleeSavedInfo &I : CSI) {
2431     Register Reg = I.getReg();
2432 
2433     // CR2 through CR4 are the nonvolatile CR fields.
2434     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2435 
2436     // Add the callee-saved register as live-in; it's killed at the spill.
2437     // Do not do this for callee-saved registers that are live-in to the
2438     // function because they will already be marked live-in and this will be
2439     // adding it for a second time. It is an error to add the same register
2440     // to the set more than once.
2441     const MachineRegisterInfo &MRI = MF->getRegInfo();
2442     bool IsLiveIn = MRI.isLiveIn(Reg);
2443     if (!IsLiveIn)
2444        MBB.addLiveIn(Reg);
2445 
2446     if (CRSpilled && IsCRField) {
2447       CRMIB.addReg(Reg, RegState::ImplicitKill);
2448       continue;
2449     }
2450 
2451     // The actual spill will happen in the prologue.
2452     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2453       continue;
2454 
2455     // Insert the spill to the stack frame.
2456     if (IsCRField) {
2457       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2458       if (!Subtarget.is32BitELFABI()) {
2459         // The actual spill will happen at the start of the prologue.
2460         FuncInfo->addMustSaveCR(Reg);
2461       } else {
2462         CRSpilled = true;
2463         FuncInfo->setSpillsCR();
2464 
2465         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2466         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2467         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2468                   .addReg(Reg, RegState::ImplicitKill);
2469 
2470         MBB.insert(MI, CRMIB);
2471         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2472                                          .addReg(PPC::R12,
2473                                                  getKillRegState(true)),
2474                                          I.getFrameIdx()));
2475       }
2476     } else {
2477       if (I.isSpilledToReg()) {
2478         unsigned Dst = I.getDstReg();
2479 
2480         if (Spilled[Dst])
2481           continue;
2482 
2483         if (VSRContainingGPRs[Dst].second != 0) {
2484           assert(Subtarget.hasP9Vector() &&
2485                  "mtvsrdd is unavailable on pre-P9 targets.");
2486 
2487           NumPESpillVSR += 2;
2488           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2489               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2490               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2491         } else if (VSRContainingGPRs[Dst].second == 0) {
2492           assert(Subtarget.hasP8Vector() &&
2493                  "Can't move GPR to VSR on pre-P8 targets.");
2494 
2495           ++NumPESpillVSR;
2496           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2497                   TRI->getSubReg(Dst, PPC::sub_64))
2498               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2499         } else {
2500           llvm_unreachable("More than two GPRs spilled to a VSR!");
2501         }
2502         Spilled.set(Dst);
2503       } else {
2504         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2505         // Use !IsLiveIn for the kill flag.
2506         // We do not want to kill registers that are live in this function
2507         // before their use because they will become undefined registers.
2508         // Functions without NoUnwind need to preserve the order of elements in
2509         // saved vector registers.
2510         if (Subtarget.needsSwapsForVSXMemOps() &&
2511             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2512           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2513                                        I.getFrameIdx(), RC, TRI);
2514         else
2515           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2516                                   TRI, Register());
2517       }
2518     }
2519   }
2520   return true;
2521 }
2522 
2523 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2524                        bool CR4Spilled, MachineBasicBlock &MBB,
2525                        MachineBasicBlock::iterator MI,
2526                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2527 
2528   MachineFunction *MF = MBB.getParent();
2529   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2530   DebugLoc DL;
2531   unsigned MoveReg = PPC::R12;
2532 
2533   // 32-bit:  FP-relative
2534   MBB.insert(MI,
2535              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2536                                CSI[CSIIndex].getFrameIdx()));
2537 
2538   unsigned RestoreOp = PPC::MTOCRF;
2539   if (CR2Spilled)
2540     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2541                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2542 
2543   if (CR3Spilled)
2544     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2545                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2546 
2547   if (CR4Spilled)
2548     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2549                .addReg(MoveReg, getKillRegState(true)));
2550 }
2551 
2552 MachineBasicBlock::iterator PPCFrameLowering::
2553 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2554                               MachineBasicBlock::iterator I) const {
2555   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2556   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2557       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2558     // Add (actually subtract) back the amount the callee popped on return.
2559     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2560       bool is64Bit = Subtarget.isPPC64();
2561       CalleeAmt *= -1;
2562       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2563       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2564       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2565       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2566       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2567       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2568       const DebugLoc &dl = I->getDebugLoc();
2569 
2570       if (isInt<16>(CalleeAmt)) {
2571         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2572           .addReg(StackReg, RegState::Kill)
2573           .addImm(CalleeAmt);
2574       } else {
2575         MachineBasicBlock::iterator MBBI = I;
2576         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2577           .addImm(CalleeAmt >> 16);
2578         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2579           .addReg(TmpReg, RegState::Kill)
2580           .addImm(CalleeAmt & 0xFFFF);
2581         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2582           .addReg(StackReg, RegState::Kill)
2583           .addReg(TmpReg);
2584       }
2585     }
2586   }
2587   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2588   return MBB.erase(I);
2589 }
2590 
2591 static bool isCalleeSavedCR(unsigned Reg) {
2592   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2593 }
2594 
2595 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2596     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2597     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2598   MachineFunction *MF = MBB.getParent();
2599   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2600   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2601   bool MustSaveTOC = FI->mustSaveTOC();
2602   bool CR2Spilled = false;
2603   bool CR3Spilled = false;
2604   bool CR4Spilled = false;
2605   unsigned CSIIndex = 0;
2606   BitVector Restored(TRI->getNumRegs());
2607 
2608   // Initialize insertion-point logic; we will be restoring in reverse
2609   // order of spill.
2610   MachineBasicBlock::iterator I = MI, BeforeI = I;
2611   bool AtStart = I == MBB.begin();
2612 
2613   if (!AtStart)
2614     --BeforeI;
2615 
2616   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2617     Register Reg = CSI[i].getReg();
2618 
2619     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2620       continue;
2621 
2622     // Restore of callee saved condition register field is handled during
2623     // epilogue insertion.
2624     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2625       continue;
2626 
2627     if (Reg == PPC::CR2) {
2628       CR2Spilled = true;
2629       // The spill slot is associated only with CR2, which is the
2630       // first nonvolatile spilled.  Save it here.
2631       CSIIndex = i;
2632       continue;
2633     } else if (Reg == PPC::CR3) {
2634       CR3Spilled = true;
2635       continue;
2636     } else if (Reg == PPC::CR4) {
2637       CR4Spilled = true;
2638       continue;
2639     } else {
2640       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2641       // least one CR register, restore all spilled CRs together.
2642       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2643         bool is31 = needsFP(*MF);
2644         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2645                    CSIIndex);
2646         CR2Spilled = CR3Spilled = CR4Spilled = false;
2647       }
2648 
2649       if (CSI[i].isSpilledToReg()) {
2650         DebugLoc DL;
2651         unsigned Dst = CSI[i].getDstReg();
2652 
2653         if (Restored[Dst])
2654           continue;
2655 
2656         if (VSRContainingGPRs[Dst].second != 0) {
2657           assert(Subtarget.hasP9Vector());
2658           NumPEReloadVSR += 2;
2659           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2660                   VSRContainingGPRs[Dst].second)
2661               .addReg(Dst);
2662           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2663                   VSRContainingGPRs[Dst].first)
2664               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2665         } else if (VSRContainingGPRs[Dst].second == 0) {
2666           assert(Subtarget.hasP8Vector());
2667           ++NumPEReloadVSR;
2668           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2669                   VSRContainingGPRs[Dst].first)
2670               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2671         } else {
2672           llvm_unreachable("More than two GPRs spilled to a VSR!");
2673         }
2674 
2675         Restored.set(Dst);
2676 
2677       } else {
2678         // Default behavior for non-CR saves.
2679         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2680 
2681         // Functions without NoUnwind need to preserve the order of elements in
2682         // saved vector registers.
2683         if (Subtarget.needsSwapsForVSXMemOps() &&
2684             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2685           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2686                                         TRI);
2687         else
2688           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2689                                    Register());
2690 
2691         assert(I != MBB.begin() &&
2692                "loadRegFromStackSlot didn't insert any code!");
2693       }
2694     }
2695 
2696     // Insert in reverse order.
2697     if (AtStart)
2698       I = MBB.begin();
2699     else {
2700       I = BeforeI;
2701       ++I;
2702     }
2703   }
2704 
2705   // If we haven't yet spilled the CRs, do so now.
2706   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2707     assert(Subtarget.is32BitELFABI() &&
2708            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2709     bool is31 = needsFP(*MF);
2710     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2711   }
2712 
2713   return true;
2714 }
2715 
2716 uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2717   return TOCSaveOffset;
2718 }
2719 
2720 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2721   return FramePointerSaveOffset;
2722 }
2723 
2724 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2725   return BasePointerSaveOffset;
2726 }
2727 
2728 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2729   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2730     return false;
2731   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2732 }
2733 
2734 void PPCFrameLowering::updateCalleeSaves(const MachineFunction &MF,
2735                                          BitVector &SavedRegs) const {
2736   // The AIX ABI uses traceback tables for EH which require that if callee-saved
2737   // register N is used, all registers N-31 must be saved/restored.
2738   // NOTE: The check for AIX is not actually what is relevant. Traceback tables
2739   // on Linux have the same requirements. It is just that AIX is the only ABI
2740   // for which we actually use traceback tables. If another ABI needs to be
2741   // supported that also uses them, we can add a check such as
2742   // Subtarget.usesTraceBackTables().
2743   assert(Subtarget.isAIXABI() &&
2744          "Function updateCalleeSaves should only be called for AIX.");
2745 
2746   // If there are no callee saves then there is nothing to do.
2747   if (SavedRegs.none())
2748     return;
2749 
2750   const MCPhysReg *CSRegs =
2751       Subtarget.getRegisterInfo()->getCalleeSavedRegs(&MF);
2752   MCPhysReg LowestGPR = PPC::R31;
2753   MCPhysReg LowestG8R = PPC::X31;
2754   MCPhysReg LowestFPR = PPC::F31;
2755   MCPhysReg LowestVR = PPC::V31;
2756 
2757   // Traverse the CSRs twice so as not to rely on ascending ordering of
2758   // registers in the array. The first pass finds the lowest numbered
2759   // register and the second pass marks all higher numbered registers
2760   // for spilling.
2761   for (int i = 0; CSRegs[i]; i++) {
2762     // Get the lowest numbered register for each class that actually needs
2763     // to be saved.
2764     MCPhysReg Cand = CSRegs[i];
2765     if (!SavedRegs.test(Cand))
2766       continue;
2767     if (PPC::GPRCRegClass.contains(Cand) && Cand < LowestGPR)
2768       LowestGPR = Cand;
2769     else if (PPC::G8RCRegClass.contains(Cand) && Cand < LowestG8R)
2770       LowestG8R = Cand;
2771     else if ((PPC::F4RCRegClass.contains(Cand) ||
2772               PPC::F8RCRegClass.contains(Cand)) &&
2773              Cand < LowestFPR)
2774       LowestFPR = Cand;
2775     else if (PPC::VRRCRegClass.contains(Cand) && Cand < LowestVR)
2776       LowestVR = Cand;
2777   }
2778 
2779   for (int i = 0; CSRegs[i]; i++) {
2780     MCPhysReg Cand = CSRegs[i];
2781     if ((PPC::GPRCRegClass.contains(Cand) && Cand > LowestGPR) ||
2782         (PPC::G8RCRegClass.contains(Cand) && Cand > LowestG8R) ||
2783         ((PPC::F4RCRegClass.contains(Cand) ||
2784           PPC::F8RCRegClass.contains(Cand)) &&
2785          Cand > LowestFPR) ||
2786         (PPC::VRRCRegClass.contains(Cand) && Cand > LowestVR))
2787       SavedRegs.set(Cand);
2788   }
2789 }
2790 
2791 uint64_t PPCFrameLowering::getStackThreshold() const {
2792   // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
2793   // use `add r1, r1, <scratch_reg>` to release the stack frame.
2794   // Scratch register contains a signed 64-bit number, which is negative
2795   // when extending the stack and is positive when releasing the stack frame.
2796   // To make `stux` and `add` paired, the absolute value of the number contained
2797   // in the scratch register should be the same. Thus the maximum stack size
2798   // is (2^63)-1, i.e., LONG_MAX.
2799   if (Subtarget.isPPC64())
2800     return LONG_MAX;
2801 
2802   return TargetFrameLowering::getStackThreshold();
2803 }
2804