1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterScavenging.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/Target/TargetOptions.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "framelowering"
34 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36 STATISTIC(NumPrologProbed, "Number of prologues probed");
37 
38 static cl::opt<bool>
39 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
40                      cl::desc("Enable spills in prologue to vector registers."),
41                      cl::init(false), cl::Hidden);
42 
43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44   if (STI.isAIXABI())
45     return STI.isPPC64() ? 16 : 8;
46   // SVR4 ABI:
47   return STI.isPPC64() ? 16 : 4;
48 }
49 
50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
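  // The TOC (r2) save slot is part of the stack frame linkage area: SP + 20 on
  // 32-bit AIX, SP + 40 on 64-bit AIX and ELFv1, and SP + 24 on ELFv2.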
51   if (STI.isAIXABI())
52     return STI.isPPC64() ? 40 : 20;
53   return STI.isELFv2ABI() ? 24 : 40;
54 }
55 
56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57   // First slot in the general register save area.
58   return STI.isPPC64() ? -8U : -4U;
59 }
60 
61 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
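  // The linkage area is 6 doublewords (48 bytes) on 64-bit AIX and ELFv1,
  // 4 doublewords (32 bytes) on ELFv2, and 6 words (24 bytes) on 32-bit AIX.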
62   if (STI.isAIXABI() || STI.isPPC64())
63     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64 
65   // 32-bit SVR4 ABI:
66   return 8;
67 }
68 
69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70   // Third slot in the general purpose register save area.
71   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72     return -12U;
73 
74   // Second slot in the general purpose register save area.
75   return STI.isPPC64() ? -16U : -8U;
76 }
77 
78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80 }
81 
82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84                           STI.getPlatformStackAlignment(), 0),
85       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88       LinkageSize(computeLinkageSize(Subtarget)),
89       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
90       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91 
92 // With the SVR4 and AIX ABIs, callee-saved registers have fixed stack offsets.
93 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
94     unsigned &NumEntries) const {
95 
96 // Floating-point register save area offsets.
97 #define CALLEE_SAVED_FPRS \
98       {PPC::F31, -8},     \
99       {PPC::F30, -16},    \
100       {PPC::F29, -24},    \
101       {PPC::F28, -32},    \
102       {PPC::F27, -40},    \
103       {PPC::F26, -48},    \
104       {PPC::F25, -56},    \
105       {PPC::F24, -64},    \
106       {PPC::F23, -72},    \
107       {PPC::F22, -80},    \
108       {PPC::F21, -88},    \
109       {PPC::F20, -96},    \
110       {PPC::F19, -104},   \
111       {PPC::F18, -112},   \
112       {PPC::F17, -120},   \
113       {PPC::F16, -128},   \
114       {PPC::F15, -136},   \
115       {PPC::F14, -144}
116 
117 // 32-bit general purpose register save area offsets shared by ELF and
118 // AIX. AIX has an extra CSR with r13.
119 #define CALLEE_SAVED_GPRS32 \
120       {PPC::R31, -4},       \
121       {PPC::R30, -8},       \
122       {PPC::R29, -12},      \
123       {PPC::R28, -16},      \
124       {PPC::R27, -20},      \
125       {PPC::R26, -24},      \
126       {PPC::R25, -28},      \
127       {PPC::R24, -32},      \
128       {PPC::R23, -36},      \
129       {PPC::R22, -40},      \
130       {PPC::R21, -44},      \
131       {PPC::R20, -48},      \
132       {PPC::R19, -52},      \
133       {PPC::R18, -56},      \
134       {PPC::R17, -60},      \
135       {PPC::R16, -64},      \
136       {PPC::R15, -68},      \
137       {PPC::R14, -72}
138 
139 // 64-bit general purpose register save area offsets.
140 #define CALLEE_SAVED_GPRS64 \
141       {PPC::X31, -8},       \
142       {PPC::X30, -16},      \
143       {PPC::X29, -24},      \
144       {PPC::X28, -32},      \
145       {PPC::X27, -40},      \
146       {PPC::X26, -48},      \
147       {PPC::X25, -56},      \
148       {PPC::X24, -64},      \
149       {PPC::X23, -72},      \
150       {PPC::X22, -80},      \
151       {PPC::X21, -88},      \
152       {PPC::X20, -96},      \
153       {PPC::X19, -104},     \
154       {PPC::X18, -112},     \
155       {PPC::X17, -120},     \
156       {PPC::X16, -128},     \
157       {PPC::X15, -136},     \
158       {PPC::X14, -144}
159 
160 // Vector register save area offsets.
161 #define CALLEE_SAVED_VRS \
162       {PPC::V31, -16},   \
163       {PPC::V30, -32},   \
164       {PPC::V29, -48},   \
165       {PPC::V28, -64},   \
166       {PPC::V27, -80},   \
167       {PPC::V26, -96},   \
168       {PPC::V25, -112},  \
169       {PPC::V24, -128},  \
170       {PPC::V23, -144},  \
171       {PPC::V22, -160},  \
172       {PPC::V21, -176},  \
173       {PPC::V20, -192}
174 
175   // Note that the offsets here overlap, but this is fixed up in
176   // processFunctionBeforeFrameFinalized.
177 
178   static const SpillSlot ELFOffsets32[] = {
179       CALLEE_SAVED_FPRS,
180       CALLEE_SAVED_GPRS32,
181 
182       // CR save area offset.  We map each of the nonvolatile CR fields
183       // to the slot for CR2, which is the first of the nonvolatile CR
184       // fields to be assigned, so that we only allocate one save slot.
185       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
186       {PPC::CR2, -4},
187 
188       // VRSAVE save area offset.
189       {PPC::VRSAVE, -4},
190 
191       CALLEE_SAVED_VRS,
192 
193       // SPE register save area (overlaps Vector save area).
194       {PPC::S31, -8},
195       {PPC::S30, -16},
196       {PPC::S29, -24},
197       {PPC::S28, -32},
198       {PPC::S27, -40},
199       {PPC::S26, -48},
200       {PPC::S25, -56},
201       {PPC::S24, -64},
202       {PPC::S23, -72},
203       {PPC::S22, -80},
204       {PPC::S21, -88},
205       {PPC::S20, -96},
206       {PPC::S19, -104},
207       {PPC::S18, -112},
208       {PPC::S17, -120},
209       {PPC::S16, -128},
210       {PPC::S15, -136},
211       {PPC::S14, -144}};
212 
213   static const SpillSlot ELFOffsets64[] = {
214       CALLEE_SAVED_FPRS,
215       CALLEE_SAVED_GPRS64,
216 
217       // VRSAVE save area offset.
218       {PPC::VRSAVE, -4},
219       CALLEE_SAVED_VRS
220   };
221 
222   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
223                                            CALLEE_SAVED_GPRS32,
224                                            // Add AIX's extra CSR.
225                                            {PPC::R13, -76},
226                                            CALLEE_SAVED_VRS};
227 
228   static const SpillSlot AIXOffsets64[] = {
229       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
230 
231   if (Subtarget.is64BitELFABI()) {
232     NumEntries = std::size(ELFOffsets64);
233     return ELFOffsets64;
234   }
235 
236   if (Subtarget.is32BitELFABI()) {
237     NumEntries = std::size(ELFOffsets32);
238     return ELFOffsets32;
239   }
240 
241   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
242 
243   if (Subtarget.isPPC64()) {
244     NumEntries = std::size(AIXOffsets64);
245     return AIXOffsets64;
246   }
247 
248   NumEntries = std::size(AIXOffsets32);
249   return AIXOffsets32;
250 }
251 
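// Convenience wrappers around the spill-tracking flags recorded in
// PPCFunctionInfo.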
252 static bool spillsCR(const MachineFunction &MF) {
253   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254   return FuncInfo->isCRSpilled();
255 }
256 
257 static bool hasSpills(const MachineFunction &MF) {
258   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259   return FuncInfo->hasSpills();
260 }
261 
262 static bool hasNonRISpills(const MachineFunction &MF) {
263   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264   return FuncInfo->hasNonRISpills();
265 }
266 
267 /// MustSaveLR - Return true if this function requires that we save the LR
268 /// register onto the stack in the prolog and restore it in the epilog of the
269 /// function.
270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272 
273   // We need a save/restore of LR if there is any def of LR (which is
274   // defined by calls, including the PIC setup sequence), or if there is
275   // some use of the LR stack slot (e.g. for builtin_return_address).
276   // (LR comes in 32 and 64 bit versions.)
277   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278   return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279 }
280 
281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282 /// call frame size. Update the MachineFunction object with the stack size.
283 uint64_t
284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285                                                 bool UseEstimate) const {
286   unsigned NewMaxCallFrameSize = 0;
287   uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288                                             &NewMaxCallFrameSize);
289   MF.getFrameInfo().setStackSize(FrameSize);
290   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291   return FrameSize;
292 }
293 
294 /// determineFrameLayout - Determine the size of the frame and maximum call
295 /// frame size.
296 uint64_t
297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298                                        bool UseEstimate,
299                                        unsigned *NewMaxCallFrameSize) const {
300   const MachineFrameInfo &MFI = MF.getFrameInfo();
301   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302 
303   // Get the number of bytes to allocate from the FrameInfo
304   uint64_t FrameSize =
305     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306 
307   // Get stack alignments. The frame must be aligned to the greatest of these:
308   Align TargetAlign = getStackAlign(); // alignment required per the ABI
309   Align MaxAlign = MFI.getMaxAlign();  // alignment required by data in frame
310   Align Alignment = std::max(TargetAlign, MaxAlign);
311 
312   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313 
314   unsigned LR = RegInfo->getRARegister();
315   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317                        !MFI.adjustsStack() &&       // No calls.
318                        !MustSaveLR(MF, LR) &&       // No need to save LR.
319                        !FI->mustSaveTOC() &&        // No need to save TOC.
320                        !RegInfo->hasBasePointer(MF); // No special alignment.
321 
322   // Note: for PPC32 SVR4ABI, we can still generate stackless
323   // code if all local vars are reg-allocated.
324   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
325 
326   // Check whether we can skip adjusting the stack pointer (by using red zone)
327   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
328     // No need for frame
329     return 0;
330   }
331 
332   // Get the maximum call frame size of all the calls.
333   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
334 
335   // Maximum call frame needs to be at least big enough for linkage area.
336   unsigned minCallFrameSize = getLinkageSize();
337   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
338 
339   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
340   // that allocations will be aligned.
341   if (MFI.hasVarSizedObjects())
342     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
343 
344   // Update the new max call frame size if the caller passes in a valid pointer.
345   if (NewMaxCallFrameSize)
346     *NewMaxCallFrameSize = maxCallFrameSize;
347 
348   // Include call frame size in total.
349   FrameSize += maxCallFrameSize;
350 
351   // Make sure the frame is aligned.
352   FrameSize = alignTo(FrameSize, Alignment);
353 
354   return FrameSize;
355 }
356 
357 // hasFP - Return true if the specified function actually has a dedicated frame
358 // pointer register.
359 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
360   const MachineFrameInfo &MFI = MF.getFrameInfo();
361   // FIXME: This is pretty much broken by design: hasFP() might be called really
362   // early, before the stack layout was calculated and thus hasFP() might return
363   // true or false here depending on the time of call.
364   return (MFI.getStackSize()) && needsFP(MF);
365 }
366 
367 // needsFP - Return true if the specified function should have a dedicated frame
368 // pointer register.  This is true if the function has variable sized allocas or
369 // if frame pointer elimination is disabled.
370 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
371   const MachineFrameInfo &MFI = MF.getFrameInfo();
372 
373   // Naked functions have no stack frame pushed, so we don't have a frame
374   // pointer.
375   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
376     return false;
377 
378   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
379          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
380          MF.exposesReturnsTwice() ||
381          (MF.getTarget().Options.GuaranteedTailCallOpt &&
382           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
383 }
384 
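// Rewrite uses of the FP/BP placeholder registers (PPC::FP, PPC::FP8, PPC::BP,
// PPC::BP8) to the physical registers chosen for this function: R31/X31 when a
// frame pointer is needed (otherwise R1/X1), and the designated base pointer
// register when one is in use.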
385 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
386   bool is31 = needsFP(MF);
387   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
388   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
389 
390   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
391   bool HasBP = RegInfo->hasBasePointer(MF);
392   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
393   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
394 
395   for (MachineBasicBlock &MBB : MF)
396     for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
397       --MBBI;
398       for (MachineOperand &MO : MBBI->operands()) {
399         if (!MO.isReg())
400           continue;
401 
402         switch (MO.getReg()) {
403         case PPC::FP:
404           MO.setReg(FPReg);
405           break;
406         case PPC::FP8:
407           MO.setReg(FP8Reg);
408           break;
409         case PPC::BP:
410           MO.setReg(BPReg);
411           break;
412         case PPC::BP8:
413           MO.setReg(BP8Reg);
414           break;
415 
416         }
417       }
418     }
419 }
420 
421 /*  This function will do the following:
422     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
423       respectively (defaults recommended by the ABI) and return true
424     - If MBB is not an entry block, initialize the register scavenger and look
425       for available registers.
426     - If the defaults (R0/R12) are available, return true
427     - If TwoUniqueRegsRequired is set to true, it looks for two unique
428       registers. Otherwise, look for a single available register.
429       - If the required registers are found, set SR1 and SR2 and return true.
430       - If the required registers are not found, set SR2 or both SR1 and SR2 to
431         PPC::NoRegister and return false.
432 
433     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
434     is not set, this function will attempt to find two different registers, but
435     still return true if only one register is available (and set SR1 == SR2).
436 */
437 bool
438 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
439                                       bool UseAtEnd,
440                                       bool TwoUniqueRegsRequired,
441                                       Register *SR1,
442                                       Register *SR2) const {
443   RegScavenger RS;
444   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
445   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
446 
447   // Set the defaults for the two scratch registers.
448   if (SR1)
449     *SR1 = R0;
450 
451   if (SR2) {
452     assert (SR1 && "Asking for the second scratch register but not the first?");
453     *SR2 = R12;
454   }
455 
456   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
457   if ((UseAtEnd && MBB->isReturnBlock()) ||
458       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
459     return true;
460 
461   if (UseAtEnd) {
462     // The scratch register will be used before the first terminator (or at the
463     // end of the block if there are no terminators).
464     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
465     if (MBBI == MBB->begin()) {
466       RS.enterBasicBlock(*MBB);
467     } else {
468       RS.enterBasicBlockEnd(*MBB);
469       RS.backward(MBBI);
470     }
471   } else {
472     // The scratch register will be used at the start of the block.
473     RS.enterBasicBlock(*MBB);
474   }
475 
476   // If the two registers are available, we're all good.
477   // Note that we only return here if both R0 and R12 are available because
478   // although the function may not require two unique registers, it may benefit
479   // from having two so we should try to provide them.
480   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
481     return true;
482 
483   // Get the list of callee-saved registers for the target.
484   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
485   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
486 
487   // Get all the available registers in the block.
488   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
489                                      &PPC::GPRCRegClass);
490 
491   // We shouldn't use callee-saved registers as scratch registers as they may be
492   // available when looking for a candidate block for shrink wrapping but not
493   // available when the actual prologue/epilogue is being emitted because they
494   // were added as live-in to the prologue block by PrologueEpilogueInserter.
495   for (int i = 0; CSRegs[i]; ++i)
496     BV.reset(CSRegs[i]);
497 
498   // Set the first scratch register to the first available one.
499   if (SR1) {
500     int FirstScratchReg = BV.find_first();
501     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
502   }
503 
504   // If there is another one available, set the second scratch register to that.
505   // Otherwise, set it to either PPC::NoRegister if this function requires two
506   // or to whatever SR1 is set to if this function doesn't require two.
507   if (SR2) {
508     int SecondScratchReg = BV.find_next(*SR1);
509     if (SecondScratchReg != -1)
510       *SR2 = SecondScratchReg;
511     else
512       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
513   }
514 
515   // Now that we've done our best to provide both registers, double check
516   // whether we were unable to provide enough.
517   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
518     return false;
519 
520   return true;
521 }
522 
523 // We need a scratch register for spilling LR and for spilling CR. By default,
524 // we use two scratch registers to hide latency. If only one scratch register
525 // is available, we can adjust for that by not overlapping the spill code.
526 // However, if we need to realign the stack (i.e. have a base pointer) and the
527 // stack frame is large, we need two scratch registers.
528 // Also, inline stack probing requires two scratch registers: one to hold the
529 // old SP and one to handle a large frame with a large probe size.
530 bool
531 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
532   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
533   MachineFunction &MF = *(MBB->getParent());
534   bool HasBP = RegInfo->hasBasePointer(MF);
535   unsigned FrameSize = determineFrameLayout(MF);
536   int NegFrameSize = -FrameSize;
537   bool IsLargeFrame = !isInt<16>(NegFrameSize);
538   MachineFrameInfo &MFI = MF.getFrameInfo();
539   Align MaxAlign = MFI.getMaxAlign();
540   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
541   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
542 
543   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
544          TLI.hasInlineStackProbe(MF);
545 }
546 
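// A block can be used as a prologue/epilogue insertion point (e.g. for shrink
// wrapping) only if suitable scratch registers can be found in it.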
547 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
548   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
549 
550   return findScratchRegister(TmpMBB, false,
551                              twoUniqueScratchRegsRequired(TmpMBB));
552 }
553 
554 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
555   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
556 
557   return findScratchRegister(TmpMBB, true);
558 }
559 
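// Return true if it is safe to move the prologue's stack pointer update (the
// store-with-update) down past the callee-saved register stores. This is only
// done on 64-bit ELFv2 for frames that fit in the red zone, since the saves
// temporarily land below the un-updated stack pointer.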
560 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
561   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
562   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
563 
564   // Abort if there is no register info or function info.
565   if (!RegInfo || !FI)
566     return false;
567 
568   // Only move the stack update on ELFv2 ABI and PPC64.
569   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
570     return false;
571 
572   // Check the frame size first and return false if it does not fit the
573   // requirements.
574   // We need a non-zero frame size as well as a frame that will fit in the red
575   // zone. This is because by moving the stack pointer update we are now storing
576   // to the red zone until the stack pointer is updated. If we get an interrupt
577   // inside the prologue but before the stack update we now have a number of
578   // stores to the red zone and those stores must all fit.
579   MachineFrameInfo &MFI = MF.getFrameInfo();
580   unsigned FrameSize = MFI.getStackSize();
581   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
582     return false;
583 
584   // Frame pointers and base pointers complicate matters so don't do anything
585   // if we have them. For example having a frame pointer will sometimes require
586   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
587   // difficult. Similar situation exists with setjmp.
588   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
589     return false;
590 
591   // Calls to fast_cc functions use different rules than the ABI for passing
592   // parameters on the stack, and using a PIC base in the function imposes
593   // restrictions similar to those of the base pointer. It is not generally
594   // safe to move the stack pointer update in these situations.
595   if (FI->hasFastCall() || FI->usesPICBase())
596     return false;
597 
598   // Finally we can move the stack update if we do not require register
599   // scavenging. Register scavenging can introduce more spills and so
600   // may make the frame size larger than we have computed.
601   return !RegInfo->requiresFrameIndexScavenging(MF);
602 }
603 
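// Emit the prologue: save LR, CR, TOC and the frame/base/PIC-base pointers as
// required, allocate the stack frame (emitting a probing pseudo when inline
// stack probing is requested), and emit the corresponding CFI directives.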
604 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
605                                     MachineBasicBlock &MBB) const {
606   MachineBasicBlock::iterator MBBI = MBB.begin();
607   MachineFrameInfo &MFI = MF.getFrameInfo();
608   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
609   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
610   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
611 
612   MachineModuleInfo &MMI = MF.getMMI();
613   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
614   DebugLoc dl;
615   // AIX assembler does not support cfi directives.
616   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
617 
618   const bool HasFastMFLR = Subtarget.hasFastMFLR();
619 
620   // Get processor type.
621   bool isPPC64 = Subtarget.isPPC64();
622   // Get the ABI.
623   bool isSVR4ABI = Subtarget.isSVR4ABI();
624   bool isELFv2ABI = Subtarget.isELFv2ABI();
625   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
626 
627   // Work out frame sizes.
628   uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
629   int64_t NegFrameSize = -FrameSize;
630   if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
631     llvm_unreachable("Unhandled stack size!");
632 
633   if (MFI.isFrameAddressTaken())
634     replaceFPWithRealFP(MF);
635 
636   // Check if the link register (LR) must be saved.
637   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
638   bool MustSaveLR = FI->mustSaveLR();
639   bool MustSaveTOC = FI->mustSaveTOC();
640   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
641   bool MustSaveCR = !MustSaveCRs.empty();
642   // Do we have a frame pointer and/or base pointer for this function?
643   bool HasFP = hasFP(MF);
644   bool HasBP = RegInfo->hasBasePointer(MF);
645   bool HasRedZone = isPPC64 || !isSVR4ABI;
646   bool HasROPProtect = Subtarget.hasROPProtect();
647   bool HasPrivileged = Subtarget.hasPrivileged();
648 
649   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
650   Register BPReg = RegInfo->getBaseRegister(MF);
651   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
652   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
653   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
654   Register ScratchReg;
655   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
656   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
657   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
658                                                 : PPC::MFLR );
659   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
660                                                  : PPC::STW );
661   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
662                                                      : PPC::STWU );
663   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
664                                                         : PPC::STWUX);
665   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
666                                               : PPC::OR );
667   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
668                                                             : PPC::SUBFC);
669   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
670                                                                : PPC::SUBFIC);
671   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
672                                                            : PPC::MFCR);
673   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
674   const MCInstrDesc &HashST =
675       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
676                       : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
677 
678   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
679   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
680   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
681   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
682   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
683          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
684 
685   // Using the same bool variable as below to suppress compiler warnings.
686   bool SingleScratchReg = findScratchRegister(
687       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
688   assert(SingleScratchReg &&
689          "Required number of registers not available in this block");
690 
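  // From here on, SingleScratchReg records whether only one distinct scratch
  // register could be found for this block.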
691   SingleScratchReg = ScratchReg == TempReg;
692 
693   int64_t LROffset = getReturnSaveOffset();
694 
695   int64_t FPOffset = 0;
696   if (HasFP) {
697     MachineFrameInfo &MFI = MF.getFrameInfo();
698     int FPIndex = FI->getFramePointerSaveIndex();
699     assert(FPIndex && "No Frame Pointer Save Slot!");
700     FPOffset = MFI.getObjectOffset(FPIndex);
701   }
702 
703   int64_t BPOffset = 0;
704   if (HasBP) {
705     MachineFrameInfo &MFI = MF.getFrameInfo();
706     int BPIndex = FI->getBasePointerSaveIndex();
707     assert(BPIndex && "No Base Pointer Save Slot!");
708     BPOffset = MFI.getObjectOffset(BPIndex);
709   }
710 
711   int64_t PBPOffset = 0;
712   if (FI->usesPICBase()) {
713     MachineFrameInfo &MFI = MF.getFrameInfo();
714     int PBPIndex = FI->getPICBasePointerSaveIndex();
715     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
716     PBPOffset = MFI.getObjectOffset(PBPIndex);
717   }
718 
719   // Get stack alignments.
720   Align MaxAlign = MFI.getMaxAlign();
721   if (HasBP && MaxAlign > 1)
722     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
723 
724   // Frames of 32KB & larger require special handling because they cannot be
725   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
726   bool isLargeFrame = !isInt<16>(NegFrameSize);
727 
728   // Check if we can move the stack update instruction (stdu) down the prologue
729   // past the callee saves. Hopefully this will avoid the situation where the
730   // saves are waiting for the stack pointer's store-with-update to complete.
731   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
732   bool MovingStackUpdateDown = false;
733 
734   // Check if we can move the stack update.
735   if (stackUpdateCanBeMoved(MF)) {
736     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
737     for (CalleeSavedInfo CSI : Info) {
738       // If the callee saved register is spilled to a register instead of the
739       // stack then the spill no longer uses the stack pointer.
740       // This can lead to two consequences:
741       // 1) We no longer need to update the stack because the function does not
742       //    spill any callee saved registers to stack.
743       // 2) We have a situation where we still have to update the stack pointer
744       //    even though some registers are spilled to other registers. In
745       //    this case the current code moves the stack update to an incorrect
746       //    position.
747       // In either case we should abort moving the stack update operation.
748       if (CSI.isSpilledToReg()) {
749         StackUpdateLoc = MBBI;
750         MovingStackUpdateDown = false;
751         break;
752       }
753 
754       int FrIdx = CSI.getFrameIdx();
755       // If the frame index is not negative the callee saved info belongs to a
756       // stack object that is not a fixed stack object. We ignore non-fixed
757       // stack objects because we won't move the stack update pointer past them.
758       if (FrIdx >= 0)
759         continue;
760 
761       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
762         StackUpdateLoc++;
763         MovingStackUpdateDown = true;
764       } else {
765         // We need all of the Frame Indices to meet these conditions.
766         // If they do not, abort the whole operation.
767         StackUpdateLoc = MBBI;
768         MovingStackUpdateDown = false;
769         break;
770       }
771     }
772 
773     // If the operation was not aborted then update the object offset.
774     if (MovingStackUpdateDown) {
775       for (CalleeSavedInfo CSI : Info) {
776         int FrIdx = CSI.getFrameIdx();
777         if (FrIdx < 0)
778           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
779       }
780     }
781   }
782 
783   // Where in the prologue we move the CR fields depends on how many scratch
784   // registers we have, and if we need to save the link register or not. This
785   // lambda is to avoid duplicating the logic in 2 places.
786   auto BuildMoveFromCR = [&]() {
787     if (isELFv2ABI && MustSaveCRs.size() == 1) {
788       // In the ELFv2 ABI, we are not required to save all CR fields.
789       // If only one CR field is clobbered, it is more efficient to use
790       // mfocrf to selectively save just that field, because mfocrf has
791       // shorter latency compared to mfcr.
792       assert(isPPC64 && "V2 ABI is 64-bit only.");
793       MachineInstrBuilder MIB =
794           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
795       MIB.addReg(MustSaveCRs[0], RegState::Kill);
796     } else {
797       MachineInstrBuilder MIB =
798           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
799       for (unsigned CRfield : MustSaveCRs)
800         MIB.addReg(CRfield, RegState::ImplicitKill);
801     }
802   };
803 
804   // If we need to spill the CR and the LR but we don't have two separate
805   // registers available, we must spill them one at a time.
806   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
807     BuildMoveFromCR();
808     BuildMI(MBB, MBBI, dl, StoreWordInst)
809         .addReg(TempReg, getKillRegState(true))
810         .addImm(CRSaveOffset)
811         .addReg(SPReg);
812   }
813 
814   if (MustSaveLR)
815     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
816 
817   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
818     BuildMoveFromCR();
819 
820   if (HasRedZone) {
821     if (HasFP)
822       BuildMI(MBB, MBBI, dl, StoreInst)
823         .addReg(FPReg)
824         .addImm(FPOffset)
825         .addReg(SPReg);
826     if (FI->usesPICBase())
827       BuildMI(MBB, MBBI, dl, StoreInst)
828         .addReg(PPC::R30)
829         .addImm(PBPOffset)
830         .addReg(SPReg);
831     if (HasBP)
832       BuildMI(MBB, MBBI, dl, StoreInst)
833         .addReg(BPReg)
834         .addImm(BPOffset)
835         .addReg(SPReg);
836   }
837 
838   // Generate the instruction to store the LR. In the case where ROP protection
839   // is required the register holding the LR should not be killed as it will be
840   // used by the hash store instruction.
841   auto SaveLR = [&](int64_t Offset) {
842     assert(MustSaveLR && "LR is not required to be saved!");
843     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
844         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
845         .addImm(Offset)
846         .addReg(SPReg);
847 
848     // Add the ROP protection Hash Store instruction.
849     // NOTE: This is technically a violation of the ABI. The hash can be saved
850     // up to 512 bytes into the Protected Zone. This can be outside of the
851     // initial 288 byte volatile program storage region in the Protected Zone.
852     // However, this restriction will be removed in an upcoming revision of the
853     // ABI.
854     if (HasROPProtect) {
855       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
856       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
857       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
858              "ROP hash save offset out of range.");
859       assert(((ImmOffset & 0x7) == 0) &&
860              "ROP hash save offset must be 8 byte aligned.");
861       BuildMI(MBB, StackUpdateLoc, dl, HashST)
862           .addReg(ScratchReg, getKillRegState(true))
863           .addImm(ImmOffset)
864           .addReg(SPReg);
865     }
866   };
867 
868   if (MustSaveLR && HasFastMFLR)
869       SaveLR(LROffset);
870 
871   if (MustSaveCR &&
872       !(SingleScratchReg && MustSaveLR)) {
873     assert(HasRedZone && "A red zone is always available on PPC64");
874     BuildMI(MBB, MBBI, dl, StoreWordInst)
875       .addReg(TempReg, getKillRegState(true))
876       .addImm(CRSaveOffset)
877       .addReg(SPReg);
878   }
879 
880   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
881   if (!FrameSize) {
882     if (MustSaveLR && !HasFastMFLR)
883       SaveLR(LROffset);
884     return;
885   }
886 
887   // Adjust stack pointer: r1 += NegFrameSize.
888   // If there is a preferred stack alignment, align R1 now
889 
890   if (HasBP && HasRedZone) {
891     // Save a copy of r1 as the base pointer.
892     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
893       .addReg(SPReg)
894       .addReg(SPReg);
895   }
896 
897   // Have we generated a STUX instruction to claim stack frame? If so,
898   // the negated frame size will be placed in ScratchReg.
899   bool HasSTUX =
900       (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
901       (HasBP && MaxAlign > 1) || isLargeFrame;
902 
903   // If we use STUX to update the stack pointer, we need both scratch registers
904   // (TempReg and ScratchReg), so we have to save LR here because it is
905   // currently held in ScratchReg.
906   // If the offset cannot be encoded into the store instruction, we also have
907   // to save LR here.
908   if (MustSaveLR && !HasFastMFLR &&
909       (HasSTUX || !isInt<16>(FrameSize + LROffset)))
910     SaveLR(LROffset);
911 
912   // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe: the ABI
913   // requires the back-chain pointer to always be stored at SP, so the frame is
914   // allocated with a mandatory STU(X) instruction that writes to the new SP.
915   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
916     // To be consistent with other targets, a pseudo instruction is emitted and
917     // will be later expanded in `inlineStackProbe`.
918     BuildMI(MBB, MBBI, dl,
919             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
920                             : PPC::PROBED_STACKALLOC_32))
921         .addDef(TempReg)
922         .addDef(ScratchReg) // ScratchReg stores the old sp.
923         .addImm(NegFrameSize);
924     // FIXME: HasSTUX is only read if HasRedZone is not set; in that case, we
925     // update ScratchReg to satisfy the assumption that ScratchReg contains
926     // the negated frame size. This solution is rather tricky.
927     if (!HasRedZone) {
928       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
929           .addReg(ScratchReg)
930           .addReg(SPReg);
931     }
932   } else {
933     // This condition must be kept in sync with canUseAsPrologue.
934     if (HasBP && MaxAlign > 1) {
935       if (isPPC64)
936         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
937             .addReg(SPReg)
938             .addImm(0)
939             .addImm(64 - Log2(MaxAlign));
940       else // PPC32...
941         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
942             .addReg(SPReg)
943             .addImm(0)
944             .addImm(32 - Log2(MaxAlign))
945             .addImm(31);
946       if (!isLargeFrame) {
947         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
948             .addReg(ScratchReg, RegState::Kill)
949             .addImm(NegFrameSize);
950       } else {
951         assert(!SingleScratchReg && "Only a single scratch reg available");
952         TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
953         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
954             .addReg(ScratchReg, RegState::Kill)
955             .addReg(TempReg, RegState::Kill);
956       }
957 
958       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
959           .addReg(SPReg, RegState::Kill)
960           .addReg(SPReg)
961           .addReg(ScratchReg);
962     } else if (!isLargeFrame) {
963       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
964           .addReg(SPReg)
965           .addImm(NegFrameSize)
966           .addReg(SPReg);
967     } else {
968       TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
969       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
970           .addReg(SPReg, RegState::Kill)
971           .addReg(SPReg)
972           .addReg(ScratchReg);
973     }
974   }
975 
976   // Save the TOC register after the stack pointer update if a prologue TOC
977   // save is required for the function.
978   if (MustSaveTOC) {
979     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
980     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
981       .addReg(TOCReg, getKillRegState(true))
982       .addImm(TOCSaveOffset)
983       .addReg(SPReg);
984   }
985 
986   if (!HasRedZone) {
987     assert(!isPPC64 && "A red zone is always available on PPC64");
988     if (HasSTUX) {
989       // The negated frame size is in ScratchReg, and the SPReg has been
990       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
991       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
992       // the stack frame (i.e. the old SP), ideally, we would put the old
993       // SP into a register and use it as the base for the stores. The
994       // problem is that the only available register may be ScratchReg,
995       // which could be R0, and R0 cannot be used as a base address.
996 
997       // First, set ScratchReg to the old SP. This may need to be modified
998       // later.
999       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1000         .addReg(ScratchReg, RegState::Kill)
1001         .addReg(SPReg);
1002 
1003       if (ScratchReg == PPC::R0) {
1004         // R0 cannot be used as a base register, but it can be used as an
1005         // index in a store-indexed.
1006         int LastOffset = 0;
1007         if (HasFP)  {
1008           // R0 += (FPOffset-LastOffset).
1009           // Need addic, since addi treats R0 as 0.
1010           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1011             .addReg(ScratchReg)
1012             .addImm(FPOffset-LastOffset);
1013           LastOffset = FPOffset;
1014           // Store FP into *R0.
1015           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1016             .addReg(FPReg, RegState::Kill)  // Save FP.
1017             .addReg(PPC::ZERO)
1018             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1019         }
1020         if (FI->usesPICBase()) {
1021           // R0 += (PBPOffset-LastOffset).
1022           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1023             .addReg(ScratchReg)
1024             .addImm(PBPOffset-LastOffset);
1025           LastOffset = PBPOffset;
1026           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1027             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1028             .addReg(PPC::ZERO)
1029             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1030         }
1031         if (HasBP) {
1032           // R0 += (BPOffset-LastOffset).
1033           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1034             .addReg(ScratchReg)
1035             .addImm(BPOffset-LastOffset);
1036           LastOffset = BPOffset;
1037           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1038             .addReg(BPReg, RegState::Kill)  // Save BP.
1039             .addReg(PPC::ZERO)
1040             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1041           // BP = R0-LastOffset
1042           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1043             .addReg(ScratchReg, RegState::Kill)
1044             .addImm(-LastOffset);
1045         }
1046       } else {
1047         // ScratchReg is not R0, so use it as the base register. It is
1048         // already set to the old SP, so we can use the offsets directly.
1049 
1050         // Now that the stack frame has been allocated, save all the necessary
1051         // registers using ScratchReg as the base address.
1052         if (HasFP)
1053           BuildMI(MBB, MBBI, dl, StoreInst)
1054             .addReg(FPReg)
1055             .addImm(FPOffset)
1056             .addReg(ScratchReg);
1057         if (FI->usesPICBase())
1058           BuildMI(MBB, MBBI, dl, StoreInst)
1059             .addReg(PPC::R30)
1060             .addImm(PBPOffset)
1061             .addReg(ScratchReg);
1062         if (HasBP) {
1063           BuildMI(MBB, MBBI, dl, StoreInst)
1064             .addReg(BPReg)
1065             .addImm(BPOffset)
1066             .addReg(ScratchReg);
1067           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1068             .addReg(ScratchReg, RegState::Kill)
1069             .addReg(ScratchReg);
1070         }
1071       }
1072     } else {
1073       // The frame size is a known 16-bit constant (fitting in the immediate
1074       // field of STWU). To be here we have to be compiling for PPC32.
1075       // Since the SPReg has been decreased by FrameSize, add it back to each
1076       // offset.
1077       if (HasFP)
1078         BuildMI(MBB, MBBI, dl, StoreInst)
1079           .addReg(FPReg)
1080           .addImm(FrameSize + FPOffset)
1081           .addReg(SPReg);
1082       if (FI->usesPICBase())
1083         BuildMI(MBB, MBBI, dl, StoreInst)
1084           .addReg(PPC::R30)
1085           .addImm(FrameSize + PBPOffset)
1086           .addReg(SPReg);
1087       if (HasBP) {
1088         BuildMI(MBB, MBBI, dl, StoreInst)
1089           .addReg(BPReg)
1090           .addImm(FrameSize + BPOffset)
1091           .addReg(SPReg);
1092         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1093           .addReg(SPReg)
1094           .addImm(FrameSize);
1095       }
1096     }
1097   }
1098 
1099   // Save the LR now.
1100   if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1101     SaveLR(LROffset + FrameSize);
1102 
1103   // Add Call Frame Information for the instructions we generated above.
1104   if (needsCFI) {
1105     unsigned CFIIndex;
1106 
1107     if (HasBP) {
1108       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1109       // because if the stack needed aligning then CFA won't be at a fixed
1110       // offset from FP/SP.
1111       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1112       CFIIndex = MF.addFrameInst(
1113           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1114     } else {
1115       // Adjust the definition of CFA to account for the change in SP.
1116       assert(NegFrameSize);
1117       CFIIndex = MF.addFrameInst(
1118           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1119     }
1120     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1121         .addCFIIndex(CFIIndex);
1122 
1123     if (HasFP) {
1124       // Describe where FP was saved, at a fixed offset from CFA.
1125       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1126       CFIIndex = MF.addFrameInst(
1127           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1128       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1129           .addCFIIndex(CFIIndex);
1130     }
1131 
1132     if (FI->usesPICBase()) {
1133       // Describe where the PIC base pointer was saved, at a fixed CFA offset.
1134       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1135       CFIIndex = MF.addFrameInst(
1136           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1137       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1138           .addCFIIndex(CFIIndex);
1139     }
1140 
1141     if (HasBP) {
1142       // Describe where BP was saved, at a fixed offset from CFA.
1143       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1144       CFIIndex = MF.addFrameInst(
1145           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1146       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1147           .addCFIIndex(CFIIndex);
1148     }
1149 
1150     if (MustSaveLR) {
1151       // Describe where LR was saved, at a fixed offset from CFA.
1152       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1153       CFIIndex = MF.addFrameInst(
1154           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1155       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1156           .addCFIIndex(CFIIndex);
1157     }
1158   }
1159 
1160   // If there is a frame pointer, copy R1 into R31
1161   if (HasFP) {
1162     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1163       .addReg(SPReg)
1164       .addReg(SPReg);
1165 
1166     if (!HasBP && needsCFI) {
1167       // Change the definition of CFA from SP+offset to FP+offset, because SP
1168       // will change at every alloca.
1169       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1170       unsigned CFIIndex = MF.addFrameInst(
1171           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1172 
1173       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1174           .addCFIIndex(CFIIndex);
1175     }
1176   }
1177 
1178   if (needsCFI) {
1179     // Describe where callee saved registers were saved, at fixed offsets from
1180     // CFA.
1181     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1182     for (const CalleeSavedInfo &I : CSI) {
1183       Register Reg = I.getReg();
1184       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1185 
1186       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1187       // subregisters of CR2. We just need to emit a move of CR2.
1188       if (PPC::CRBITRCRegClass.contains(Reg))
1189         continue;
1190 
1191       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1192         continue;
1193 
1194       // For 64-bit SVR4 when we have spilled CRs, the spill location
1195       // is SP+8, not a frame-relative slot.
1196       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1197         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1198         // the whole CR word.  In the ELFv2 ABI, every CR that was
1199         // actually saved gets its own CFI record.
1200         Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1201         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1202             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1203         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1204             .addCFIIndex(CFIIndex);
1205         continue;
1206       }
1207 
1208       if (I.isSpilledToReg()) {
1209         unsigned SpilledReg = I.getDstReg();
1210         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1211             nullptr, MRI->getDwarfRegNum(Reg, true),
1212             MRI->getDwarfRegNum(SpilledReg, true)));
1213         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1214           .addCFIIndex(CFIRegister);
1215       } else {
1216         int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1217         // We have changed the object offset above but we do not want to change
1218         // the actual offsets in the CFI instruction so we have to undo the
1219         // offset change here.
1220         if (MovingStackUpdateDown)
1221           Offset -= NegFrameSize;
1222 
1223         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1224             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1225         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1226             .addCFIIndex(CFIIndex);
1227       }
1228     }
1229   }
1230 }
1231 
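// Expand the PROBED_STACKALLOC_32/64 pseudo emitted by emitPrologue into an
// explicit allocation sequence that touches the stack at least once per probe
// interval.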
1232 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1233                                         MachineBasicBlock &PrologMBB) const {
1234   bool isPPC64 = Subtarget.isPPC64();
1235   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1236   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1237   MachineFrameInfo &MFI = MF.getFrameInfo();
1238   MachineModuleInfo &MMI = MF.getMMI();
1239   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1240   // AIX assembler does not support cfi directives.
1241   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1242   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1243     int Opc = MI.getOpcode();
1244     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1245   });
1246   if (StackAllocMIPos == PrologMBB.end())
1247     return;
1248   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1249   MachineBasicBlock *CurrentMBB = &PrologMBB;
1250   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1251   MachineInstr &MI = *StackAllocMIPos;
1252   int64_t NegFrameSize = MI.getOperand(2).getImm();
1253   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1254   int64_t NegProbeSize = -(int64_t)ProbeSize;
1255   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1256   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1257   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1258   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1259   Register ScratchReg = MI.getOperand(0).getReg();
1260   Register FPReg = MI.getOperand(1).getReg();
1261   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1262   bool HasBP = RegInfo->hasBasePointer(MF);
1263   Register BPReg = RegInfo->getBaseRegister(MF);
1264   Align MaxAlign = MFI.getMaxAlign();
1265   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1266   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1267   // Subroutines to generate .cfi_* directives.
1268   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1269                             MachineBasicBlock::iterator MBBI, Register Reg) {
1270     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1271     unsigned CFIIndex = MF.addFrameInst(
1272         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1273     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1274         .addCFIIndex(CFIIndex);
1275   };
1276   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1277                          MachineBasicBlock::iterator MBBI, Register Reg,
1278                          int Offset) {
1279     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1280     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1281         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1282     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1283         .addCFIIndex(CFIIndex);
1284   };
1285   // Subroutine to determine if we can use the Imm as part of d-form.
1286   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1287   // Subroutine to materialize the Imm into TempReg.
1288   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1289                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1290                             Register &TempReg) {
1291     assert(isInt<32>(Imm) && "Unhandled imm");
1292     if (isInt<16>(Imm))
1293       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1294           .addImm(Imm);
1295     else {
1296       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1297           .addImm(Imm >> 16);
1298       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1299           .addReg(TempReg)
1300           .addImm(Imm & 0xFFFF);
1301     }
1302   };
1303   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1304   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1305                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1306                               Register NegSizeReg, bool UseDForm,
1307                               Register StoreReg) {
1308     if (UseDForm)
1309       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1310           .addReg(StoreReg)
1311           .addImm(NegSize)
1312           .addReg(SPReg);
1313     else
1314       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1315           .addReg(StoreReg)
1316           .addReg(SPReg)
1317           .addReg(NegSizeReg);
1318   };
1319   // Used to probe the stack when realignment is required.
1320   // Note that, per the ABI requirement, *sp must always equal the value of the
1321   // back-chain pointer, so only st(w|d)u(x) can be used to update sp.
1322   // Following is pseudo code:
1323   // final_sp = (sp & align) + negframesize;
1324   // neg_gap = final_sp - sp;
1325   // while (neg_gap < negprobesize) {
1326   //   stdu fp, negprobesize(sp);
1327   //   neg_gap -= negprobesize;
1328   // }
1329   // stdux fp, sp, neg_gap
1330   //
1331   // When HasBP && HasRedzone, the back-chain pointer is already saved in BPReg
1332   // before the probe code, so we don't need to save it again, and we get one
1333   // spare register that can be used to materialize the probe size if we want to
1334   // use the x-form. Otherwise, we cannot materialize the probe size, so only the
1335   // d-form can be used for now.
1336   //
1337   // The allocations are:
1338   // if (HasBP && HasRedzone) {
1339   //   r0: materialize the probesize if needed so that we can use xform.
1340   //   r12: `neg_gap`
1341   // } else {
1342   //   r0: back-chain pointer
1343   //   r12: `neg_gap`.
1344   // }
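       // A hypothetical walk-through of the pseudo code above: with
       // negprobesize = -4096 and an initial neg_gap of -10000, the loop emits two
       // stdu probes (neg_gap goes -10000 -> -5904 -> -1808) and the trailing
       // stdux covers the remaining -1808 bytes.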
1345   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1346                                  MachineBasicBlock::iterator MBBI,
1347                                  Register ScratchReg, Register TempReg) {
1348     assert(HasBP && "The function is supposed to have base pointer when its "
1349                     "stack is realigned.");
1350     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1351 
1352     // FIXME: We could eliminate this limitation if we had more information about
1353     // which parts of the red zone are already in use. Used red-zone bytes can be
1354     // treated as probed, but there might be `holes' in the probed red zone, which
1355     // could complicate the implementation.
1356     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1357            "Probe size should be larger or equal to the size of red-zone so "
1358            "that red-zone is not clobbered by probing.");
1359 
1360     Register &FinalStackPtr = TempReg;
1361     // FIXME: We only support NegProbeSize materializable by DForm currently.
1362     // When HasBP && HasRedzone, we can use xform if we have an additional idle
1363     // register.
1364     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1365     assert(isInt<16>(NegProbeSize) &&
1366            "NegProbeSize should be materializable by DForm");
1367     Register CRReg = PPC::CR0;
1368     // The emitted assembly is laid out roughly like:
1369     // bb.0:
1370     //   ...
1371     //   sub $scratchreg, $finalsp, r1
1372     //   cmpdi $scratchreg, <negprobesize>
1373     //   bge bb.2
1374     // bb.1:
1375     //   stdu <backchain>, <negprobesize>(r1)
1376     //   sub $scratchreg, $scratchreg, negprobesize
1377     //   cmpdi $scratchreg, <negprobesize>
1378     //   blt bb.1
1379     // bb.2:
1380     //   stdux <backchain>, r1, $scratchreg
1381     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1382     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1383     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1384     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1385     MF.insert(MBBInsertPoint, ProbeExitMBB);
1386     // bb.2
1387     {
1388       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1389       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1390                        BackChainPointer);
1391       if (HasRedZone)
1392         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, so copy
1393         // BPReg to TempReg to satisfy it.
1394         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1395             .addReg(BPReg)
1396             .addReg(BPReg);
1397       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1398       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1399     }
1400     // bb.0
1401     {
1402       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1403           .addReg(SPReg)
1404           .addReg(FinalStackPtr);
1405       if (!HasRedZone)
1406         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1407       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1408           .addReg(ScratchReg)
1409           .addImm(NegProbeSize);
1410       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1411           .addImm(PPC::PRED_GE)
1412           .addReg(CRReg)
1413           .addMBB(ProbeExitMBB);
1414       MBB.addSuccessor(ProbeLoopBodyMBB);
1415       MBB.addSuccessor(ProbeExitMBB);
1416     }
1417     // bb.1
1418     {
1419       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1420       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1421                        0, true /*UseDForm*/, BackChainPointer);
1422       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1423               ScratchReg)
1424           .addReg(ScratchReg)
1425           .addImm(-NegProbeSize);
1426       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1427               CRReg)
1428           .addReg(ScratchReg)
1429           .addImm(NegProbeSize);
1430       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1431           .addImm(PPC::PRED_LT)
1432           .addReg(CRReg)
1433           .addMBB(ProbeLoopBodyMBB);
1434       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1435       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1436     }
1437     // Update liveins.
1438     fullyRecomputeLiveIns({ProbeExitMBB, ProbeLoopBodyMBB});
1439     return ProbeExitMBB;
1440   };
1441   // For the case HasBP && MaxAlign > 1, we have to realign the SP by performing
1442   // SP = SP - SP % MaxAlign, which makes the probe behave more like a dynamic
1443   // probe, since the offset subtracted from SP depends on SP's runtime value.
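       // Roughly, the instructions emitted below compute (names as in the code):
       //   ScratchReg = SP & (MaxAlign - 1)   // current misalignment of SP
       //   FPReg      = SP - ScratchReg       // SP aligned down to MaxAlign
       //   ScratchReg = NegFrameSize
       //   FPReg      = FPReg + ScratchReg    // the final stack pointer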
1444   if (HasBP && MaxAlign > 1) {
1445     // Calculate final stack pointer.
1446     if (isPPC64)
1447       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1448           .addReg(SPReg)
1449           .addImm(0)
1450           .addImm(64 - Log2(MaxAlign));
1451     else
1452       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1453           .addReg(SPReg)
1454           .addImm(0)
1455           .addImm(32 - Log2(MaxAlign))
1456           .addImm(31);
1457     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1458             FPReg)
1459         .addReg(ScratchReg)
1460         .addReg(SPReg);
1461     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1462     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1463             FPReg)
1464         .addReg(ScratchReg)
1465         .addReg(FPReg);
1466     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1467     if (needsCFI)
1468       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1469   } else {
1470     // Initialize current frame pointer.
1471     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1472     // Use FPReg to calculate CFA.
1473     if (needsCFI)
1474       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1475     // Probe residual part.
1476     if (NegResidualSize) {
1477       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1478       if (!ResidualUseDForm)
1479         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1480       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1481                        ResidualUseDForm, FPReg);
1482     }
1483     bool UseDForm = CanUseDForm(NegProbeSize);
1484     // If number of blocks is small, just probe them directly.
1485     if (NumBlocks < 3) {
1486       if (!UseDForm)
1487         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1488       for (int i = 0; i < NumBlocks; ++i)
1489         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1490                          FPReg);
1491       if (needsCFI) {
1492         // Restore using SPReg to calculate CFA.
1493         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1494       }
1495     } else {
1496       // Since CTR is a volatile register and the current shrink-wrapping
1497       // implementation won't choose an MBB inside a loop as the PrologMBB, it's
1498       // safe to synthesize a CTR loop to do the probing.
1499       // Calculate the trip count and store it in the CTR register.
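           // The emitted loop is roughly (d-form, 64-bit spelling shown; names are
           // illustrative only):
           //   mtctr <NumBlocks>
           // Loop:
           //   stdu <fp>, -<probesize>(r1)
           //   bdnz Loop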
1500       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1501       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1502           .addReg(ScratchReg, RegState::Kill);
1503       if (!UseDForm)
1504         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1505       // Create MBBs of the loop.
1506       MachineFunction::iterator MBBInsertPoint =
1507           std::next(CurrentMBB->getIterator());
1508       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1509       MF.insert(MBBInsertPoint, LoopMBB);
1510       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1511       MF.insert(MBBInsertPoint, ExitMBB);
1512       // Synthesize the loop body.
1513       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1514                        UseDForm, FPReg);
1515       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1516           .addMBB(LoopMBB);
1517       LoopMBB->addSuccessor(ExitMBB);
1518       LoopMBB->addSuccessor(LoopMBB);
1519       // Synthesize the exit MBB.
1520       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1521                       std::next(MachineBasicBlock::iterator(MI)),
1522                       CurrentMBB->end());
1523       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1524       CurrentMBB->addSuccessor(LoopMBB);
1525       if (needsCFI) {
1526         // Restore using SPReg to calculate CFA.
1527         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1528       }
1529       // Update liveins.
1530       fullyRecomputeLiveIns({ExitMBB, LoopMBB});
1531     }
1532   }
1533   ++NumPrologProbed;
1534   MI.eraseFromParent();
1535 }
1536 
1537 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1538                                     MachineBasicBlock &MBB) const {
1539   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1540   DebugLoc dl;
1541 
1542   if (MBBI != MBB.end())
1543     dl = MBBI->getDebugLoc();
1544 
1545   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1546   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1547 
1548   // Get alignment info so we know how to restore the SP.
1549   const MachineFrameInfo &MFI = MF.getFrameInfo();
1550 
1551   // Get the number of bytes allocated from the FrameInfo.
1552   int64_t FrameSize = MFI.getStackSize();
1553 
1554   // Get processor type.
1555   bool isPPC64 = Subtarget.isPPC64();
1556 
1557   // Check if the link register (LR) has been saved.
1558   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1559   bool MustSaveLR = FI->mustSaveLR();
1560   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1561   bool MustSaveCR = !MustSaveCRs.empty();
1562   // Do we have a frame pointer and/or base pointer for this function?
1563   bool HasFP = hasFP(MF);
1564   bool HasBP = RegInfo->hasBasePointer(MF);
1565   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1566   bool HasROPProtect = Subtarget.hasROPProtect();
1567   bool HasPrivileged = Subtarget.hasPrivileged();
1568 
1569   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1570   Register BPReg = RegInfo->getBaseRegister(MF);
1571   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1572   Register ScratchReg;
1573   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1574   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1575                                                  : PPC::MTLR );
1576   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1577                                                  : PPC::LWZ );
1578   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1579                                                            : PPC::LIS );
1580   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1581                                               : PPC::OR );
1582   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1583                                                   : PPC::ORI );
1584   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1585                                                    : PPC::ADDI );
1586   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1587                                                 : PPC::ADD4 );
1588   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1589                                                      : PPC::LWZ);
1590   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1591                                                      : PPC::MTOCRF);
1592   const MCInstrDesc &HashChk =
1593       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1594                       : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1595   int64_t LROffset = getReturnSaveOffset();
1596 
1597   int64_t FPOffset = 0;
1598 
1599   // Using the same bool variable as below to suppress compiler warnings.
1600   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1601                                               &TempReg);
1602   assert(SingleScratchReg &&
1603          "Could not find an available scratch register");
1604 
1605   SingleScratchReg = ScratchReg == TempReg;
1606 
1607   if (HasFP) {
1608     int FPIndex = FI->getFramePointerSaveIndex();
1609     assert(FPIndex && "No Frame Pointer Save Slot!");
1610     FPOffset = MFI.getObjectOffset(FPIndex);
1611   }
1612 
1613   int64_t BPOffset = 0;
1614   if (HasBP) {
1615       int BPIndex = FI->getBasePointerSaveIndex();
1616       assert(BPIndex && "No Base Pointer Save Slot!");
1617       BPOffset = MFI.getObjectOffset(BPIndex);
1618   }
1619 
1620   int64_t PBPOffset = 0;
1621   if (FI->usesPICBase()) {
1622     int PBPIndex = FI->getPICBasePointerSaveIndex();
1623     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1624     PBPOffset = MFI.getObjectOffset(PBPIndex);
1625   }
1626 
1627   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1628 
1629   if (IsReturnBlock) {
1630     unsigned RetOpcode = MBBI->getOpcode();
1631     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1632                       RetOpcode == PPC::TCRETURNdi ||
1633                       RetOpcode == PPC::TCRETURNai ||
1634                       RetOpcode == PPC::TCRETURNri8 ||
1635                       RetOpcode == PPC::TCRETURNdi8 ||
1636                       RetOpcode == PPC::TCRETURNai8;
1637 
1638     if (UsesTCRet) {
1639       int MaxTCRetDelta = FI->getTailCallSPDelta();
1640       MachineOperand &StackAdjust = MBBI->getOperand(1);
1641       assert(StackAdjust.isImm() && "Expecting immediate value.");
1642       // Adjust stack pointer.
1643       int StackAdj = StackAdjust.getImm();
1644       int Delta = StackAdj - MaxTCRetDelta;
1645       assert((Delta >= 0) && "Delta must be positive");
1646       if (MaxTCRetDelta>0)
1647         FrameSize += (StackAdj +Delta);
1648       else
1649         FrameSize += StackAdj;
1650     }
1651   }
1652 
1653   // Frames of 32KB & larger require special handling because they cannot be
1654   // indexed into with a simple LD/LWZ immediate offset operand.
1655   bool isLargeFrame = !isInt<16>(FrameSize);
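       // (For example, a 40000-byte frame fails the isInt<16> check, so the code
       // below materializes the frame size into a register first; see the
       // materializeImmPostRA call further down.)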
1656 
1657   // On targets without red zone, the SP needs to be restored last, so that
1658   // all live contents of the stack frame are upwards of the SP. This means
1659   // that we cannot restore SP just now, since there may be more registers
1660   // to restore from the stack frame (e.g. R31). If the frame size is not
1661   // a simple immediate value, we will need a spare register to hold the
1662   // restored SP. If the frame size is known and small, we can simply adjust
1663   // the offsets of the registers to be restored, and still use SP to restore
1664   // them. In such case, the final update of SP will be to add the frame
1665   // size to it.
1666   // To simplify the code, set RBReg to the base register used to restore
1667   // values from the stack, and set SPAdd to the value that needs to be added
1668   // to the SP at the end. The default values are as if red zone was present.
1669   unsigned RBReg = SPReg;
1670   uint64_t SPAdd = 0;
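       // For instance (one illustrative case): with no red zone, no base pointer,
       // no variable-sized objects, and a small known FrameSize, RBReg stays SPReg,
       // SPAdd becomes FrameSize, and the FP/BP/PIC-base restore offsets below are
       // simply biased by FrameSize.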
1671 
1672   // Check if we can move the stack update instruction up the epilogue
1673   // past the callee-saved register restores. This allows the move-to-LR
1674   // instruction to be executed before those restores, which means the
1675   // restores can hide the latency of the MTLR instruction.
1676   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1677   if (stackUpdateCanBeMoved(MF)) {
1678     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1679     for (CalleeSavedInfo CSI : Info) {
1680       // If the callee saved register is spilled to another register abort the
1681       // stack update movement.
1682       if (CSI.isSpilledToReg()) {
1683         StackUpdateLoc = MBBI;
1684         break;
1685       }
1686       int FrIdx = CSI.getFrameIdx();
1687       // If the frame index is not negative the callee saved info belongs to a
1688       // stack object that is not a fixed stack object. We ignore non-fixed
1689       // stack objects because we won't move the update of the stack pointer
1690       // past them.
1691       if (FrIdx >= 0)
1692         continue;
1693 
1694       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1695         StackUpdateLoc--;
1696       else {
1697         // Abort the operation as we can't update all CSR restores.
1698         StackUpdateLoc = MBBI;
1699         break;
1700       }
1701     }
1702   }
1703 
1704   if (FrameSize) {
1705     // In the prologue, the loaded (or persistent) stack pointer value is
1706     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with a red
1707     // zone, add this offset back now.
1708 
1709     // If the function has a base pointer, the stack pointer has been copied
1710     // to it so we can restore it by copying in the other direction.
1711     if (HasRedZone && HasBP) {
1712       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1713         addReg(BPReg).
1714         addReg(BPReg);
1715     }
1716     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1717     // enabled (=> hasFastCall()==true), the fastcc call might contain a tail
1718     // call which invalidates the stack pointer value in SP(0), so we use the
1719     // value of R31 in this case. A similar situation exists with setjmp.
1720     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1721       assert(HasFP && "Expecting a valid frame pointer.");
1722       if (!HasRedZone)
1723         RBReg = FPReg;
1724       if (!isLargeFrame) {
1725         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1726           .addReg(FPReg).addImm(FrameSize);
1727       } else {
1728         TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1729         BuildMI(MBB, MBBI, dl, AddInst)
1730           .addReg(RBReg)
1731           .addReg(FPReg)
1732           .addReg(ScratchReg);
1733       }
1734     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1735       if (HasRedZone) {
1736         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1737           .addReg(SPReg)
1738           .addImm(FrameSize);
1739       } else {
1740         // Make sure that adding FrameSize will not overflow the max offset
1741         // size.
1742         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1743                "Local offsets should be negative");
1744         SPAdd = FrameSize;
1745         FPOffset += FrameSize;
1746         BPOffset += FrameSize;
1747         PBPOffset += FrameSize;
1748       }
1749     } else {
1750       // We don't want to use ScratchReg as a base register, because it
1751       // could happen to be R0. Use FP instead, but make sure to preserve it.
1752       if (!HasRedZone) {
1753         // If FP is not saved, copy it to ScratchReg.
1754         if (!HasFP)
1755           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1756             .addReg(FPReg)
1757             .addReg(FPReg);
1758         RBReg = FPReg;
1759       }
1760       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1761         .addImm(0)
1762         .addReg(SPReg);
1763     }
1764   }
1765   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1766   // If there is no red zone, ScratchReg may be needed for holding a useful
1767   // value (although not the base register). Make sure it is not overwritten
1768   // too early.
1769 
1770   // If we need to restore both the LR and the CR and we only have one
1771   // available scratch register, we must do them one at a time.
1772   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1773     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1774     // is live here.
1775     assert(HasRedZone && "Expecting red zone");
1776     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1777       .addImm(CRSaveOffset)
1778       .addReg(SPReg);
1779     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1780       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1781         .addReg(TempReg, getKillRegState(i == e-1));
1782   }
1783 
1784   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1785   // LR is stored in the caller's stack frame. ScratchReg will be needed
1786   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1787   // a base register anyway, because it may happen to be R0.
1788   bool LoadedLR = false;
1789   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1790     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1791       .addImm(LROffset+SPAdd)
1792       .addReg(RBReg);
1793     LoadedLR = true;
1794   }
1795 
1796   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1797     assert(RBReg == SPReg && "Should be using SP as a base register");
1798     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1799       .addImm(CRSaveOffset)
1800       .addReg(RBReg);
1801   }
1802 
1803   if (HasFP) {
1804     // If there is red zone, restore FP directly, since SP has already been
1805     // restored. Otherwise, restore the value of FP into ScratchReg.
1806     if (HasRedZone || RBReg == SPReg)
1807       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1808         .addImm(FPOffset)
1809         .addReg(SPReg);
1810     else
1811       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1812         .addImm(FPOffset)
1813         .addReg(RBReg);
1814   }
1815 
1816   if (FI->usesPICBase())
1817     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1818       .addImm(PBPOffset)
1819       .addReg(RBReg);
1820 
1821   if (HasBP)
1822     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1823       .addImm(BPOffset)
1824       .addReg(RBReg);
1825 
1826   // There is nothing more to be loaded from the stack, so now we can
1827   // restore SP: SP = RBReg + SPAdd.
1828   if (RBReg != SPReg || SPAdd != 0) {
1829     assert(!HasRedZone && "This should not happen with red zone");
1830     // If SPAdd is 0, generate a copy.
1831     if (SPAdd == 0)
1832       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1833         .addReg(RBReg)
1834         .addReg(RBReg);
1835     else
1836       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1837         .addReg(RBReg)
1838         .addImm(SPAdd);
1839 
1840     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1841     if (RBReg == FPReg)
1842       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1843         .addReg(ScratchReg)
1844         .addReg(ScratchReg);
1845 
1846     // Now load the LR from the caller's stack frame.
1847     if (MustSaveLR && !LoadedLR)
1848       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1849         .addImm(LROffset)
1850         .addReg(SPReg);
1851   }
1852 
1853   if (MustSaveCR &&
1854       !(SingleScratchReg && MustSaveLR))
1855     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1856       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1857         .addReg(TempReg, getKillRegState(i == e-1));
1858 
1859   if (MustSaveLR) {
1860     // If ROP protection is required, an extra instruction is added to compute a
1861     // hash and then compare it to the hash stored in the prologue.
1862     if (HasROPProtect) {
1863       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1864       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1865       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1866              "ROP hash check location offset out of range.");
1867       assert(((ImmOffset & 0x7) == 0) &&
1868              "ROP hash check location offset must be 8 byte aligned.");
1869       BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1870           .addReg(ScratchReg)
1871           .addImm(ImmOffset)
1872           .addReg(SPReg);
1873     }
1874     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1875   }
1876 
1877   // Callee-pop calling convention: pop the parameter/linkage area. Used for
1878   // tail call optimization.
1879   if (IsReturnBlock) {
1880     unsigned RetOpcode = MBBI->getOpcode();
1881     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1882         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1883         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1884       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1885       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1886 
1887       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1888         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1889           .addReg(SPReg).addImm(CallerAllocatedAmt);
1890       } else {
1891         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1892           .addImm(CallerAllocatedAmt >> 16);
1893         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1894           .addReg(ScratchReg, RegState::Kill)
1895           .addImm(CallerAllocatedAmt & 0xFFFF);
1896         BuildMI(MBB, MBBI, dl, AddInst)
1897           .addReg(SPReg)
1898           .addReg(FPReg)
1899           .addReg(ScratchReg);
1900       }
1901     } else {
1902       createTailCallBranchInstr(MBB);
1903     }
1904   }
1905 }
1906 
1907 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1908   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1909 
1910   // If we got this far a first terminator should exist.
1911   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1912 
1913   DebugLoc dl = MBBI->getDebugLoc();
1914   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1915 
1916   // Create the branch instruction for the pseudo tail-call return instruction.
1917   // The TCRETURNdi variants are direct calls. Valid targets for those are
1918   // MO_GlobalAddress operands as well as MO_ExternalSymbol operands with PC-Rel,
1919   // since we can tail call external functions with PC-Rel (i.e. we don't need
1920   // to worry about different TOC pointers). Some of the external functions will
1921   // be MO_GlobalAddress while others, like memcpy for example, are going to
1922   // be MO_ExternalSymbol.
1923   unsigned RetOpcode = MBBI->getOpcode();
1924   if (RetOpcode == PPC::TCRETURNdi) {
1925     MBBI = MBB.getLastNonDebugInstr();
1926     MachineOperand &JumpTarget = MBBI->getOperand(0);
1927     if (JumpTarget.isGlobal())
1928       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1929         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1930     else if (JumpTarget.isSymbol())
1931       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1932         addExternalSymbol(JumpTarget.getSymbolName());
1933     else
1934       llvm_unreachable("Expecting Global or External Symbol");
1935   } else if (RetOpcode == PPC::TCRETURNri) {
1936     MBBI = MBB.getLastNonDebugInstr();
1937     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1938     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1939   } else if (RetOpcode == PPC::TCRETURNai) {
1940     MBBI = MBB.getLastNonDebugInstr();
1941     MachineOperand &JumpTarget = MBBI->getOperand(0);
1942     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1943   } else if (RetOpcode == PPC::TCRETURNdi8) {
1944     MBBI = MBB.getLastNonDebugInstr();
1945     MachineOperand &JumpTarget = MBBI->getOperand(0);
1946     if (JumpTarget.isGlobal())
1947       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1948         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1949     else if (JumpTarget.isSymbol())
1950       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1951         addExternalSymbol(JumpTarget.getSymbolName());
1952     else
1953       llvm_unreachable("Expecting Global or External Symbol");
1954   } else if (RetOpcode == PPC::TCRETURNri8) {
1955     MBBI = MBB.getLastNonDebugInstr();
1956     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1957     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1958   } else if (RetOpcode == PPC::TCRETURNai8) {
1959     MBBI = MBB.getLastNonDebugInstr();
1960     MachineOperand &JumpTarget = MBBI->getOperand(0);
1961     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1962   }
1963 }
1964 
1965 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1966                                             BitVector &SavedRegs,
1967                                             RegScavenger *RS) const {
1968   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1969 
1970   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1971 
1972   // Do not explicitly save the callee saved VSRp registers.
1973   // The individual VSR subregisters will be saved instead.
1974   SavedRegs.reset(PPC::VSRp26);
1975   SavedRegs.reset(PPC::VSRp27);
1976   SavedRegs.reset(PPC::VSRp28);
1977   SavedRegs.reset(PPC::VSRp29);
1978   SavedRegs.reset(PPC::VSRp30);
1979   SavedRegs.reset(PPC::VSRp31);
1980 
1981   //  Save and clear the LR state.
1982   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1983   unsigned LR = RegInfo->getRARegister();
1984   FI->setMustSaveLR(MustSaveLR(MF, LR));
1985   SavedRegs.reset(LR);
1986 
1987   //  Save R31 if necessary
1988   int FPSI = FI->getFramePointerSaveIndex();
1989   const bool isPPC64 = Subtarget.isPPC64();
1990   MachineFrameInfo &MFI = MF.getFrameInfo();
1991 
1992   // If the frame pointer save index hasn't been defined yet.
1993   if (!FPSI && needsFP(MF)) {
1994     // Find out the fixed offset of the frame pointer save area.
1995     int FPOffset = getFramePointerSaveOffset();
1996     // Allocate the frame index for frame pointer save area.
1997     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1998     // Save the result.
1999     FI->setFramePointerSaveIndex(FPSI);
2000   }
2001 
2002   int BPSI = FI->getBasePointerSaveIndex();
2003   if (!BPSI && RegInfo->hasBasePointer(MF)) {
2004     int BPOffset = getBasePointerSaveOffset();
2005     // Allocate the frame index for the base pointer save area.
2006     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2007     // Save the result.
2008     FI->setBasePointerSaveIndex(BPSI);
2009   }
2010 
2011   // Reserve stack space for the PIC Base register (R30).
2012   // Only used in SVR4 32-bit.
2013   if (FI->usesPICBase()) {
2014     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2015     FI->setPICBasePointerSaveIndex(PBPSI);
2016   }
2017 
2018   // Make sure we don't explicitly spill r31, because, for example, we have
2019   // some inline asm which explicitly clobbers it, when we otherwise have a
2020   // frame pointer and are using r31's spill slot for the prologue/epilogue
2021   // code. Same goes for the base pointer and the PIC base register.
2022   if (needsFP(MF))
2023     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2024   if (RegInfo->hasBasePointer(MF))
2025     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2026   if (FI->usesPICBase())
2027     SavedRegs.reset(PPC::R30);
2028 
2029   // Reserve stack space to move the linkage area to in case of a tail call.
2030   int TCSPDelta = 0;
2031   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2032       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2033     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2034   }
2035 
2036   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2037   // For 64-bit SVR4 and all flavors of AIX, we create a FixedStack
2038   // object at the offset of the CR-save slot in the linkage area. The actual
2039   // save and restore of the condition register will be created as part of the
2040   // prologue and epilogue insertion, but the FixedStack object is needed to
2041   // keep the CalleeSavedInfo valid.
2042   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2043        SavedRegs.test(PPC::CR4))) {
2044     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2045     const int64_t SpillOffset =
2046         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2047     int FrameIdx =
2048         MFI.CreateFixedObject(SpillSize, SpillOffset,
2049                               /* IsImmutable */ true, /* IsAliased */ false);
2050     FI->setCRSpillFrameIndex(FrameIdx);
2051   }
2052 }
2053 
2054 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2055                                                        RegScavenger *RS) const {
2056   // Get callee saved register information.
2057   MachineFrameInfo &MFI = MF.getFrameInfo();
2058   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2059 
2060   // If the function is shrink-wrapped and has a tail call, the tail call might
2061   // not be in the new RestoreBlock, so the real branch instruction won't be
2062   // generated by emitEpilogue(), because shrink-wrapping has chosen a new
2063   // RestoreBlock. We handle this case here.
2064   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2065     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2066     for (MachineBasicBlock &MBB : MF) {
2067       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2068         createTailCallBranchInstr(MBB);
2069     }
2070   }
2071 
2072   // Early exit if no callee saved registers are modified!
2073   if (CSI.empty() && !needsFP(MF)) {
2074     addScavengingSpillSlot(MF, RS);
2075     return;
2076   }
2077 
2078   unsigned MinGPR = PPC::R31;
2079   unsigned MinG8R = PPC::X31;
2080   unsigned MinFPR = PPC::F31;
2081   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2082 
2083   bool HasGPSaveArea = false;
2084   bool HasG8SaveArea = false;
2085   bool HasFPSaveArea = false;
2086   bool HasVRSaveArea = false;
2087 
2088   SmallVector<CalleeSavedInfo, 18> GPRegs;
2089   SmallVector<CalleeSavedInfo, 18> G8Regs;
2090   SmallVector<CalleeSavedInfo, 18> FPRegs;
2091   SmallVector<CalleeSavedInfo, 18> VRegs;
2092 
2093   for (const CalleeSavedInfo &I : CSI) {
2094     Register Reg = I.getReg();
2095     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2096             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2097            "Not expecting to try to spill R2 in a function that must save TOC");
2098     if (PPC::GPRCRegClass.contains(Reg)) {
2099       HasGPSaveArea = true;
2100 
2101       GPRegs.push_back(I);
2102 
2103       if (Reg < MinGPR) {
2104         MinGPR = Reg;
2105       }
2106     } else if (PPC::G8RCRegClass.contains(Reg)) {
2107       HasG8SaveArea = true;
2108 
2109       G8Regs.push_back(I);
2110 
2111       if (Reg < MinG8R) {
2112         MinG8R = Reg;
2113       }
2114     } else if (PPC::F8RCRegClass.contains(Reg)) {
2115       HasFPSaveArea = true;
2116 
2117       FPRegs.push_back(I);
2118 
2119       if (Reg < MinFPR) {
2120         MinFPR = Reg;
2121       }
2122     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2123                PPC::CRRCRegClass.contains(Reg)) {
2124       ; // do nothing, as we already know whether CRs are spilled
2125     } else if (PPC::VRRCRegClass.contains(Reg) ||
2126                PPC::SPERCRegClass.contains(Reg)) {
2127       // Altivec and SPE are mutually exclusive, but have the same stack
2128       // alignment requirements, so overload the save area for both cases.
2129       HasVRSaveArea = true;
2130 
2131       VRegs.push_back(I);
2132 
2133       if (Reg < MinVR) {
2134         MinVR = Reg;
2135       }
2136     } else {
2137       llvm_unreachable("Unknown RegisterClass!");
2138     }
2139   }
2140 
2141   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2142   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2143 
2144   int64_t LowerBound = 0;
2145 
2146   // Take into account stack space reserved for tail calls.
2147   int TCSPDelta = 0;
2148   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2149       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2150     LowerBound = TCSPDelta;
2151   }
2152 
2153   // The Floating-point register save area is right below the back chain word
2154   // of the previous stack frame.
2155   if (HasFPSaveArea) {
2156     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2157       int FI = FPRegs[i].getFrameIdx();
2158 
2159       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2160     }
2161 
2162     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2163   }
2164 
2165   // Check whether the frame pointer register is allocated. If so, make sure it
2166   // is spilled to the correct offset.
2167   if (needsFP(MF)) {
2168     int FI = PFI->getFramePointerSaveIndex();
2169     assert(FI && "No Frame Pointer Save Slot!");
2170     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2171     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2172     HasGPSaveArea = true;
2173   }
2174 
2175   if (PFI->usesPICBase()) {
2176     int FI = PFI->getPICBasePointerSaveIndex();
2177     assert(FI && "No PIC Base Pointer Save Slot!");
2178     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2179 
2180     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2181     HasGPSaveArea = true;
2182   }
2183 
2184   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2185   if (RegInfo->hasBasePointer(MF)) {
2186     int FI = PFI->getBasePointerSaveIndex();
2187     assert(FI && "No Base Pointer Save Slot!");
2188     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2189 
2190     Register BP = RegInfo->getBaseRegister(MF);
2191     if (PPC::G8RCRegClass.contains(BP)) {
2192       MinG8R = std::min<unsigned>(MinG8R, BP);
2193       HasG8SaveArea = true;
2194     } else if (PPC::GPRCRegClass.contains(BP)) {
2195       MinGPR = std::min<unsigned>(MinGPR, BP);
2196       HasGPSaveArea = true;
2197     }
2198   }
2199 
2200   // General register save area starts right below the Floating-point
2201   // register save area.
2202   if (HasGPSaveArea || HasG8SaveArea) {
2203     // Move general register save area spill slots down, taking into account
2204     // the size of the Floating-point register save area.
2205     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2206       if (!GPRegs[i].isSpilledToReg()) {
2207         int FI = GPRegs[i].getFrameIdx();
2208         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2209       }
2210     }
2211 
2212     // Likewise, move the 64-bit register save area spill slots down, taking
2213     // into account the size of the Floating-point register save area.
2214     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2215       if (!G8Regs[i].isSpilledToReg()) {
2216         int FI = G8Regs[i].getFrameIdx();
2217         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2218       }
2219     }
2220 
2221     unsigned MinReg =
2222       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2223                          TRI->getEncodingValue(MinG8R));
2224 
2225     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2226     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2227   }
2228 
2229   // For 32-bit only, the CR save area is below the general register
2230   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2231   // to the stack pointer and hence does not need an adjustment here.
2232   // Only CR2 (the first nonvolatile spilled) has an associated frame
2233   // index so that we have a single uniform save area.
2234   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2235     // Adjust the frame index of the CR spill slot.
2236     for (const auto &CSInfo : CSI) {
2237       if (CSInfo.getReg() == PPC::CR2) {
2238         int FI = CSInfo.getFrameIdx();
2239         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2240         break;
2241       }
2242     }
2243 
2244     LowerBound -= 4; // The CR save area is always 4 bytes long.
2245   }
2246 
2247   // Both Altivec and SPE have the same alignment and padding requirements
2248   // within the stack frame.
2249   if (HasVRSaveArea) {
2250     // Insert alignment padding; we need 16-byte alignment. Note: for a positive
2251     // number the alignment formula is y = (x + (n-1)) & ~(n-1). But since we are
2252     // using a negative number here (the stack grows downward), the formula is
2253     // y = x & ~(n-1), where x is the offset before aligning, n is the alignment
2254     // (n = 16 here), and y is the offset after aligning.
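         // For example, with n = 16 and x = -340, y = -340 & ~15 = -352, which is
         // 16-byte aligned and still leaves room for the 340 bytes accounted for.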
2255     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2256     LowerBound &= ~(15);
2257 
2258     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2259       int FI = VRegs[i].getFrameIdx();
2260 
2261       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2262     }
2263   }
2264 
2265   addScavengingSpillSlot(MF, RS);
2266 }
2267 
2268 void
2269 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2270                                          RegScavenger *RS) const {
2271   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2272   // a large stack, which will require scavenging a register to materialize a
2273   // large offset.
2274 
2275   // We need to have a scavenger spill slot for spills if the frame size is
2276   // large. In case there is no free register for large-offset addressing,
2277   // this slot is used for the necessary emergency spill. Also, we need the
2278   // slot for dynamic stack allocations.
2279 
2280   // The scavenger might be invoked if the frame offset does not fit into the
2281   // 16-bit immediate field (or the 8-bit field in the SPE case).
2282   // We don't know the complete frame size here because we've not yet computed
2283   // callee-saved register spills or the needed alignment padding.
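       // As an illustration: a non-SPE function whose estimated frame size is, say,
       // 70000 bytes cannot reach every slot with a signed 16-bit offset, so if it
       // also has spills, a scavenging slot is reserved below.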
2284   unsigned StackSize = determineFrameLayout(MF, true);
2285   MachineFrameInfo &MFI = MF.getFrameInfo();
2286   bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
2287 
2288   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2289       (hasSpills(MF) && NeedSpills)) {
2290     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2291     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2292     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2293     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2294     unsigned Size = TRI.getSpillSize(RC);
2295     Align Alignment = TRI.getSpillAlign(RC);
2296     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2297 
2298     // Might we have over-aligned allocas?
2299     bool HasAlVars =
2300         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2301 
2302     // These kinds of spills might need two registers.
2303     if (spillsCR(MF) || HasAlVars)
2304       RS->addScavengingFrameIndex(
2305           MFI.CreateStackObject(Size, Alignment, false));
2306   }
2307 }
2308 
2309 // This function checks if a callee-saved GPR can be spilled to a volatile
2310 // vector register. This occurs for leaf functions when the option
2311 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2312 // which were not spilled to vectors, return false so the target independent
2313 // code can handle them by assigning a FrameIdx to a stack slot.
2314 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2315     MachineFunction &MF, const TargetRegisterInfo *TRI,
2316     std::vector<CalleeSavedInfo> &CSI) const {
2317 
2318   if (CSI.empty())
2319     return true; // Early exit if no callee saved registers are modified!
2320 
2321   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2322   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2323   const MachineRegisterInfo &MRI = MF.getRegInfo();
2324 
2325   if (Subtarget.hasSPE()) {
2326     // In the SPE case we only have SuperRegs and CRs
2327     // in our CalleeSavedInfo vector.
2328 
2329     for (auto &CalleeSaveReg : CSI) {
2330       MCPhysReg Reg = CalleeSaveReg.getReg();
2331       MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
2332       MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
2333 
2334       if ( // Check only for SuperRegs.
2335           Lower &&
2336           // Replace Reg if only lower-32 bits modified
2337           !MRI.isPhysRegModified(Higher))
2338         CalleeSaveReg = CalleeSavedInfo(Lower);
2339     }
2340   }
2341 
2342   // Early exit if we cannot spill GPRs to volatile vector registers.
2343   MachineFrameInfo &MFI = MF.getFrameInfo();
2344   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2345     return false;
2346 
2347   // Build a BitVector of VSRs that can be used for spilling GPRs.
2348   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2349   BitVector BVCalleeSaved(TRI->getNumRegs());
2350   for (unsigned i = 0; CSRegs[i]; ++i)
2351     BVCalleeSaved.set(CSRegs[i]);
2352 
2353   for (unsigned Reg : BVAllocatable.set_bits()) {
2354     // Set to 0 if the register is not a volatile VSX register, or if it is
2355     // used in the function.
2356     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2357         MRI.isPhysRegUsed(Reg))
2358       BVAllocatable.reset(Reg);
2359   }
2360 
2361   bool AllSpilledToReg = true;
2362   unsigned LastVSRUsedForSpill = 0;
2363   for (auto &CS : CSI) {
2364     if (BVAllocatable.none())
2365       return false;
2366 
2367     Register Reg = CS.getReg();
2368 
2369     if (!PPC::G8RCRegClass.contains(Reg)) {
2370       AllSpilledToReg = false;
2371       continue;
2372     }
2373 
2374     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2375     // into one VSR using the mtvsrdd instruction.
2376     if (LastVSRUsedForSpill != 0) {
2377       CS.setDstReg(LastVSRUsedForSpill);
2378       BVAllocatable.reset(LastVSRUsedForSpill);
2379       LastVSRUsedForSpill = 0;
2380       continue;
2381     }
2382 
2383     unsigned VolatileVFReg = BVAllocatable.find_first();
2384     if (VolatileVFReg < BVAllocatable.size()) {
2385       CS.setDstReg(VolatileVFReg);
2386       LastVSRUsedForSpill = VolatileVFReg;
2387     } else {
2388       AllSpilledToReg = false;
2389     }
2390   }
2391   return AllSpilledToReg;
2392 }
2393 
2394 bool PPCFrameLowering::spillCalleeSavedRegisters(
2395     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2396     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2397 
2398   MachineFunction *MF = MBB.getParent();
2399   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2400   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2401   bool MustSaveTOC = FI->mustSaveTOC();
2402   DebugLoc DL;
2403   bool CRSpilled = false;
2404   MachineInstrBuilder CRMIB;
2405   BitVector Spilled(TRI->getNumRegs());
2406 
2407   VSRContainingGPRs.clear();
2408 
2409   // Map each VSR to the GPRs to be spilled into it. A single VSR can hold one
2410   // or two GPRs, so we need a table recording this for the later save/restore.
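       // Hypothetical example: if X30 and X31 are both assigned the same destination
       // VSR, VSRContainingGPRs maps that VSR to the pair (X30, X31), and the spill
       // is later emitted as a single mtvsrdd.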
2411   for (const CalleeSavedInfo &Info : CSI) {
2412     if (Info.isSpilledToReg()) {
2413       auto &SpilledVSR =
2414           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2415       assert(SpilledVSR.second == 0 &&
2416              "Can't spill more than two GPRs into VSR!");
2417       if (SpilledVSR.first == 0)
2418         SpilledVSR.first = Info.getReg();
2419       else
2420         SpilledVSR.second = Info.getReg();
2421     }
2422   }
2423 
2424   for (const CalleeSavedInfo &I : CSI) {
2425     Register Reg = I.getReg();
2426 
2427     // CR2 through CR4 are the nonvolatile CR fields.
2428     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2429 
2430     // Add the callee-saved register as live-in; it's killed at the spill.
2431     // Do not do this for callee-saved registers that are live-in to the
2432     // function because they will already be marked live-in and this will be
2433     // adding it for a second time. It is an error to add the same register
2434     // to the set more than once.
2435     const MachineRegisterInfo &MRI = MF->getRegInfo();
2436     bool IsLiveIn = MRI.isLiveIn(Reg);
2437     if (!IsLiveIn)
2438        MBB.addLiveIn(Reg);
2439 
2440     if (CRSpilled && IsCRField) {
2441       CRMIB.addReg(Reg, RegState::ImplicitKill);
2442       continue;
2443     }
2444 
2445     // The actual spill will happen in the prologue.
2446     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2447       continue;
2448 
2449     // Insert the spill to the stack frame.
2450     if (IsCRField) {
2451       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2452       if (!Subtarget.is32BitELFABI()) {
2453         // The actual spill will happen at the start of the prologue.
2454         FuncInfo->addMustSaveCR(Reg);
2455       } else {
2456         CRSpilled = true;
2457         FuncInfo->setSpillsCR();
2458 
2459         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2460         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2461         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2462                   .addReg(Reg, RegState::ImplicitKill);
2463 
2464         MBB.insert(MI, CRMIB);
2465         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2466                                          .addReg(PPC::R12,
2467                                                  getKillRegState(true)),
2468                                          I.getFrameIdx()));
2469       }
2470     } else {
2471       if (I.isSpilledToReg()) {
2472         unsigned Dst = I.getDstReg();
2473 
2474         if (Spilled[Dst])
2475           continue;
2476 
2477         if (VSRContainingGPRs[Dst].second != 0) {
2478           assert(Subtarget.hasP9Vector() &&
2479                  "mtvsrdd is unavailable on pre-P9 targets.");
2480 
2481           NumPESpillVSR += 2;
2482           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2483               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2484               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2485         } else if (VSRContainingGPRs[Dst].second == 0) {
2486           assert(Subtarget.hasP8Vector() &&
2487                  "Can't move GPR to VSR on pre-P8 targets.");
2488 
2489           ++NumPESpillVSR;
2490           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2491                   TRI->getSubReg(Dst, PPC::sub_64))
2492               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2493         } else {
2494           llvm_unreachable("More than two GPRs spilled to a VSR!");
2495         }
2496         Spilled.set(Dst);
2497       } else {
2498         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2499         // Use !IsLiveIn for the kill flag.
2500         // We do not want to kill registers that are live in this function
2501         // before their use because they will become undefined registers.
2502         // Functions without NoUnwind need to preserve the order of elements in
2503         // saved vector registers.
2504         if (Subtarget.needsSwapsForVSXMemOps() &&
2505             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2506           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2507                                        I.getFrameIdx(), RC, TRI);
2508         else
2509           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2510                                   TRI, Register());
2511       }
2512     }
2513   }
2514   return true;
2515 }
2516 
2517 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2518                        bool CR4Spilled, MachineBasicBlock &MBB,
2519                        MachineBasicBlock::iterator MI,
2520                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2521 
2522   MachineFunction *MF = MBB.getParent();
2523   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2524   DebugLoc DL;
2525   unsigned MoveReg = PPC::R12;
2526 
2527   // 32-bit:  FP-relative
2528   MBB.insert(MI,
2529              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2530                                CSI[CSIIndex].getFrameIdx()));
2531 
2532   unsigned RestoreOp = PPC::MTOCRF;
2533   if (CR2Spilled)
2534     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2535                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2536 
2537   if (CR3Spilled)
2538     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2539                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2540 
2541   if (CR4Spilled)
2542     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2543                .addReg(MoveReg, getKillRegState(true)));
2544 }
2545 
2546 MachineBasicBlock::iterator PPCFrameLowering::
2547 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2548                               MachineBasicBlock::iterator I) const {
2549   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2550   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2551       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2552     // Add (actually subtract) back the amount the callee popped on return.
2553     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2554       bool is64Bit = Subtarget.isPPC64();
2555       CalleeAmt *= -1;
2556       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2557       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2558       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2559       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2560       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2561       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2562       const DebugLoc &dl = I->getDebugLoc();
2563 
2564       if (isInt<16>(CalleeAmt)) {
2565         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2566           .addReg(StackReg, RegState::Kill)
2567           .addImm(CalleeAmt);
2568       } else {
2569         MachineBasicBlock::iterator MBBI = I;
2570         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2571           .addImm(CalleeAmt >> 16);
2572         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2573           .addReg(TmpReg, RegState::Kill)
2574           .addImm(CalleeAmt & 0xFFFF);
2575         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2576           .addReg(StackReg, RegState::Kill)
2577           .addReg(TmpReg);
2578       }
2579     }
2580   }
2581   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2582   return MBB.erase(I);
2583 }
2584 
2585 static bool isCalleeSavedCR(unsigned Reg) {
2586   return Reg == PPC::CR2 || Reg == PPC::CR3 || Reg == PPC::CR4;
2587 }
2588 
2589 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2590     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2591     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2592   MachineFunction *MF = MBB.getParent();
2593   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2594   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2595   bool MustSaveTOC = FI->mustSaveTOC();
2596   bool CR2Spilled = false;
2597   bool CR3Spilled = false;
2598   bool CR4Spilled = false;
2599   unsigned CSIIndex = 0;
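       // Track which VSRs have already had their GPR contents restored so a VSR
       // holding two spilled GPRs is only processed once.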
2600   BitVector Restored(TRI->getNumRegs());
2601 
2602   // Initialize insertion-point logic; we will be restoring in reverse
2603   // order of spill.
2604   MachineBasicBlock::iterator I = MI, BeforeI = I;
2605   bool AtStart = I == MBB.begin();
2606 
2607   if (!AtStart)
2608     --BeforeI;
2609 
2610   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2611     Register Reg = CSI[i].getReg();
2612 
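         // When the TOC must be saved, r2/x2 is handled apart from the normal
         // callee-saved spill and restore code, so skip it here.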
2613     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2614       continue;
2615 
2616     // Restore of callee saved condition register field is handled during
2617     // epilogue insertion.
2618     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2619       continue;
2620 
2621     if (Reg == PPC::CR2) {
2622       CR2Spilled = true;
2623       // All spilled CR fields share the spill slot associated with CR2,
2624       // the first nonvolatile CR field; remember its CSI index here.
2625       CSIIndex = i;
2626       continue;
2627     } else if (Reg == PPC::CR3) {
2628       CR3Spilled = true;
2629       continue;
2630     } else if (Reg == PPC::CR4) {
2631       CR4Spilled = true;
2632       continue;
2633     } else {
2634       // On 32-bit ELF, when we first encounter a non-CR register after
2635       // seeing at least one CR register, restore all spilled CRs together.
2636       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2637         bool is31 = needsFP(*MF);
2638         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2639                    CSIIndex);
2640         CR2Spilled = CR3Spilled = CR4Spilled = false;
2641       }
2642 
2643       if (CSI[i].isSpilledToReg()) {
2644         DebugLoc DL;
2645         unsigned Dst = CSI[i].getDstReg();
2646 
2647         if (Restored[Dst])
2648           continue;
2649 
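             // If two GPRs were packed into this VSR in the prologue, MFVSRLD moves
             // the second one back from the lower doubleword and MFVSRD moves the
             // first one back from the upper doubleword; otherwise a single MFVSRD
             // is enough.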
2650         if (VSRContainingGPRs[Dst].second != 0) {
2651           assert(Subtarget.hasP9Vector());
2652           NumPEReloadVSR += 2;
2653           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2654                   VSRContainingGPRs[Dst].second)
2655               .addReg(Dst);
2656           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2657                   VSRContainingGPRs[Dst].first)
2658               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2659         } else if (VSRContainingGPRs[Dst].second == 0) {
2660           assert(Subtarget.hasP8Vector());
2661           ++NumPEReloadVSR;
2662           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2663                   VSRContainingGPRs[Dst].first)
2664               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2665         } else {
2666           llvm_unreachable("More than two GPRs spilled to a VSR!");
2667         }
2668 
2669         Restored.set(Dst);
2670 
2671       } else {
2672         // Default behavior for non-CR saves.
2673         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2674 
2675         // Functions without NoUnwind need to preserve the order of elements in
2676         // saved vector registers.
2677         if (Subtarget.needsSwapsForVSXMemOps() &&
2678             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2679           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2680                                         TRI);
2681         else
2682           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2683                                    Register());
2684 
2685         assert(I != MBB.begin() &&
2686                "loadRegFromStackSlot didn't insert any code!");
2687       }
2688     }
2689 
2690     // Insert in reverse order: later restores are placed before earlier ones.
2691     if (AtStart)
2692       I = MBB.begin();
2693     else {
2694       I = BeforeI;
2695       ++I;
2696     }
2697   }
2698 
2699   // If any CRs were spilled but not yet restored, restore them now.
2700   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2701     assert(Subtarget.is32BitELFABI() &&
2702            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2703     bool is31 = needsFP(*MF);
2704     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2705   }
2706 
2707   return true;
2708 }
2709 
2710 uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2711   return TOCSaveOffset;
2712 }
2713 
2714 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2715   return FramePointerSaveOffset;
2716 }
2717 
2718 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2719   return BasePointerSaveOffset;
2720 }
2721 
2722 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2723   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2724     return false;
2725   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2726 }
2727 
2728 uint64_t PPCFrameLowering::getStackThreshold() const {
2729   // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
2730   // use `add r1, r1, <scratch_reg>` to release the stack frame.
2731   // Scratch register contains a signed 64-bit number, which is negative
2732   // when extending the stack and is positive when releasing the stack frame.
2733   // To make `stux` and `add` paired, the absolute value of the number contained
2734   // in the scratch register should be the same. Thus the maximum stack size
2735   // is (2^63)-1, i.e., LONG_MAX.
2736   if (Subtarget.isPPC64())
2737     return LONG_MAX;
2738 
2739   return TargetFrameLowering::getStackThreshold();
2740 }
2741