xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 59bf60519fc30d9d36c86abd83093b068f6b1e4b)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterScavenging.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/Target/TargetOptions.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "framelowering"
34 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36 STATISTIC(NumPrologProbed, "Number of prologues probed");
37 
38 static cl::opt<bool>
39 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
40                      cl::desc("Enable spills in prologue to vector registers."),
41                      cl::init(false), cl::Hidden);
42 
43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44   if (STI.isAIXABI())
45     return STI.isPPC64() ? 16 : 8;
46   // SVR4 ABI:
47   return STI.isPPC64() ? 16 : 4;
48 }
49 
50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51   if (STI.isAIXABI())
52     return STI.isPPC64() ? 40 : 20;
53   return STI.isELFv2ABI() ? 24 : 40;
54 }
55 
56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57   // First slot in the general register save area.
58   return STI.isPPC64() ? -8U : -4U;
59 }
60 
61 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62   if (STI.isAIXABI() || STI.isPPC64())
63     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64 
65   // 32-bit SVR4 ABI:
66   return 8;
67 }
68 
69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70   // Third slot in the general purpose register save area.
71   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72     return -12U;
73 
74   // Second slot in the general purpose register save area.
75   return STI.isPPC64() ? -16U : -8U;
76 }
77 
78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80 }
81 
82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84                           STI.getPlatformStackAlignment(), 0),
85       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88       LinkageSize(computeLinkageSize(Subtarget)),
89       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
90       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91 
92 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
93 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
94     unsigned &NumEntries) const {
95 
96 // Floating-point register save area offsets.
97 #define CALLEE_SAVED_FPRS \
98       {PPC::F31, -8},     \
99       {PPC::F30, -16},    \
100       {PPC::F29, -24},    \
101       {PPC::F28, -32},    \
102       {PPC::F27, -40},    \
103       {PPC::F26, -48},    \
104       {PPC::F25, -56},    \
105       {PPC::F24, -64},    \
106       {PPC::F23, -72},    \
107       {PPC::F22, -80},    \
108       {PPC::F21, -88},    \
109       {PPC::F20, -96},    \
110       {PPC::F19, -104},   \
111       {PPC::F18, -112},   \
112       {PPC::F17, -120},   \
113       {PPC::F16, -128},   \
114       {PPC::F15, -136},   \
115       {PPC::F14, -144}
116 
117 // 32-bit general purpose register save area offsets shared by ELF and
118 // AIX. AIX has an extra CSR with r13.
119 #define CALLEE_SAVED_GPRS32 \
120       {PPC::R31, -4},       \
121       {PPC::R30, -8},       \
122       {PPC::R29, -12},      \
123       {PPC::R28, -16},      \
124       {PPC::R27, -20},      \
125       {PPC::R26, -24},      \
126       {PPC::R25, -28},      \
127       {PPC::R24, -32},      \
128       {PPC::R23, -36},      \
129       {PPC::R22, -40},      \
130       {PPC::R21, -44},      \
131       {PPC::R20, -48},      \
132       {PPC::R19, -52},      \
133       {PPC::R18, -56},      \
134       {PPC::R17, -60},      \
135       {PPC::R16, -64},      \
136       {PPC::R15, -68},      \
137       {PPC::R14, -72}
138 
139 // 64-bit general purpose register save area offsets.
140 #define CALLEE_SAVED_GPRS64 \
141       {PPC::X31, -8},       \
142       {PPC::X30, -16},      \
143       {PPC::X29, -24},      \
144       {PPC::X28, -32},      \
145       {PPC::X27, -40},      \
146       {PPC::X26, -48},      \
147       {PPC::X25, -56},      \
148       {PPC::X24, -64},      \
149       {PPC::X23, -72},      \
150       {PPC::X22, -80},      \
151       {PPC::X21, -88},      \
152       {PPC::X20, -96},      \
153       {PPC::X19, -104},     \
154       {PPC::X18, -112},     \
155       {PPC::X17, -120},     \
156       {PPC::X16, -128},     \
157       {PPC::X15, -136},     \
158       {PPC::X14, -144}
159 
160 // Vector register save area offsets.
161 #define CALLEE_SAVED_VRS \
162       {PPC::V31, -16},   \
163       {PPC::V30, -32},   \
164       {PPC::V29, -48},   \
165       {PPC::V28, -64},   \
166       {PPC::V27, -80},   \
167       {PPC::V26, -96},   \
168       {PPC::V25, -112},  \
169       {PPC::V24, -128},  \
170       {PPC::V23, -144},  \
171       {PPC::V22, -160},  \
172       {PPC::V21, -176},  \
173       {PPC::V20, -192}
174 
175   // Note that the offsets here overlap, but this is fixed up in
176   // processFunctionBeforeFrameFinalized.
177 
178   static const SpillSlot ELFOffsets32[] = {
179       CALLEE_SAVED_FPRS,
180       CALLEE_SAVED_GPRS32,
181 
182       // CR save area offset.  We map each of the nonvolatile CR fields
183       // to the slot for CR2, which is the first of the nonvolatile CR
184       // fields to be assigned, so that we only allocate one save slot.
185       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
186       {PPC::CR2, -4},
187 
188       // VRSAVE save area offset.
189       {PPC::VRSAVE, -4},
190 
191       CALLEE_SAVED_VRS,
192 
193       // SPE register save area (overlaps Vector save area).
194       {PPC::S31, -8},
195       {PPC::S30, -16},
196       {PPC::S29, -24},
197       {PPC::S28, -32},
198       {PPC::S27, -40},
199       {PPC::S26, -48},
200       {PPC::S25, -56},
201       {PPC::S24, -64},
202       {PPC::S23, -72},
203       {PPC::S22, -80},
204       {PPC::S21, -88},
205       {PPC::S20, -96},
206       {PPC::S19, -104},
207       {PPC::S18, -112},
208       {PPC::S17, -120},
209       {PPC::S16, -128},
210       {PPC::S15, -136},
211       {PPC::S14, -144}};
212 
213   static const SpillSlot ELFOffsets64[] = {
214       CALLEE_SAVED_FPRS,
215       CALLEE_SAVED_GPRS64,
216 
217       // VRSAVE save area offset.
218       {PPC::VRSAVE, -4},
219       CALLEE_SAVED_VRS
220   };
221 
222   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
223                                            CALLEE_SAVED_GPRS32,
224                                            // Add AIX's extra CSR.
225                                            {PPC::R13, -76},
226                                            CALLEE_SAVED_VRS};
227 
228   static const SpillSlot AIXOffsets64[] = {
229       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
230 
231   if (Subtarget.is64BitELFABI()) {
232     NumEntries = std::size(ELFOffsets64);
233     return ELFOffsets64;
234   }
235 
236   if (Subtarget.is32BitELFABI()) {
237     NumEntries = std::size(ELFOffsets32);
238     return ELFOffsets32;
239   }
240 
241   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
242 
243   if (Subtarget.isPPC64()) {
244     NumEntries = std::size(AIXOffsets64);
245     return AIXOffsets64;
246   }
247 
248   NumEntries = std::size(AIXOffsets32);
249   return AIXOffsets32;
250 }
251 
252 static bool spillsCR(const MachineFunction &MF) {
253   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254   return FuncInfo->isCRSpilled();
255 }
256 
257 static bool hasSpills(const MachineFunction &MF) {
258   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259   return FuncInfo->hasSpills();
260 }
261 
262 static bool hasNonRISpills(const MachineFunction &MF) {
263   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264   return FuncInfo->hasNonRISpills();
265 }
266 
267 /// MustSaveLR - Return true if this function requires that we save the LR
268 /// register onto the stack in the prolog and restore it in the epilog of the
269 /// function.
270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272 
273   // We need a save/restore of LR if there is any def of LR (which is
274   // defined by calls, including the PIC setup sequence), or if there is
275   // some use of the LR stack slot (e.g. for builtin_return_address).
276   // (LR comes in 32 and 64 bit versions.)
277   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279 }
280 
281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282 /// call frame size. Update the MachineFunction object with the stack size.
283 uint64_t
284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285                                                 bool UseEstimate) const {
286   unsigned NewMaxCallFrameSize = 0;
287   uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288                                             &NewMaxCallFrameSize);
289   MF.getFrameInfo().setStackSize(FrameSize);
290   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291   return FrameSize;
292 }
293 
294 /// determineFrameLayout - Determine the size of the frame and maximum call
295 /// frame size.
296 uint64_t
297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298                                        bool UseEstimate,
299                                        unsigned *NewMaxCallFrameSize) const {
300   const MachineFrameInfo &MFI = MF.getFrameInfo();
301   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302 
303   // Get the number of bytes to allocate from the FrameInfo
304   uint64_t FrameSize =
305     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306 
307   // Get stack alignments. The frame must be aligned to the greatest of these:
308   Align TargetAlign = getStackAlign(); // alignment required per the ABI
309   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
310   Align Alignment = std::max(TargetAlign, MaxAlign);
311 
312   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313 
314   unsigned LR = RegInfo->getRARegister();
315   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317                        !MFI.adjustsStack() &&       // No calls.
318                        !MustSaveLR(MF, LR) &&       // No need to save LR.
319                        !FI->mustSaveTOC() &&        // No need to save TOC.
320                        !RegInfo->hasBasePointer(MF); // No special alignment.
321 
322   // Note: for PPC32 SVR4ABI, we can still generate stackless
323   // code if all local vars are reg-allocated.
324   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
325 
326   // Check whether we can skip adjusting the stack pointer (by using red zone)
327   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
328     // No need for frame
329     return 0;
330   }
331 
332   // Get the maximum call frame size of all the calls.
333   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
334 
335   // Maximum call frame needs to be at least big enough for linkage area.
336   unsigned minCallFrameSize = getLinkageSize();
337   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
338 
339   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
340   // that allocations will be aligned.
341   if (MFI.hasVarSizedObjects())
342     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
343 
344   // Update the new max call frame size if the caller passes in a valid pointer.
345   if (NewMaxCallFrameSize)
346     *NewMaxCallFrameSize = maxCallFrameSize;
347 
348   // Include call frame size in total.
349   FrameSize += maxCallFrameSize;
350 
351   // Make sure the frame is aligned.
352   FrameSize = alignTo(FrameSize, Alignment);
353 
354   return FrameSize;
355 }
356 
357 // hasFP - Return true if the specified function actually has a dedicated frame
358 // pointer register.
359 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
360   const MachineFrameInfo &MFI = MF.getFrameInfo();
361   // FIXME: This is pretty much broken by design: hasFP() might be called really
362   // early, before the stack layout was calculated and thus hasFP() might return
363   // true or false here depending on the time of call.
364   return (MFI.getStackSize()) && needsFP(MF);
365 }
366 
367 // needsFP - Return true if the specified function should have a dedicated frame
368 // pointer register.  This is true if the function has variable sized allocas or
369 // if frame pointer elimination is disabled.
370 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
371   const MachineFrameInfo &MFI = MF.getFrameInfo();
372 
373   // Naked functions have no stack frame pushed, so we don't have a frame
374   // pointer.
375   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
376     return false;
377 
378   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
379          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
380          MF.exposesReturnsTwice() ||
381          (MF.getTarget().Options.GuaranteedTailCallOpt &&
382           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
383 }
384 
385 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
386   bool is31 = needsFP(MF);
387   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
388   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
389 
390   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
391   bool HasBP = RegInfo->hasBasePointer(MF);
392   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
393   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
394 
395   for (MachineBasicBlock &MBB : MF)
396     for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
397       --MBBI;
398       for (MachineOperand &MO : MBBI->operands()) {
399         if (!MO.isReg())
400           continue;
401 
402         switch (MO.getReg()) {
403         case PPC::FP:
404           MO.setReg(FPReg);
405           break;
406         case PPC::FP8:
407           MO.setReg(FP8Reg);
408           break;
409         case PPC::BP:
410           MO.setReg(BPReg);
411           break;
412         case PPC::BP8:
413           MO.setReg(BP8Reg);
414           break;
415 
416         }
417       }
418     }
419 }
420 
421 /*  This function will do the following:
422     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
423       respectively (defaults recommended by the ABI) and return true
424     - If MBB is not an entry block, initialize the register scavenger and look
425       for available registers.
426     - If the defaults (R0/R12) are available, return true
427     - If TwoUniqueRegsRequired is set to true, it looks for two unique
428       registers. Otherwise, look for a single available register.
429       - If the required registers are found, set SR1 and SR2 and return true.
430       - If the required registers are not found, set SR2 or both SR1 and SR2 to
431         PPC::NoRegister and return false.
432 
433     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
434     is not set, this function will attempt to find two different registers, but
435     still return true if only one register is available (and set SR1 == SR2).
436 */
437 bool
438 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
439                                       bool UseAtEnd,
440                                       bool TwoUniqueRegsRequired,
441                                       Register *SR1,
442                                       Register *SR2) const {
443   RegScavenger RS;
444   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
445   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
446 
447   // Set the defaults for the two scratch registers.
448   if (SR1)
449     *SR1 = R0;
450 
451   if (SR2) {
452     assert (SR1 && "Asking for the second scratch register but not the first?");
453     *SR2 = R12;
454   }
455 
456   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
457   if ((UseAtEnd && MBB->isReturnBlock()) ||
458       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
459     return true;
460 
461   if (UseAtEnd) {
462     // The scratch register will be used before the first terminator (or at the
463     // end of the block if there are no terminators).
464     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
465     if (MBBI == MBB->begin()) {
466       RS.enterBasicBlock(*MBB);
467     } else {
468       RS.enterBasicBlockEnd(*MBB);
469       RS.backward(MBBI);
470     }
471   } else {
472     // The scratch register will be used at the start of the block.
473     RS.enterBasicBlock(*MBB);
474   }
475 
476   // If the two registers are available, we're all good.
477   // Note that we only return here if both R0 and R12 are available because
478   // although the function may not require two unique registers, it may benefit
479   // from having two so we should try to provide them.
480   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
481     return true;
482 
483   // Get the list of callee-saved registers for the target.
484   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
485   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
486 
487   // Get all the available registers in the block.
488   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
489                                      &PPC::GPRCRegClass);
490 
491   // We shouldn't use callee-saved registers as scratch registers as they may be
492   // available when looking for a candidate block for shrink wrapping but not
493   // available when the actual prologue/epilogue is being emitted because they
494   // were added as live-in to the prologue block by PrologueEpilogueInserter.
495   for (int i = 0; CSRegs[i]; ++i)
496     BV.reset(CSRegs[i]);
497 
498   // Set the first scratch register to the first available one.
499   if (SR1) {
500     int FirstScratchReg = BV.find_first();
501     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
502   }
503 
504   // If there is another one available, set the second scratch register to that.
505   // Otherwise, set it to either PPC::NoRegister if this function requires two
506   // or to whatever SR1 is set to if this function doesn't require two.
507   if (SR2) {
508     int SecondScratchReg = BV.find_next(*SR1);
509     if (SecondScratchReg != -1)
510       *SR2 = SecondScratchReg;
511     else
512       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
513   }
514 
515   // Now that we've done our best to provide both registers, double check
516   // whether we were unable to provide enough.
517   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
518     return false;
519 
520   return true;
521 }
522 
523 // We need a scratch register for spilling LR and for spilling CR. By default,
524 // we use two scratch registers to hide latency. However, if only one scratch
525 // register is available, we can adjust for that by not overlapping the spill
526 // code. However, if we need to realign the stack (i.e. have a base pointer)
527 // and the stack frame is large, we need two scratch registers.
528 // Also, stack probe requires two scratch registers, one for old sp, one for
529 // large frame and large probe size.
530 bool
531 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
532   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
533   MachineFunction &MF = *(MBB->getParent());
534   bool HasBP = RegInfo->hasBasePointer(MF);
535   unsigned FrameSize = determineFrameLayout(MF);
536   int NegFrameSize = -FrameSize;
537   bool IsLargeFrame = !isInt<16>(NegFrameSize);
538   MachineFrameInfo &MFI = MF.getFrameInfo();
539   Align MaxAlign = MFI.getMaxAlign();
540   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
541   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
542 
543   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
544          TLI.hasInlineStackProbe(MF);
545 }
546 
547 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
548   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
549 
550   return findScratchRegister(TmpMBB, false,
551                              twoUniqueScratchRegsRequired(TmpMBB));
552 }
553 
554 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
555   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
556 
557   return findScratchRegister(TmpMBB, true);
558 }
559 
560 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
561   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
562   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
563 
564   // Abort if there is no register info or function info.
565   if (!RegInfo || !FI)
566     return false;
567 
568   // Only move the stack update on ELFv2 ABI and PPC64.
569   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
570     return false;
571 
572   // Check the frame size first and return false if it does not fit the
573   // requirements.
574   // We need a non-zero frame size as well as a frame that will fit in the red
575   // zone. This is because by moving the stack pointer update we are now storing
576   // to the red zone until the stack pointer is updated. If we get an interrupt
577   // inside the prologue but before the stack update we now have a number of
578   // stores to the red zone and those stores must all fit.
579   MachineFrameInfo &MFI = MF.getFrameInfo();
580   unsigned FrameSize = MFI.getStackSize();
581   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
582     return false;
583 
584   // Frame pointers and base pointers complicate matters so don't do anything
585   // if we have them. For example having a frame pointer will sometimes require
586   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
587   // difficult. Similar situation exists with setjmp.
588   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
589     return false;
590 
591   // Calls to fast_cc functions use different rules for passing parameters on
592   // the stack from the ABI and using PIC base in the function imposes
593   // similar restrictions to using the base pointer. It is not generally safe
594   // to move the stack pointer update in these situations.
595   if (FI->hasFastCall() || FI->usesPICBase())
596     return false;
597 
598   // Finally we can move the stack update if we do not require register
599   // scavenging. Register scavenging can introduce more spills and so
600   // may make the frame size larger than we have computed.
601   return !RegInfo->requiresFrameIndexScavenging(MF);
602 }
603 
604 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
605                                     MachineBasicBlock &MBB) const {
606   MachineBasicBlock::iterator MBBI = MBB.begin();
607   MachineFrameInfo &MFI = MF.getFrameInfo();
608   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
609   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
610   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
611 
612   MachineModuleInfo &MMI = MF.getMMI();
613   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
614   DebugLoc dl;
615   // AIX assembler does not support cfi directives.
616   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
617 
618   const bool HasFastMFLR = Subtarget.hasFastMFLR();
619 
620   // Get processor type.
621   bool isPPC64 = Subtarget.isPPC64();
622   // Get the ABI.
623   bool isSVR4ABI = Subtarget.isSVR4ABI();
624   bool isELFv2ABI = Subtarget.isELFv2ABI();
625   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
626 
627   // Work out frame sizes.
628   uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
629   int64_t NegFrameSize = -FrameSize;
630   if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
631     llvm_unreachable("Unhandled stack size!");
632 
633   if (MFI.isFrameAddressTaken())
634     replaceFPWithRealFP(MF);
635 
636   // Check if the link register (LR) must be saved.
637   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
638   bool MustSaveLR = FI->mustSaveLR();
639   bool MustSaveTOC = FI->mustSaveTOC();
640   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
641   bool MustSaveCR = !MustSaveCRs.empty();
642   // Do we have a frame pointer and/or base pointer for this function?
643   bool HasFP = hasFP(MF);
644   bool HasBP = RegInfo->hasBasePointer(MF);
645   bool HasRedZone = isPPC64 || !isSVR4ABI;
646   bool HasROPProtect = Subtarget.hasROPProtect();
647   bool HasPrivileged = Subtarget.hasPrivileged();
648 
649   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
650   Register BPReg = RegInfo->getBaseRegister(MF);
651   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
652   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
653   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
654   Register ScratchReg;
655   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
656   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
657   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
658                                                 : PPC::MFLR );
659   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
660                                                  : PPC::STW );
661   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
662                                                      : PPC::STWU );
663   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
664                                                         : PPC::STWUX);
665   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
666                                               : PPC::OR );
667   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
668                                                             : PPC::SUBFC);
669   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
670                                                                : PPC::SUBFIC);
671   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
672                                                            : PPC::MFCR);
673   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
674   const MCInstrDesc &HashST =
675       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
676                       : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
677 
678   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
679   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
680   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
681   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
682   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
683          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
684 
685   // Using the same bool variable as below to suppress compiler warnings.
686   bool SingleScratchReg = findScratchRegister(
687       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
688   assert(SingleScratchReg &&
689          "Required number of registers not available in this block");
690 
691   SingleScratchReg = ScratchReg == TempReg;
692 
693   int64_t LROffset = getReturnSaveOffset();
694 
695   int64_t FPOffset = 0;
696   if (HasFP) {
697     MachineFrameInfo &MFI = MF.getFrameInfo();
698     int FPIndex = FI->getFramePointerSaveIndex();
699     assert(FPIndex && "No Frame Pointer Save Slot!");
700     FPOffset = MFI.getObjectOffset(FPIndex);
701   }
702 
703   int64_t BPOffset = 0;
704   if (HasBP) {
705     MachineFrameInfo &MFI = MF.getFrameInfo();
706     int BPIndex = FI->getBasePointerSaveIndex();
707     assert(BPIndex && "No Base Pointer Save Slot!");
708     BPOffset = MFI.getObjectOffset(BPIndex);
709   }
710 
711   int64_t PBPOffset = 0;
712   if (FI->usesPICBase()) {
713     MachineFrameInfo &MFI = MF.getFrameInfo();
714     int PBPIndex = FI->getPICBasePointerSaveIndex();
715     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
716     PBPOffset = MFI.getObjectOffset(PBPIndex);
717   }
718 
719   // Get stack alignments.
720   Align MaxAlign = MFI.getMaxAlign();
721   if (HasBP && MaxAlign > 1)
722     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
723 
724   // Frames of 32KB & larger require special handling because they cannot be
725   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
726   bool isLargeFrame = !isInt<16>(NegFrameSize);
727 
728   // Check if we can move the stack update instruction (stdu) down the prologue
729   // past the callee saves. Hopefully this will avoid the situation where the
730   // saves are waiting for the update on the store with update to complete.
731   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
732   bool MovingStackUpdateDown = false;
733 
734   // Check if we can move the stack update.
735   if (stackUpdateCanBeMoved(MF)) {
736     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
737     for (CalleeSavedInfo CSI : Info) {
738       // If the callee saved register is spilled to a register instead of the
739       // stack then the spill no longer uses the stack pointer.
740       // This can lead to two consequences:
741       // 1) We no longer need to update the stack because the function does not
742       //    spill any callee saved registers to stack.
743       // 2) We have a situation where we still have to update the stack pointer
744       //    even though some registers are spilled to other registers. In
745       //    this case the current code moves the stack update to an incorrect
746       //    position.
747       // In either case we should abort moving the stack update operation.
748       if (CSI.isSpilledToReg()) {
749         StackUpdateLoc = MBBI;
750         MovingStackUpdateDown = false;
751         break;
752       }
753 
754       int FrIdx = CSI.getFrameIdx();
755       // If the frame index is not negative the callee saved info belongs to a
756       // stack object that is not a fixed stack object. We ignore non-fixed
757       // stack objects because we won't move the stack update pointer past them.
758       if (FrIdx >= 0)
759         continue;
760 
761       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
762         StackUpdateLoc++;
763         MovingStackUpdateDown = true;
764       } else {
765         // We need all of the Frame Indices to meet these conditions.
766         // If they do not, abort the whole operation.
767         StackUpdateLoc = MBBI;
768         MovingStackUpdateDown = false;
769         break;
770       }
771     }
772 
773     // If the operation was not aborted then update the object offset.
774     if (MovingStackUpdateDown) {
775       for (CalleeSavedInfo CSI : Info) {
776         int FrIdx = CSI.getFrameIdx();
777         if (FrIdx < 0)
778           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
779       }
780     }
781   }
782 
783   // Where in the prologue we move the CR fields depends on how many scratch
784   // registers we have, and if we need to save the link register or not. This
785   // lambda is to avoid duplicating the logic in 2 places.
786   auto BuildMoveFromCR = [&]() {
787     if (isELFv2ABI && MustSaveCRs.size() == 1) {
788     // In the ELFv2 ABI, we are not required to save all CR fields.
789     // If only one CR field is clobbered, it is more efficient to use
790     // mfocrf to selectively save just that field, because mfocrf has shorter
791     // latency than mfcr.
792       assert(isPPC64 && "V2 ABI is 64-bit only.");
793       MachineInstrBuilder MIB =
794           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
795       MIB.addReg(MustSaveCRs[0], RegState::Kill);
796     } else {
797       MachineInstrBuilder MIB =
798           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
799       for (unsigned CRfield : MustSaveCRs)
800         MIB.addReg(CRfield, RegState::ImplicitKill);
801     }
802   };
803 
804   // If we need to spill the CR and the LR but we don't have two separate
805   // registers available, we must spill them one at a time
806   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
807     BuildMoveFromCR();
808     BuildMI(MBB, MBBI, dl, StoreWordInst)
809         .addReg(TempReg, getKillRegState(true))
810         .addImm(CRSaveOffset)
811         .addReg(SPReg);
812   }
813 
814   if (MustSaveLR)
815     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
816 
817   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
818     BuildMoveFromCR();
819 
820   if (HasRedZone) {
821     if (HasFP)
822       BuildMI(MBB, MBBI, dl, StoreInst)
823         .addReg(FPReg)
824         .addImm(FPOffset)
825         .addReg(SPReg);
826     if (FI->usesPICBase())
827       BuildMI(MBB, MBBI, dl, StoreInst)
828         .addReg(PPC::R30)
829         .addImm(PBPOffset)
830         .addReg(SPReg);
831     if (HasBP)
832       BuildMI(MBB, MBBI, dl, StoreInst)
833         .addReg(BPReg)
834         .addImm(BPOffset)
835         .addReg(SPReg);
836   }
837 
838   // Generate the instruction to store the LR. In the case where ROP protection
839   // is required the register holding the LR should not be killed as it will be
840   // used by the hash store instruction.
841   auto SaveLR = [&](int64_t Offset) {
842     assert(MustSaveLR && "LR is not required to be saved!");
843     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
844         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
845         .addImm(Offset)
846         .addReg(SPReg);
847 
848     // Add the ROP protection Hash Store instruction.
849     // NOTE: This is technically a violation of the ABI. The hash can be saved
850     // up to 512 bytes into the Protected Zone. This can be outside of the
851     // initial 288 byte volatile program storage region in the Protected Zone.
852     // However, this restriction will be removed in an upcoming revision of the
853     // ABI.
854     if (HasROPProtect) {
855       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
856       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
857       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
858              "ROP hash save offset out of range.");
859       assert(((ImmOffset & 0x7) == 0) &&
860              "ROP hash save offset must be 8 byte aligned.");
861       BuildMI(MBB, StackUpdateLoc, dl, HashST)
862           .addReg(ScratchReg, getKillRegState(true))
863           .addImm(ImmOffset)
864           .addReg(SPReg);
865     }
866   };
867 
868   if (MustSaveLR && HasFastMFLR)
869       SaveLR(LROffset);
870 
871   if (MustSaveCR &&
872       !(SingleScratchReg && MustSaveLR)) {
873     assert(HasRedZone && "A red zone is always available on PPC64");
874     BuildMI(MBB, MBBI, dl, StoreWordInst)
875       .addReg(TempReg, getKillRegState(true))
876       .addImm(CRSaveOffset)
877       .addReg(SPReg);
878   }
879 
880   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
881   if (!FrameSize) {
882     if (MustSaveLR && !HasFastMFLR)
883       SaveLR(LROffset);
884     return;
885   }
886 
887   // Adjust stack pointer: r1 += NegFrameSize.
888   // If there is a preferred stack alignment, align R1 now
889 
890   if (HasBP && HasRedZone) {
891     // Save a copy of r1 as the base pointer.
892     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
893       .addReg(SPReg)
894       .addReg(SPReg);
895   }
896 
897   // Have we generated a STUX instruction to claim stack frame? If so,
898   // the negated frame size will be placed in ScratchReg.
899   bool HasSTUX =
900       (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
901       (HasBP && MaxAlign > 1) || isLargeFrame;
902 
903   // If we use STUX to update the stack pointer, we need the two scratch
904   // registers TempReg and ScratchReg, we have to save LR here which is stored
905   // in ScratchReg.
906   // If the offset can not be encoded into the store instruction, we also have
907   // to save LR here.
908   if (MustSaveLR && !HasFastMFLR &&
909       (HasSTUX || !isInt<16>(FrameSize + LROffset)))
910     SaveLR(LROffset);
911 
912   // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
913   // pointer is always stored at SP, we will get a free probe due to an essential
914   // STU(X) instruction.
915   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
916     // To be consistent with other targets, a pseudo instruction is emitted and
917     // will be later expanded in `inlineStackProbe`.
918     BuildMI(MBB, MBBI, dl,
919             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
920                             : PPC::PROBED_STACKALLOC_32))
921         .addDef(TempReg)
922         .addDef(ScratchReg) // ScratchReg stores the old sp.
923         .addImm(NegFrameSize);
924     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
925     // update the ScratchReg to meet the assumption that ScratchReg contains
926     // the NegFrameSize. This solution is rather tricky.
927     if (!HasRedZone) {
928       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
929           .addReg(ScratchReg)
930           .addReg(SPReg);
931     }
932   } else {
933     // This condition must be kept in sync with canUseAsPrologue.
934     if (HasBP && MaxAlign > 1) {
935       if (isPPC64)
936         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
937             .addReg(SPReg)
938             .addImm(0)
939             .addImm(64 - Log2(MaxAlign));
940       else // PPC32...
941         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
942             .addReg(SPReg)
943             .addImm(0)
944             .addImm(32 - Log2(MaxAlign))
945             .addImm(31);
946       if (!isLargeFrame) {
947         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
948             .addReg(ScratchReg, RegState::Kill)
949             .addImm(NegFrameSize);
950       } else {
951         assert(!SingleScratchReg && "Only a single scratch reg available");
952         TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
953         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
954             .addReg(ScratchReg, RegState::Kill)
955             .addReg(TempReg, RegState::Kill);
956       }
957 
958       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
959           .addReg(SPReg, RegState::Kill)
960           .addReg(SPReg)
961           .addReg(ScratchReg);
962     } else if (!isLargeFrame) {
963       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
964           .addReg(SPReg)
965           .addImm(NegFrameSize)
966           .addReg(SPReg);
967     } else {
968       TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
969       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
970           .addReg(SPReg, RegState::Kill)
971           .addReg(SPReg)
972           .addReg(ScratchReg);
973     }
974   }
975 
976   // Save the TOC register after the stack pointer update if a prologue TOC
977   // save is required for the function.
978   if (MustSaveTOC) {
979     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
980     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
981       .addReg(TOCReg, getKillRegState(true))
982       .addImm(TOCSaveOffset)
983       .addReg(SPReg);
984   }
985 
986   if (!HasRedZone) {
987     assert(!isPPC64 && "A red zone is always available on PPC64");
988     if (HasSTUX) {
989       // The negated frame size is in ScratchReg, and the SPReg has been
990       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
991       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
992       // the stack frame (i.e. the old SP), ideally, we would put the old
993       // SP into a register and use it as the base for the stores. The
994       // problem is that the only available register may be ScratchReg,
995       // which could be R0, and R0 cannot be used as a base address.
996 
997       // First, set ScratchReg to the old SP. This may need to be modified
998       // later.
999       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1000         .addReg(ScratchReg, RegState::Kill)
1001         .addReg(SPReg);
1002 
1003       if (ScratchReg == PPC::R0) {
1004         // R0 cannot be used as a base register, but it can be used as an
1005         // index in a store-indexed.
1006         int LastOffset = 0;
1007         if (HasFP)  {
1008           // R0 += (FPOffset-LastOffset).
1009           // Need addic, since addi treats R0 as 0.
1010           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1011             .addReg(ScratchReg)
1012             .addImm(FPOffset-LastOffset);
1013           LastOffset = FPOffset;
1014           // Store FP into *R0.
1015           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1016             .addReg(FPReg, RegState::Kill)  // Save FP.
1017             .addReg(PPC::ZERO)
1018             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1019         }
1020         if (FI->usesPICBase()) {
1021           // R0 += (PBPOffset-LastOffset).
1022           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1023             .addReg(ScratchReg)
1024             .addImm(PBPOffset-LastOffset);
1025           LastOffset = PBPOffset;
1026           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1027             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1028             .addReg(PPC::ZERO)
1029             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1030         }
1031         if (HasBP) {
1032           // R0 += (BPOffset-LastOffset).
1033           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1034             .addReg(ScratchReg)
1035             .addImm(BPOffset-LastOffset);
1036           LastOffset = BPOffset;
1037           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1038             .addReg(BPReg, RegState::Kill)  // Save BP.
1039             .addReg(PPC::ZERO)
1040             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1041           // BP = R0-LastOffset
1042           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1043             .addReg(ScratchReg, RegState::Kill)
1044             .addImm(-LastOffset);
1045         }
1046       } else {
1047         // ScratchReg is not R0, so use it as the base register. It is
1048         // already set to the old SP, so we can use the offsets directly.
1049 
1050         // Now that the stack frame has been allocated, save all the necessary
1051         // registers using ScratchReg as the base address.
1052         if (HasFP)
1053           BuildMI(MBB, MBBI, dl, StoreInst)
1054             .addReg(FPReg)
1055             .addImm(FPOffset)
1056             .addReg(ScratchReg);
1057         if (FI->usesPICBase())
1058           BuildMI(MBB, MBBI, dl, StoreInst)
1059             .addReg(PPC::R30)
1060             .addImm(PBPOffset)
1061             .addReg(ScratchReg);
1062         if (HasBP) {
1063           BuildMI(MBB, MBBI, dl, StoreInst)
1064             .addReg(BPReg)
1065             .addImm(BPOffset)
1066             .addReg(ScratchReg);
1067           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1068             .addReg(ScratchReg, RegState::Kill)
1069             .addReg(ScratchReg);
1070         }
1071       }
1072     } else {
1073       // The frame size is a known 16-bit constant (fitting in the immediate
1074       // field of STWU). To be here we have to be compiling for PPC32.
1075       // Since the SPReg has been decreased by FrameSize, add it back to each
1076       // offset.
1077       if (HasFP)
1078         BuildMI(MBB, MBBI, dl, StoreInst)
1079           .addReg(FPReg)
1080           .addImm(FrameSize + FPOffset)
1081           .addReg(SPReg);
1082       if (FI->usesPICBase())
1083         BuildMI(MBB, MBBI, dl, StoreInst)
1084           .addReg(PPC::R30)
1085           .addImm(FrameSize + PBPOffset)
1086           .addReg(SPReg);
1087       if (HasBP) {
1088         BuildMI(MBB, MBBI, dl, StoreInst)
1089           .addReg(BPReg)
1090           .addImm(FrameSize + BPOffset)
1091           .addReg(SPReg);
1092         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1093           .addReg(SPReg)
1094           .addImm(FrameSize);
1095       }
1096     }
1097   }
1098 
1099   // Save the LR now.
1100   if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1101     SaveLR(LROffset + FrameSize);
1102 
1103   // Add Call Frame Information for the instructions we generated above.
1104   if (needsCFI) {
1105     unsigned CFIIndex;
1106 
1107     if (HasBP) {
1108       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1109       // because if the stack needed aligning then CFA won't be at a fixed
1110       // offset from FP/SP.
1111       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1112       CFIIndex = MF.addFrameInst(
1113           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1114     } else {
1115       // Adjust the definition of CFA to account for the change in SP.
1116       assert(NegFrameSize);
1117       CFIIndex = MF.addFrameInst(
1118           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1119     }
1120     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1121         .addCFIIndex(CFIIndex);
1122 
1123     if (HasFP) {
1124       // Describe where FP was saved, at a fixed offset from CFA.
1125       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1126       CFIIndex = MF.addFrameInst(
1127           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1128       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1129           .addCFIIndex(CFIIndex);
1130     }
1131 
1132     if (FI->usesPICBase()) {
1133       // Describe where the PIC base pointer (R30) was saved, at a fixed
1133       // offset from CFA.
1134       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1135       CFIIndex = MF.addFrameInst(
1136           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1137       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1138           .addCFIIndex(CFIIndex);
1139     }
1140 
1141     if (HasBP) {
1142       // Describe where BP was saved, at a fixed offset from CFA.
1143       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1144       CFIIndex = MF.addFrameInst(
1145           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1146       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1147           .addCFIIndex(CFIIndex);
1148     }
1149 
1150     if (MustSaveLR) {
1151       // Describe where LR was saved, at a fixed offset from CFA.
1152       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1153       CFIIndex = MF.addFrameInst(
1154           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1155       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1156           .addCFIIndex(CFIIndex);
1157     }
1158   }
1159 
1160   // If there is a frame pointer, copy R1 into R31
1161   if (HasFP) {
1162     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1163       .addReg(SPReg)
1164       .addReg(SPReg);
1165 
1166     if (!HasBP && needsCFI) {
1167       // Change the definition of CFA from SP+offset to FP+offset, because SP
1168       // will change at every alloca.
1169       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1170       unsigned CFIIndex = MF.addFrameInst(
1171           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1172 
1173       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1174           .addCFIIndex(CFIIndex);
1175     }
1176   }
1177 
1178   if (needsCFI) {
1179     // Describe where callee saved registers were saved, at fixed offsets from
1180     // CFA.
1181     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1182     for (const CalleeSavedInfo &I : CSI) {
1183       Register Reg = I.getReg();
1184       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1185 
1186       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1187       // subregisters of CR2. We just need to emit a move of CR2.
1188       if (PPC::CRBITRCRegClass.contains(Reg))
1189         continue;
1190 
1191       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1192         continue;
1193 
1194       // For SVR4, don't emit a move for the CR spill slot if we haven't
1195       // spilled CRs.
1196       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1197           && !MustSaveCR)
1198         continue;
1199 
1200       // For 64-bit SVR4 when we have spilled CRs, the spill location
1201       // is SP+8, not a frame-relative slot.
1202       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1203         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1204         // the whole CR word.  In the ELFv2 ABI, every CR that was
1205         // actually saved gets its own CFI record.
1206         Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1207         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1208             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1209         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1210             .addCFIIndex(CFIIndex);
1211         continue;
1212       }
1213 
1214       if (I.isSpilledToReg()) {
1215         unsigned SpilledReg = I.getDstReg();
1216         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1217             nullptr, MRI->getDwarfRegNum(Reg, true),
1218             MRI->getDwarfRegNum(SpilledReg, true)));
1219         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1220           .addCFIIndex(CFIRegister);
1221       } else {
1222         int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1223         // We have changed the object offset above but we do not want to change
1224         // the actual offsets in the CFI instruction so we have to undo the
1225         // offset change here.
1226         if (MovingStackUpdateDown)
1227           Offset -= NegFrameSize;
1228 
1229         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1230             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1231         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1232             .addCFIIndex(CFIIndex);
1233       }
1234     }
1235   }
1236 }
1237 
1238 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1239                                         MachineBasicBlock &PrologMBB) const {
1240   bool isPPC64 = Subtarget.isPPC64();
1241   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1242   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1243   MachineFrameInfo &MFI = MF.getFrameInfo();
1244   MachineModuleInfo &MMI = MF.getMMI();
1245   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1246   // AIX assembler does not support cfi directives.
1247   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1248   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1249     int Opc = MI.getOpcode();
1250     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1251   });
1252   if (StackAllocMIPos == PrologMBB.end())
1253     return;
1254   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1255   MachineBasicBlock *CurrentMBB = &PrologMBB;
1256   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1257   MachineInstr &MI = *StackAllocMIPos;
1258   int64_t NegFrameSize = MI.getOperand(2).getImm();
1259   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1260   int64_t NegProbeSize = -(int64_t)ProbeSize;
1261   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1262   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1263   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1264   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1265   Register ScratchReg = MI.getOperand(0).getReg();
1266   Register FPReg = MI.getOperand(1).getReg();
1267   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1268   bool HasBP = RegInfo->hasBasePointer(MF);
1269   Register BPReg = RegInfo->getBaseRegister(MF);
1270   Align MaxAlign = MFI.getMaxAlign();
1271   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1272   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1273   // Subroutines to generate .cfi_* directives.
1274   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1275                             MachineBasicBlock::iterator MBBI, Register Reg) {
1276     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1277     unsigned CFIIndex = MF.addFrameInst(
1278         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1279     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1280         .addCFIIndex(CFIIndex);
1281   };
1282   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1283                          MachineBasicBlock::iterator MBBI, Register Reg,
1284                          int Offset) {
1285     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1286     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1287         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1288     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1289         .addCFIIndex(CFIIndex);
1290   };
1291   // Subroutine to determine if we can use the Imm as part of d-form.
1292   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1293   // Subroutine to materialize the Imm into TempReg.
1294   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1295                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1296                             Register &TempReg) {
1297     assert(isInt<32>(Imm) && "Unhandled imm");
1298     if (isInt<16>(Imm))
1299       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1300           .addImm(Imm);
1301     else {
1302       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1303           .addImm(Imm >> 16);
1304       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1305           .addReg(TempReg)
1306           .addImm(Imm & 0xFFFF);
1307     }
1308   };
1309   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1310   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1311                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1312                               Register NegSizeReg, bool UseDForm,
1313                               Register StoreReg) {
1314     if (UseDForm)
1315       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1316           .addReg(StoreReg)
1317           .addImm(NegSize)
1318           .addReg(SPReg);
1319     else
1320       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1321           .addReg(StoreReg)
1322           .addReg(SPReg)
1323           .addReg(NegSizeReg);
1324   };
1325   // Used to probe stack when realignment is required.
1326   // Note that, according to ABI's requirement, *sp must always equal the
1327   // value of back-chain pointer, only st(w|d)u(x) can be used to update sp.
1328   // Following is pseudo code:
1329   // final_sp = (sp & align) + negframesize;
1330   // neg_gap = final_sp - sp;
1331   // while (neg_gap < negprobesize) {
1332   //   stdu fp, negprobesize(sp);
1333   //   neg_gap -= negprobesize;
1334   // }
1335   // stdux fp, sp, neg_gap
1336   //
1337   // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
1338   // before probe code, we don't need to save it, so we get one additional reg
1339   // that can be used to materialize the probe size if needed to use xform.
1340   // Otherwise, we can NOT materialize probe size, so we can only use Dform for
1341   // now.
1342   //
1343   // The allocations are:
1344   // if (HasBP && HasRedzone) {
1345   //   r0: materialize the probesize if needed so that we can use xform.
1346   //   r12: `neg_gap`
1347   // } else {
1348   //   r0: back-chain pointer
1349   //   r12: `neg_gap`.
1350   // }
1351   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1352                                  MachineBasicBlock::iterator MBBI,
1353                                  Register ScratchReg, Register TempReg) {
1354     assert(HasBP && "The function is supposed to have base pointer when its "
1355                     "stack is realigned.");
1356     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1357 
1358     // FIXME: We can eliminate this limitation if we get more information about
1359     // which parts of the red zone are already used. The used part of the red
1360     // zone can be treated as probed. But there might be `holes' in the probed
1361     // red zone, which could complicate the implementation.
1362     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1363            "Probe size should be larger or equal to the size of red-zone so "
1364            "that red-zone is not clobbered by probing.");
1365 
1366     Register &FinalStackPtr = TempReg;
1367     // FIXME: We only support NegProbeSize materializable by DForm currently.
1368     // When HasBP && HasRedzone, we can use xform if we have an additional idle
1369     // register.
1370     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1371     assert(isInt<16>(NegProbeSize) &&
1372            "NegProbeSize should be materializable by DForm");
1373     Register CRReg = PPC::CR0;
1374     // Layout of output assembly kinda like:
1375     // bb.0:
1376     //   ...
1377     //   sub $scratchreg, $finalsp, r1
1378     //   cmpdi $scratchreg, <negprobesize>
1379     //   bge bb.2
1380     // bb.1:
1381     //   stdu <backchain>, <negprobesize>(r1)
1382     //   sub $scratchreg, $scratchreg, negprobesize
1383     //   cmpdi $scratchreg, <negprobesize>
1384     //   blt bb.1
1385     // bb.2:
1386     //   stdux <backchain>, r1, $scratchreg
1387     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1388     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1389     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1390     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1391     MF.insert(MBBInsertPoint, ProbeExitMBB);
1392     // bb.2
1393     {
1394       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1395       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1396                        BackChainPointer);
1397       if (HasRedZone)
1398         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1399         // to TempReg to satisfy it.
1400         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1401             .addReg(BPReg)
1402             .addReg(BPReg);
1403       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1404       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1405     }
1406     // bb.0
1407     {
1408       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1409           .addReg(SPReg)
1410           .addReg(FinalStackPtr);
1411       if (!HasRedZone)
1412         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1413       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1414           .addReg(ScratchReg)
1415           .addImm(NegProbeSize);
1416       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1417           .addImm(PPC::PRED_GE)
1418           .addReg(CRReg)
1419           .addMBB(ProbeExitMBB);
1420       MBB.addSuccessor(ProbeLoopBodyMBB);
1421       MBB.addSuccessor(ProbeExitMBB);
1422     }
1423     // bb.1
1424     {
1425       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1426       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1427                        0, true /*UseDForm*/, BackChainPointer);
1428       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1429               ScratchReg)
1430           .addReg(ScratchReg)
1431           .addImm(-NegProbeSize);
1432       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1433               CRReg)
1434           .addReg(ScratchReg)
1435           .addImm(NegProbeSize);
1436       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1437           .addImm(PPC::PRED_LT)
1438           .addReg(CRReg)
1439           .addMBB(ProbeLoopBodyMBB);
1440       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1441       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1442     }
1443     // Update liveins.
1444     recomputeLiveIns(MF);
1445     return ProbeExitMBB;
1446   };
1447   // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1448   // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
1449   // the offset subtracted from SP is determined by SP's runtime value.
1450   if (HasBP && MaxAlign > 1) {
1451     // Calculate final stack pointer.
1452     if (isPPC64)
1453       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1454           .addReg(SPReg)
1455           .addImm(0)
1456           .addImm(64 - Log2(MaxAlign));
1457     else
1458       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1459           .addReg(SPReg)
1460           .addImm(0)
1461           .addImm(32 - Log2(MaxAlign))
1462           .addImm(31);
1463     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1464             FPReg)
1465         .addReg(ScratchReg)
1466         .addReg(SPReg);
1467     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1468     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1469             FPReg)
1470         .addReg(ScratchReg)
1471         .addReg(FPReg);
1472     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1473     if (needsCFI)
1474       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1475   } else {
1476     // Initialize current frame pointer.
1477     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1478     // Use FPReg to calculate CFA.
1479     if (needsCFI)
1480       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1481     // Probe residual part.
1482     if (NegResidualSize) {
1483       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1484       if (!ResidualUseDForm)
1485         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1486       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1487                        ResidualUseDForm, FPReg);
1488     }
1489     bool UseDForm = CanUseDForm(NegProbeSize);
1490     // If number of blocks is small, just probe them directly.
1491     if (NumBlocks < 3) {
1492       if (!UseDForm)
1493         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1494       for (int i = 0; i < NumBlocks; ++i)
1495         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1496                          FPReg);
1497       if (needsCFI) {
1498         // Restore using SPReg to calculate CFA.
1499         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1500       }
1501     } else {
1502       // Since CTR is a volatile register and current shrinkwrap implementation
1503       // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1504       // CTR loop to probe.
1505       // Calculate trip count and stores it in CTRReg.
1506       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1507       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1508           .addReg(ScratchReg, RegState::Kill);
1509       if (!UseDForm)
1510         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1511       // Create MBBs of the loop.
1512       MachineFunction::iterator MBBInsertPoint =
1513           std::next(CurrentMBB->getIterator());
1514       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1515       MF.insert(MBBInsertPoint, LoopMBB);
1516       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1517       MF.insert(MBBInsertPoint, ExitMBB);
1518       // Synthesize the loop body.
1519       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1520                        UseDForm, FPReg);
1521       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1522           .addMBB(LoopMBB);
1523       LoopMBB->addSuccessor(ExitMBB);
1524       LoopMBB->addSuccessor(LoopMBB);
1525       // Synthesize the exit MBB.
1526       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1527                       std::next(MachineBasicBlock::iterator(MI)),
1528                       CurrentMBB->end());
1529       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1530       CurrentMBB->addSuccessor(LoopMBB);
1531       if (needsCFI) {
1532         // Restore using SPReg to calculate CFA.
1533         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1534       }
1535       // Update liveins.
1536       recomputeLiveIns(MF);
1537     }
1538   }
1539   ++NumPrologProbed;
1540   MI.eraseFromParent();
1541 }
1542 
1543 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1544                                     MachineBasicBlock &MBB) const {
       // Purpose: emit the epilogue of MF into MBB. Instructions are inserted
       // before MBB's first terminator (MBBI), or before StackUpdateLoc when the
       // stack update is allowed to move above the callee-saved restores. The
       // code restores SP and reloads LR, the saved CR fields, FP, the PIC base
       // (R30) and BP as required. For return blocks it finally either pops the
       // caller-allocated area (fastcc with GuaranteedTailCallOpt) or calls
       // createTailCallBranchInstr().
       //   MF  - function whose frame is being torn down.
       //   MBB - block that receives the epilogue.
1545   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1546   DebugLoc dl;
1547 
       // Reuse the terminator's debug location when the block has a terminator;
       // otherwise dl stays default-constructed.
1548   if (MBBI != MBB.end())
1549     dl = MBBI->getDebugLoc();
1550 
1551   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1552   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1553 
1554   // Get alignment info so we know how to restore the SP.
1555   const MachineFrameInfo &MFI = MF.getFrameInfo();
1556 
1557   // Get the number of bytes allocated from the FrameInfo.
1558   int64_t FrameSize = MFI.getStackSize();
1559 
1560   // Get processor type.
1561   bool isPPC64 = Subtarget.isPPC64();
1562 
1563   // Check if the link register (LR) has been saved.
1564   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1565   bool MustSaveLR = FI->mustSaveLR();
1566   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1567   bool MustSaveCR = !MustSaveCRs.empty();
1568   // Do we have a frame pointer and/or base pointer for this function?
1569   bool HasFP = hasFP(MF);
1570   bool HasBP = RegInfo->hasBasePointer(MF);
       // Every configuration except 32-bit SVR4 is treated as having a red zone.
1571   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1572   bool HasROPProtect = Subtarget.hasROPProtect();
1573   bool HasPrivileged = Subtarget.hasPrivileged();
1574 
1575   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1576   Register BPReg = RegInfo->getBaseRegister(MF);
1577   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1578   Register ScratchReg;
1579   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
       // Select the 64-bit or 32-bit opcode of each instruction used below.
1580   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1581                                                  : PPC::MTLR );
1582   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1583                                                  : PPC::LWZ );
1584   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1585                                                            : PPC::LIS );
1586   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1587                                               : PPC::OR );
1588   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1589                                                   : PPC::ORI );
1590   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1591                                                    : PPC::ADDI );
1592   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1593                                                 : PPC::ADD4 );
1594   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1595                                                      : PPC::LWZ);
1596   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1597                                                      : PPC::MTOCRF);
       // The hash-check opcode depends on both the word size and on whether the
       // subtarget reports hasPrivileged().
1598   const MCInstrDesc &HashChk =
1599       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1600                       : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1601   int64_t LROffset = getReturnSaveOffset();
1602 
1603   int64_t FPOffset = 0;
1604 
1605   // Using the same bool variable as below to suppress compiler warnings.
       // NOTE(review): the two bool arguments are presumably "use at end of block"
       // and "two unique registers required" - confirm against
       // findScratchRegister(). It fills ScratchReg and TempReg.
1606   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1607                                               &TempReg);
1608   assert(SingleScratchReg &&
1609          "Could not find an available scratch register");
1610 
       // From here on SingleScratchReg means that both scratch slots ended up
       // being the same register.
1611   SingleScratchReg = ScratchReg == TempReg;
1612 
       // Look up the offsets of the FP, BP and PIC-base save slots. Each stays 0
       // when the corresponding register is not used by this function.
1613   if (HasFP) {
1614     int FPIndex = FI->getFramePointerSaveIndex();
1615     assert(FPIndex && "No Frame Pointer Save Slot!");
1616     FPOffset = MFI.getObjectOffset(FPIndex);
1617   }
1618 
1619   int64_t BPOffset = 0;
1620   if (HasBP) {
1621       int BPIndex = FI->getBasePointerSaveIndex();
1622       assert(BPIndex && "No Base Pointer Save Slot!");
1623       BPOffset = MFI.getObjectOffset(BPIndex);
1624   }
1625 
1626   int64_t PBPOffset = 0;
1627   if (FI->usesPICBase()) {
1628     int PBPIndex = FI->getPICBasePointerSaveIndex();
1629     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1630     PBPOffset = MFI.getObjectOffset(PBPIndex);
1631   }
1632 
1633   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1634 
1635   if (IsReturnBlock) {
1636     unsigned RetOpcode = MBBI->getOpcode();
1637     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1638                       RetOpcode == PPC::TCRETURNdi ||
1639                       RetOpcode == PPC::TCRETURNai ||
1640                       RetOpcode == PPC::TCRETURNri8 ||
1641                       RetOpcode == PPC::TCRETURNdi8 ||
1642                       RetOpcode == PPC::TCRETURNai8;
1643 
         // For a tail-call return, operand 1 of the pseudo holds an immediate
         // stack adjustment that is folded into FrameSize.
1644     if (UsesTCRet) {
1645       int MaxTCRetDelta = FI->getTailCallSPDelta();
1646       MachineOperand &StackAdjust = MBBI->getOperand(1);
1647       assert(StackAdjust.isImm() && "Expecting immediate value.");
1648       // Adjust stack pointer.
1649       int StackAdj = StackAdjust.getImm();
1650       int Delta = StackAdj - MaxTCRetDelta;
1651       assert((Delta >= 0) && "Delta must be positive");
1652       if (MaxTCRetDelta>0)
1653         FrameSize += (StackAdj +Delta);
1654       else
1655         FrameSize += StackAdj;
1656     }
1657   }
1658 
1659   // Frames of 32KB & larger require special handling because they cannot be
1660   // indexed into with a simple LD/LWZ immediate offset operand.
1661   bool isLargeFrame = !isInt<16>(FrameSize);
1662 
1663   // On targets without red zone, the SP needs to be restored last, so that
1664   // all live contents of the stack frame are upwards of the SP. This means
1665   // that we cannot restore SP just now, since there may be more registers
1666   // to restore from the stack frame (e.g. R31). If the frame size is not
1667   // a simple immediate value, we will need a spare register to hold the
1668   // restored SP. If the frame size is known and small, we can simply adjust
1669   // the offsets of the registers to be restored, and still use SP to restore
1670   // them. In such case, the final update of SP will be to add the frame
1671   // size to it.
1672   // To simplify the code, set RBReg to the base register used to restore
1673   // values from the stack, and set SPAdd to the value that needs to be added
1674   // to the SP at the end. The default values are as if red zone was present.
1675   unsigned RBReg = SPReg;
1676   uint64_t SPAdd = 0;
1677 
1678   // Check if we can move the stack update instruction up the epilogue
1679   // past the callee saves. This will allow the move to LR instruction
1680   // to be executed before the restores of the callee saves which means
1681   // that the callee saves can hide the latency from the MTLR instruction.
1682   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1683   if (stackUpdateCanBeMoved(MF)) {
1684     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
         // Step StackUpdateLoc back by one instruction for every callee-saved
         // entry that lives in a fixed object at a negative offset. Any entry
         // that cannot be handled resets the location to MBBI and stops.
         // NOTE(review): the loop variable is declared by value, so each
         // CalleeSavedInfo element is copied per iteration.
1685     for (CalleeSavedInfo CSI : Info) {
1686       // If the callee saved register is spilled to another register abort the
1687       // stack update movement.
1688       if (CSI.isSpilledToReg()) {
1689         StackUpdateLoc = MBBI;
1690         break;
1691       }
1692       int FrIdx = CSI.getFrameIdx();
1693       // If the frame index is not negative the callee saved info belongs to a
1694       // stack object that is not a fixed stack object. We ignore non-fixed
1695       // stack objects because we won't move the update of the stack pointer
1696       // past them.
1697       if (FrIdx >= 0)
1698         continue;
1699 
1700       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1701         StackUpdateLoc--;
1702       else {
1703         // Abort the operation as we can't update all CSR restores.
1704         StackUpdateLoc = MBBI;
1705         break;
1706       }
1707     }
1708   }
1709 
       // Recover the caller's SP value. Exactly one of four strategies is used:
       // copy it from BP, compute FP + FrameSize, add FrameSize as an immediate
       // (or defer that add through SPAdd when there is no red zone), or reload
       // the word stored at 0(SP).
1710   if (FrameSize) {
1711     // In the prologue, the loaded (or persistent) stack pointer value is
1712     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1713     // zone add this offset back now.
1714 
1715     // If the function has a base pointer, the stack pointer has been copied
1716     // to it so we can restore it by copying in the other direction.
1717     if (HasRedZone && HasBP) {
1718       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1719         addReg(BPReg).
1720         addReg(BPReg);
1721     }
1722     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1723     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1724     // call which invalidates the stack pointer value in SP(0). So we use the
1725     // value of R31 in this case. Similar situation exists with setjmp.
1726     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1727       assert(HasFP && "Expecting a valid frame pointer.");
1728       if (!HasRedZone)
1729         RBReg = FPReg;
1730       if (!isLargeFrame) {
1731         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1732           .addReg(FPReg).addImm(FrameSize);
1733       } else {
             // FrameSize does not fit a 16-bit immediate: materialize it in
             // ScratchReg and use the register-register add.
             // NOTE(review): RBReg is appended with addReg() and no
             // RegState::Define flag, unlike the other BuildMI calls here that
             // pass the destination to BuildMI - confirm this is intended.
1734         TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1735         BuildMI(MBB, MBBI, dl, AddInst)
1736           .addReg(RBReg)
1737           .addReg(FPReg)
1738           .addReg(ScratchReg);
1739       }
1740     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1741       if (HasRedZone) {
1742         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1743           .addReg(SPReg)
1744           .addImm(FrameSize);
1745       } else {
1746         // Make sure that adding FrameSize will not overflow the max offset
1747         // size.
1748         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1749                "Local offsets should be negative");
             // Keep SP unchanged for now: rebase the save-slot offsets onto the
             // current SP and record the add that is emitted at the end.
1750         SPAdd = FrameSize;
1751         FPOffset += FrameSize;
1752         BPOffset += FrameSize;
1753         PBPOffset += FrameSize;
1754       }
1755     } else {
1756       // We don't want to use ScratchReg as a base register, because it
1757       // could happen to be R0. Use FP instead, but make sure to preserve it.
1758       if (!HasRedZone) {
1759         // If FP is not saved, copy it to ScratchReg.
1760         if (!HasFP)
1761           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1762             .addReg(FPReg)
1763             .addReg(FPReg);
1764         RBReg = FPReg;
1765       }
           // Load the word at 0(SP) into RBReg.
1766       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1767         .addImm(0)
1768         .addReg(SPReg);
1769     }
1770   }
1771   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1772   // If there is no red zone, ScratchReg may be needed for holding a useful
1773   // value (although not the base register). Make sure it is not overwritten
1774   // too early.
1775 
1776   // If we need to restore both the LR and the CR and we only have one
1777   // available scratch register, we must do them one at a time.
1778   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1779     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1780     // is live here.
1781     assert(HasRedZone && "Expecting red zone");
1782     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1783       .addImm(CRSaveOffset)
1784       .addReg(SPReg);
         // Move the loaded word into every saved CR field; TempReg is marked
         // killed on the last move.
1785     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1786       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1787         .addReg(TempReg, getKillRegState(i == e-1));
1788   }
1789 
1790   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1791   // LR is stored in the caller's stack frame. ScratchReg will be needed
1792   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1793   // a base register anyway, because it may happen to be R0.
1794   bool LoadedLR = false;
1795   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1796     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1797       .addImm(LROffset+SPAdd)
1798       .addReg(RBReg);
1799     LoadedLR = true;
1800   }
1801 
       // CR reload for the cases not handled by the single-scratch path above.
       // The matching moves into the CR fields are emitted further down.
1802   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1803     assert(RBReg == SPReg && "Should be using SP as a base register");
1804     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1805       .addImm(CRSaveOffset)
1806       .addReg(RBReg);
1807   }
1808 
1809   if (HasFP) {
1810     // If there is red zone, restore FP directly, since SP has already been
1811     // restored. Otherwise, restore the value of FP into ScratchReg.
1812     if (HasRedZone || RBReg == SPReg)
1813       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1814         .addImm(FPOffset)
1815         .addReg(SPReg);
1816     else
1817       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1818         .addImm(FPOffset)
1819         .addReg(RBReg);
1820   }
1821 
       // Reload the PIC base register (R30) and the base pointer from their
       // save slots, addressed relative to RBReg.
1822   if (FI->usesPICBase())
1823     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1824       .addImm(PBPOffset)
1825       .addReg(RBReg);
1826 
1827   if (HasBP)
1828     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1829       .addImm(BPOffset)
1830       .addReg(RBReg);
1831 
1832   // There is nothing more to be loaded from the stack, so now we can
1833   // restore SP: SP = RBReg + SPAdd.
1834   if (RBReg != SPReg || SPAdd != 0) {
1835     assert(!HasRedZone && "This should not happen with red zone");
1836     // If SPAdd is 0, generate a copy.
1837     if (SPAdd == 0)
1838       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1839         .addReg(RBReg)
1840         .addReg(RBReg);
1841     else
1842       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1843         .addReg(RBReg)
1844         .addImm(SPAdd);
1845 
1846     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
         // FP served as the temporary base register; copy its preserved or
         // reloaded value back from ScratchReg.
1847     if (RBReg == FPReg)
1848       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1849         .addReg(ScratchReg)
1850         .addReg(ScratchReg);
1851 
1852     // Now load the LR from the caller's stack frame.
1853     if (MustSaveLR && !LoadedLR)
1854       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1855         .addImm(LROffset)
1856         .addReg(SPReg);
1857   }
1858 
       // Moves into the CR fields for the CR word loaded into TempReg above
       // (skipped when the single-scratch path already emitted them).
1859   if (MustSaveCR &&
1860       !(SingleScratchReg && MustSaveLR))
1861     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1862       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1863         .addReg(TempReg, getKillRegState(i == e-1));
1864 
1865   if (MustSaveLR) {
1866     // If ROP protection is required, an extra instruction is added to compute a
1867     // hash and then compare it to the hash stored in the prologue.
1868     if (HasROPProtect) {
1869       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1870       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1871       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1872              "ROP hash check location offset out of range.");
1873       assert(((ImmOffset & 0x7) == 0) &&
1874              "ROP hash check location offset must be 8 byte aligned.");
1875       BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1876           .addReg(ScratchReg)
1877           .addImm(ImmOffset)
1878           .addReg(SPReg);
1879     }
         // Move the return address held in ScratchReg into LR.
1880     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1881   }
1882 
1883   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1884   // call optimization
1885   if (IsReturnBlock) {
1886     unsigned RetOpcode = MBBI->getOpcode();
1887     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1888         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1889         MF.getFunction().getCallingConv() == CallingConv::Fast) {
           // NOTE(review): this FI shadows the identical FI declared near the
           // top of the function.
1890       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1891       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1892 
           // NOTE(review): a CallerAllocatedAmt of 0 fails the first test and
           // therefore takes the else branch. That branch sums FPReg and
           // ScratchReg, whereas the short form adds to SPReg, and it appends
           // SPReg with addReg() without RegState::Define. Confirm all three
           // are intended.
1893       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1894         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1895           .addReg(SPReg).addImm(CallerAllocatedAmt);
1896       } else {
1897         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1898           .addImm(CallerAllocatedAmt >> 16);
1899         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1900           .addReg(ScratchReg, RegState::Kill)
1901           .addImm(CallerAllocatedAmt & 0xFFFF);
1902         BuildMI(MBB, MBBI, dl, AddInst)
1903           .addReg(SPReg)
1904           .addReg(FPReg)
1905           .addReg(ScratchReg);
1906       }
1907     } else {
           // Any other return block: emit the real tail-call branch if the
           // terminator is a TCRETURN pseudo (no-op for other opcodes).
1908       createTailCallBranchInstr(MBB);
1909     }
1910   }
1911 }
1912 
1913 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
       // Purpose: if MBB's first terminator is one of the TCRETURN* pseudo
       // instructions, insert the matching tail-call branch (TAILB, TAILBCTR,
       // TAILBA or their 64-bit "8" variants) in front of the block's last
       // non-debug instruction. Any other terminator opcode emits nothing.
       // This function does not erase the pseudo instruction itself.
       //   MBB - block whose terminator is inspected; it must have a terminator.
1914   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1915 
1916   // If we got this far a first terminator should exist.
1917   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1918 
1919   DebugLoc dl = MBBI->getDebugLoc();
1920   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1921 
1922   // Create branch instruction for pseudo tail call return instruction.
1923   // The TCRETURNdi variants are direct calls. Valid targets for those are
1924   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1925   // since we can tail call external functions with PC-Rel (i.e. we don't need
1926   // to worry about different TOC pointers). Some of the external functions will
1927   // be MO_GlobalAddress while others like memcpy for example, are going to
1928   // be MO_ExternalSymbol.
       // In every branch below MBBI is re-pointed at the last non-debug
       // instruction; operand 0 of that instruction is read as the call target
       // and the new branch is inserted before it.
1929   unsigned RetOpcode = MBBI->getOpcode();
1930   if (RetOpcode == PPC::TCRETURNdi) {
         // 32-bit direct tail call: target is a global or an external symbol.
1931     MBBI = MBB.getLastNonDebugInstr();
1932     MachineOperand &JumpTarget = MBBI->getOperand(0);
1933     if (JumpTarget.isGlobal())
1934       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1935         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1936     else if (JumpTarget.isSymbol())
1937       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1938         addExternalSymbol(JumpTarget.getSymbolName());
1939     else
1940       llvm_unreachable("Expecting Global or External Symbol");
1941   } else if (RetOpcode == PPC::TCRETURNri) {
         // 32-bit register form: the register operand is only asserted, and
         // TAILBCTR is built with no explicit operands.
1942     MBBI = MBB.getLastNonDebugInstr();
1943     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1944     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1945   } else if (RetOpcode == PPC::TCRETURNai) {
         // 32-bit immediate form: the target is copied as an immediate operand.
1946     MBBI = MBB.getLastNonDebugInstr();
1947     MachineOperand &JumpTarget = MBBI->getOperand(0);
1948     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1949   } else if (RetOpcode == PPC::TCRETURNdi8) {
         // 64-bit direct tail call; mirrors the TCRETURNdi case.
1950     MBBI = MBB.getLastNonDebugInstr();
1951     MachineOperand &JumpTarget = MBBI->getOperand(0);
1952     if (JumpTarget.isGlobal())
1953       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1954         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1955     else if (JumpTarget.isSymbol())
1956       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1957         addExternalSymbol(JumpTarget.getSymbolName());
1958     else
1959       llvm_unreachable("Expecting Global or External Symbol");
1960   } else if (RetOpcode == PPC::TCRETURNri8) {
         // 64-bit register form; mirrors the TCRETURNri case.
1961     MBBI = MBB.getLastNonDebugInstr();
1962     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1963     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1964   } else if (RetOpcode == PPC::TCRETURNai8) {
         // 64-bit immediate form; mirrors the TCRETURNai case.
1965     MBBI = MBB.getLastNonDebugInstr();
1966     MachineOperand &JumpTarget = MBBI->getOperand(0);
1967     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1968   }
1969 }
1970 
1971 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1972                                             BitVector &SavedRegs,
1973                                             RegScavenger *RS) const {
       // Purpose: start from the generic TargetFrameLowering result, then remove
       // from SavedRegs the registers this file saves and restores itself (the
       // VSRp pairs, LR, the frame pointer, the base pointer and R30). It also
       // creates the fixed stack objects for the FP, BP and PIC-base save slots,
       // the tail-call area and the CR spill slot.
       //   MF        - function being analysed.
       //   SavedRegs - in/out bit vector of registers to spill; edited in place.
       //   RS        - forwarded unchanged to the base-class implementation.
1974   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1975 
1976   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1977 
1978   // Do not explicitly save the callee saved VSRp registers.
1979   // The individual VSR subregisters will be saved instead.
1980   SavedRegs.reset(PPC::VSRp26);
1981   SavedRegs.reset(PPC::VSRp27);
1982   SavedRegs.reset(PPC::VSRp28);
1983   SavedRegs.reset(PPC::VSRp29);
1984   SavedRegs.reset(PPC::VSRp30);
1985   SavedRegs.reset(PPC::VSRp31);
1986 
1987   //  Save and clear the LR state.
       // The MustSaveLR() result is recorded in the function info, and LR is
       // then removed from SavedRegs.
1988   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1989   unsigned LR = RegInfo->getRARegister();
1990   FI->setMustSaveLR(MustSaveLR(MF, LR));
1991   SavedRegs.reset(LR);
1992 
1993   //  Save R31 if necessary
1994   int FPSI = FI->getFramePointerSaveIndex();
1995   const bool isPPC64 = Subtarget.isPPC64();
1996   MachineFrameInfo &MFI = MF.getFrameInfo();
1997 
1998   // If the frame pointer save index hasn't been defined yet.
       // A stored index of 0 is treated as "no slot created so far".
1999   if (!FPSI && needsFP(MF)) {
2000     // Find out the fixed offset of the frame pointer save area.
2001     int FPOffset = getFramePointerSaveOffset();
2002     // Allocate the frame index for frame pointer save area.
2003     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2004     // Save the result.
2005     FI->setFramePointerSaveIndex(FPSI);
2006   }
2007 
       // Same scheme for the base pointer save slot (8 bytes on PPC64, else 4).
2008   int BPSI = FI->getBasePointerSaveIndex();
2009   if (!BPSI && RegInfo->hasBasePointer(MF)) {
2010     int BPOffset = getBasePointerSaveOffset();
2011     // Allocate the frame index for the base pointer save area.
2012     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2013     // Save the result.
2014     FI->setBasePointerSaveIndex(BPSI);
2015   }
2016 
2017   // Reserve stack space for the PIC Base register (R30).
2018   // Only used in SVR4 32-bit.
       // Unlike the FP/BP slots above, this object is created on every call
       // without first checking for an existing index.
2019   if (FI->usesPICBase()) {
2020     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2021     FI->setPICBasePointerSaveIndex(PBPSI);
2022   }
2023 
2024   // Make sure we don't explicitly spill r31, because, for example, we have
2025   // some inline asm which explicitly clobbers it, when we otherwise have a
2026   // frame pointer and are using r31's spill slot for the prologue/epilogue
2027   // code. Same goes for the base pointer and the PIC base register.
2028   if (needsFP(MF))
2029     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2030   if (RegInfo->hasBasePointer(MF))
2031     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2032   if (FI->usesPICBase())
2033     SavedRegs.reset(PPC::R30);
2034 
2035   // Reserve stack space to move the linkage area to in case of a tail call.
       // TCSPDelta is negative here, so the object has size -TCSPDelta and is
       // placed at offset TCSPDelta.
2036   int TCSPDelta = 0;
2037   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2038       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2039     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2040   }
2041 
2042   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2043   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2044   // object at the offset of the CR-save slot in the linkage area. The actual
2045   // save and restore of the condition register will be created as part of the
2046   // prologue and epilogue insertion, but the FixedStack object is needed to
2047   // keep the CalleeSavedInfo valid.
       // NOTE(review): the code below creates the object for every subtarget.
       // The offset is 8 on PPC64, 4 on 32-bit AIX and -4 otherwise - confirm
       // the -4 case against the comment above.
2048   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2049        SavedRegs.test(PPC::CR4))) {
2050     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2051     const int64_t SpillOffset =
2052         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2053     int FrameIdx =
2054         MFI.CreateFixedObject(SpillSize, SpillOffset,
2055                               /* IsImmutable */ true, /* IsAliased */ false);
2056     FI->setCRSpillFrameIndex(FrameIdx);
2057   }
2058 }
2059 
2060 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2061                                                        RegScavenger *RS) const {
2062   // Get callee saved register information.
2063   MachineFrameInfo &MFI = MF.getFrameInfo();
2064   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2065 
2066   // If the function is shrink-wrapped, and if the function has a tail call, the
2067   // tail call might not be in the new RestoreBlock, so real branch instruction
2068   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2069   // RestoreBlock. So we handle this case here.
2070   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2071     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2072     for (MachineBasicBlock &MBB : MF) {
2073       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2074         createTailCallBranchInstr(MBB);
2075     }
2076   }
2077 
2078   // Early exit if no callee saved registers are modified!
2079   if (CSI.empty() && !needsFP(MF)) {
2080     addScavengingSpillSlot(MF, RS);
2081     return;
2082   }
2083 
2084   unsigned MinGPR = PPC::R31;
2085   unsigned MinG8R = PPC::X31;
2086   unsigned MinFPR = PPC::F31;
2087   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2088 
2089   bool HasGPSaveArea = false;
2090   bool HasG8SaveArea = false;
2091   bool HasFPSaveArea = false;
2092   bool HasVRSaveArea = false;
2093 
2094   SmallVector<CalleeSavedInfo, 18> GPRegs;
2095   SmallVector<CalleeSavedInfo, 18> G8Regs;
2096   SmallVector<CalleeSavedInfo, 18> FPRegs;
2097   SmallVector<CalleeSavedInfo, 18> VRegs;
2098 
2099   for (const CalleeSavedInfo &I : CSI) {
2100     Register Reg = I.getReg();
2101     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2102             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2103            "Not expecting to try to spill R2 in a function that must save TOC");
2104     if (PPC::GPRCRegClass.contains(Reg)) {
2105       HasGPSaveArea = true;
2106 
2107       GPRegs.push_back(I);
2108 
2109       if (Reg < MinGPR) {
2110         MinGPR = Reg;
2111       }
2112     } else if (PPC::G8RCRegClass.contains(Reg)) {
2113       HasG8SaveArea = true;
2114 
2115       G8Regs.push_back(I);
2116 
2117       if (Reg < MinG8R) {
2118         MinG8R = Reg;
2119       }
2120     } else if (PPC::F8RCRegClass.contains(Reg)) {
2121       HasFPSaveArea = true;
2122 
2123       FPRegs.push_back(I);
2124 
2125       if (Reg < MinFPR) {
2126         MinFPR = Reg;
2127       }
2128     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2129                PPC::CRRCRegClass.contains(Reg)) {
2130       ; // do nothing, as we already know whether CRs are spilled
2131     } else if (PPC::VRRCRegClass.contains(Reg) ||
2132                PPC::SPERCRegClass.contains(Reg)) {
2133       // Altivec and SPE are mutually exclusive, but have the same stack
2134       // alignment requirements, so overload the save area for both cases.
2135       HasVRSaveArea = true;
2136 
2137       VRegs.push_back(I);
2138 
2139       if (Reg < MinVR) {
2140         MinVR = Reg;
2141       }
2142     } else {
2143       llvm_unreachable("Unknown RegisterClass!");
2144     }
2145   }
2146 
2147   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2148   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2149 
2150   int64_t LowerBound = 0;
2151 
2152   // Take into account stack space reserved for tail calls.
2153   int TCSPDelta = 0;
2154   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2155       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2156     LowerBound = TCSPDelta;
2157   }
2158 
2159   // The Floating-point register save area is right below the back chain word
2160   // of the previous stack frame.
2161   if (HasFPSaveArea) {
2162     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2163       int FI = FPRegs[i].getFrameIdx();
2164 
2165       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2166     }
2167 
2168     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2169   }
2170 
2171   // Check whether the frame pointer register is allocated. If so, make sure it
2172   // is spilled to the correct offset.
2173   if (needsFP(MF)) {
2174     int FI = PFI->getFramePointerSaveIndex();
2175     assert(FI && "No Frame Pointer Save Slot!");
2176     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2177     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2178     HasGPSaveArea = true;
2179   }
2180 
2181   if (PFI->usesPICBase()) {
2182     int FI = PFI->getPICBasePointerSaveIndex();
2183     assert(FI && "No PIC Base Pointer Save Slot!");
2184     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2185 
2186     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2187     HasGPSaveArea = true;
2188   }
2189 
2190   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2191   if (RegInfo->hasBasePointer(MF)) {
2192     int FI = PFI->getBasePointerSaveIndex();
2193     assert(FI && "No Base Pointer Save Slot!");
2194     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2195 
2196     Register BP = RegInfo->getBaseRegister(MF);
2197     if (PPC::G8RCRegClass.contains(BP)) {
2198       MinG8R = std::min<unsigned>(MinG8R, BP);
2199       HasG8SaveArea = true;
2200     } else if (PPC::GPRCRegClass.contains(BP)) {
2201       MinGPR = std::min<unsigned>(MinGPR, BP);
2202       HasGPSaveArea = true;
2203     }
2204   }
2205 
2206   // General register save area starts right below the Floating-point
2207   // register save area.
2208   if (HasGPSaveArea || HasG8SaveArea) {
2209     // Move general register save area spill slots down, taking into account
2210     // the size of the Floating-point register save area.
2211     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2212       if (!GPRegs[i].isSpilledToReg()) {
2213         int FI = GPRegs[i].getFrameIdx();
2214         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2215       }
2216     }
2217 
2218     // Move general register save area spill slots down, taking into account
2219     // the size of the Floating-point register save area.
2220     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2221       if (!G8Regs[i].isSpilledToReg()) {
2222         int FI = G8Regs[i].getFrameIdx();
2223         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2224       }
2225     }
2226 
2227     unsigned MinReg =
2228       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2229                          TRI->getEncodingValue(MinG8R));
2230 
2231     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2232     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2233   }
2234 
2235   // For 32-bit only, the CR save area is below the general register
2236   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2237   // to the stack pointer and hence does not need an adjustment here.
2238   // Only CR2 (the first nonvolatile spilled) has an associated frame
2239   // index so that we have a single uniform save area.
2240   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2241     // Adjust the frame index of the CR spill slot.
2242     for (const auto &CSInfo : CSI) {
2243       if (CSInfo.getReg() == PPC::CR2) {
2244         int FI = CSInfo.getFrameIdx();
2245         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2246         break;
2247       }
2248     }
2249 
2250     LowerBound -= 4; // The CR save area is always 4 bytes long.
2251   }
2252 
2253   // Both Altivec and SPE have the same alignment and padding requirements
2254   // within the stack frame.
2255   if (HasVRSaveArea) {
2256     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2257     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2258     // we are using negative number here (the stack grows downward). We should
2259     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2260     // is the alignment size ( n = 16 here) and y is the size after aligning.
2261     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2262     LowerBound &= ~(15);
2263 
2264     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2265       int FI = VRegs[i].getFrameIdx();
2266 
2267       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2268     }
2269   }
2270 
2271   addScavengingSpillSlot(MF, RS);
2272 }
2273 
2274 void
2275 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2276                                          RegScavenger *RS) const {
2277   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2278   // a large stack, which will require scavenging a register to materialize a
2279   // large offset.
2280 
2281   // We need to have a scavenger spill slot for spills if the frame size is
2282   // large. In case there is no free register for large-offset addressing,
2283   // this slot is used for the necessary emergency spill. Also, we need the
2284   // slot for dynamic stack allocations.
2285 
2286   // The scavenger might be invoked if the frame offset does not fit into
2287   // the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
2288   // We don't know the complete frame size here because we've not yet computed
2289   // callee-saved register spills or the needed alignment padding.
2290   unsigned StackSize = determineFrameLayout(MF, true);
2291   MachineFrameInfo &MFI = MF.getFrameInfo();
2292   bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
2293 
2294   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2295       (hasSpills(MF) && NeedSpills)) {
2296     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2297     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2298     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2299     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2300     unsigned Size = TRI.getSpillSize(RC);
2301     Align Alignment = TRI.getSpillAlign(RC);
2302     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2303 
2304     // Might we have over-aligned allocas?
2305     bool HasAlVars =
2306         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2307 
2308     // These kinds of spills might need two registers.
2309     if (spillsCR(MF) || HasAlVars)
2310       RS->addScavengingFrameIndex(
2311           MFI.CreateStackObject(Size, Alignment, false));
2312   }
2313 }
2314 
2315 // This function checks if a callee saved gpr can be spilled to a volatile
2316 // vector register. This occurs for leaf functions when the option
2317 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2318 // which were not spilled to vectors, return false so the target independent
2319 // code can handle them by assigning a FrameIdx to a stack slot.
2320 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2321     MachineFunction &MF, const TargetRegisterInfo *TRI,
2322     std::vector<CalleeSavedInfo> &CSI) const {
2323 
2324   if (CSI.empty())
2325     return true; // Early exit if no callee saved registers are modified!
2326 
2327   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2328   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2329   const MachineRegisterInfo &MRI = MF.getRegInfo();
2330 
2331   if (Subtarget.hasSPE()) {
2332     // In case of SPE we only have SuperRegs and CRs
2333     // in our CalleSaveInfo vector.
2334 
2335     for (auto &CalleeSaveReg : CSI) {
2336       MCPhysReg Reg = CalleeSaveReg.getReg();
2337       MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
2338       MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
2339 
2340       if ( // Check only for SuperRegs.
2341           Lower &&
2342           // Replace Reg if only lower-32 bits modified
2343           !MRI.isPhysRegModified(Higher))
2344         CalleeSaveReg = CalleeSavedInfo(Lower);
2345     }
2346   }
2347 
2348   // Early exit if cannot spill gprs to volatile vector registers.
2349   MachineFrameInfo &MFI = MF.getFrameInfo();
2350   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2351     return false;
2352 
2353   // Build a BitVector of VSRs that can be used for spilling GPRs.
2354   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2355   BitVector BVCalleeSaved(TRI->getNumRegs());
2356   for (unsigned i = 0; CSRegs[i]; ++i)
2357     BVCalleeSaved.set(CSRegs[i]);
2358 
2359   for (unsigned Reg : BVAllocatable.set_bits()) {
2360     // Set to 0 if the register is not a volatile VSX register, or if it is
2361     // used in the function.
2362     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2363         MRI.isPhysRegUsed(Reg))
2364       BVAllocatable.reset(Reg);
2365   }
2366 
2367   bool AllSpilledToReg = true;
2368   unsigned LastVSRUsedForSpill = 0;
2369   for (auto &CS : CSI) {
2370     if (BVAllocatable.none())
2371       return false;
2372 
2373     Register Reg = CS.getReg();
2374 
2375     if (!PPC::G8RCRegClass.contains(Reg)) {
2376       AllSpilledToReg = false;
2377       continue;
2378     }
2379 
2380     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2381     // into one VSR using the mtvsrdd instruction.
2382     if (LastVSRUsedForSpill != 0) {
2383       CS.setDstReg(LastVSRUsedForSpill);
2384       BVAllocatable.reset(LastVSRUsedForSpill);
2385       LastVSRUsedForSpill = 0;
2386       continue;
2387     }
2388 
2389     unsigned VolatileVFReg = BVAllocatable.find_first();
2390     if (VolatileVFReg < BVAllocatable.size()) {
2391       CS.setDstReg(VolatileVFReg);
2392       LastVSRUsedForSpill = VolatileVFReg;
2393     } else {
2394       AllSpilledToReg = false;
2395     }
2396   }
2397   return AllSpilledToReg;
2398 }
2399 
2400 bool PPCFrameLowering::spillCalleeSavedRegisters(
2401     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2402     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2403 
2404   MachineFunction *MF = MBB.getParent();
2405   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2406   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2407   bool MustSaveTOC = FI->mustSaveTOC();
2408   DebugLoc DL;
2409   bool CRSpilled = false;
2410   MachineInstrBuilder CRMIB;
2411   BitVector Spilled(TRI->getNumRegs());
2412 
2413   VSRContainingGPRs.clear();
2414 
2415   // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
2416   // or two GPRs, so we need table to record information for later save/restore.
2417   for (const CalleeSavedInfo &Info : CSI) {
2418     if (Info.isSpilledToReg()) {
2419       auto &SpilledVSR =
2420           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2421       assert(SpilledVSR.second == 0 &&
2422              "Can't spill more than two GPRs into VSR!");
2423       if (SpilledVSR.first == 0)
2424         SpilledVSR.first = Info.getReg();
2425       else
2426         SpilledVSR.second = Info.getReg();
2427     }
2428   }
2429 
2430   for (const CalleeSavedInfo &I : CSI) {
2431     Register Reg = I.getReg();
2432 
2433     // CR2 through CR4 are the nonvolatile CR fields.
2434     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2435 
2436     // Add the callee-saved register as live-in; it's killed at the spill.
2437     // Do not do this for callee-saved registers that are live-in to the
2438     // function because they will already be marked live-in and this will be
2439     // adding it for a second time. It is an error to add the same register
2440     // to the set more than once.
2441     const MachineRegisterInfo &MRI = MF->getRegInfo();
2442     bool IsLiveIn = MRI.isLiveIn(Reg);
2443     if (!IsLiveIn)
2444        MBB.addLiveIn(Reg);
2445 
2446     if (CRSpilled && IsCRField) {
2447       CRMIB.addReg(Reg, RegState::ImplicitKill);
2448       continue;
2449     }
2450 
2451     // The actual spill will happen in the prologue.
2452     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2453       continue;
2454 
2455     // Insert the spill to the stack frame.
2456     if (IsCRField) {
2457       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2458       if (!Subtarget.is32BitELFABI()) {
2459         // The actual spill will happen at the start of the prologue.
2460         FuncInfo->addMustSaveCR(Reg);
2461       } else {
2462         CRSpilled = true;
2463         FuncInfo->setSpillsCR();
2464 
2465         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2466         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2467         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2468                   .addReg(Reg, RegState::ImplicitKill);
2469 
2470         MBB.insert(MI, CRMIB);
2471         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2472                                          .addReg(PPC::R12,
2473                                                  getKillRegState(true)),
2474                                          I.getFrameIdx()));
2475       }
2476     } else {
2477       if (I.isSpilledToReg()) {
2478         unsigned Dst = I.getDstReg();
2479 
2480         if (Spilled[Dst])
2481           continue;
2482 
2483         if (VSRContainingGPRs[Dst].second != 0) {
2484           assert(Subtarget.hasP9Vector() &&
2485                  "mtvsrdd is unavailable on pre-P9 targets.");
2486 
2487           NumPESpillVSR += 2;
2488           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2489               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2490               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2491         } else if (VSRContainingGPRs[Dst].second == 0) {
2492           assert(Subtarget.hasP8Vector() &&
2493                  "Can't move GPR to VSR on pre-P8 targets.");
2494 
2495           ++NumPESpillVSR;
2496           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2497                   TRI->getSubReg(Dst, PPC::sub_64))
2498               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2499         } else {
2500           llvm_unreachable("More than two GPRs spilled to a VSR!");
2501         }
2502         Spilled.set(Dst);
2503       } else {
2504         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2505         // Use !IsLiveIn for the kill flag.
2506         // We do not want to kill registers that are live in this function
2507         // before their use because they will become undefined registers.
2508         // Functions without NoUnwind need to preserve the order of elements in
2509         // saved vector registers.
2510         if (Subtarget.needsSwapsForVSXMemOps() &&
2511             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2512           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2513                                        I.getFrameIdx(), RC, TRI);
2514         else
2515           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2516                                   TRI, Register());
2517       }
2518     }
2519   }
2520   return true;
2521 }
2522 
2523 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2524                        bool CR4Spilled, MachineBasicBlock &MBB,
2525                        MachineBasicBlock::iterator MI,
2526                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2527 
2528   MachineFunction *MF = MBB.getParent();
2529   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2530   DebugLoc DL;
2531   unsigned MoveReg = PPC::R12;
2532 
2533   // 32-bit:  FP-relative
2534   MBB.insert(MI,
2535              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2536                                CSI[CSIIndex].getFrameIdx()));
2537 
2538   unsigned RestoreOp = PPC::MTOCRF;
2539   if (CR2Spilled)
2540     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2541                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2542 
2543   if (CR3Spilled)
2544     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2545                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2546 
2547   if (CR4Spilled)
2548     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2549                .addReg(MoveReg, getKillRegState(true)));
2550 }
2551 
2552 MachineBasicBlock::iterator PPCFrameLowering::
2553 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2554                               MachineBasicBlock::iterator I) const {
2555   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2556   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2557       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2558     // Add (actually subtract) back the amount the callee popped on return.
2559     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2560       bool is64Bit = Subtarget.isPPC64();
2561       CalleeAmt *= -1;
2562       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2563       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2564       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2565       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2566       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2567       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2568       const DebugLoc &dl = I->getDebugLoc();
2569 
2570       if (isInt<16>(CalleeAmt)) {
2571         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2572           .addReg(StackReg, RegState::Kill)
2573           .addImm(CalleeAmt);
2574       } else {
2575         MachineBasicBlock::iterator MBBI = I;
2576         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2577           .addImm(CalleeAmt >> 16);
2578         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2579           .addReg(TmpReg, RegState::Kill)
2580           .addImm(CalleeAmt & 0xFFFF);
2581         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2582           .addReg(StackReg, RegState::Kill)
2583           .addReg(TmpReg);
2584       }
2585     }
2586   }
2587   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2588   return MBB.erase(I);
2589 }
2590 
2591 static bool isCalleeSavedCR(unsigned Reg) {
2592   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2593 }
2594 
2595 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2596     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2597     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2598   MachineFunction *MF = MBB.getParent();
2599   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2600   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2601   bool MustSaveTOC = FI->mustSaveTOC();
2602   bool CR2Spilled = false;
2603   bool CR3Spilled = false;
2604   bool CR4Spilled = false;
2605   unsigned CSIIndex = 0;
2606   BitVector Restored(TRI->getNumRegs());
2607 
2608   // Initialize insertion-point logic; we will be restoring in reverse
2609   // order of spill.
2610   MachineBasicBlock::iterator I = MI, BeforeI = I;
2611   bool AtStart = I == MBB.begin();
2612 
2613   if (!AtStart)
2614     --BeforeI;
2615 
2616   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2617     Register Reg = CSI[i].getReg();
2618 
2619     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2620       continue;
2621 
2622     // Restore of callee saved condition register field is handled during
2623     // epilogue insertion.
2624     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2625       continue;
2626 
2627     if (Reg == PPC::CR2) {
2628       CR2Spilled = true;
2629       // The spill slot is associated only with CR2, which is the
2630       // first nonvolatile spilled.  Save it here.
2631       CSIIndex = i;
2632       continue;
2633     } else if (Reg == PPC::CR3) {
2634       CR3Spilled = true;
2635       continue;
2636     } else if (Reg == PPC::CR4) {
2637       CR4Spilled = true;
2638       continue;
2639     } else {
2640       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2641       // least one CR register, restore all spilled CRs together.
2642       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2643         bool is31 = needsFP(*MF);
2644         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2645                    CSIIndex);
2646         CR2Spilled = CR3Spilled = CR4Spilled = false;
2647       }
2648 
2649       if (CSI[i].isSpilledToReg()) {
2650         DebugLoc DL;
2651         unsigned Dst = CSI[i].getDstReg();
2652 
2653         if (Restored[Dst])
2654           continue;
2655 
2656         if (VSRContainingGPRs[Dst].second != 0) {
2657           assert(Subtarget.hasP9Vector());
2658           NumPEReloadVSR += 2;
2659           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2660                   VSRContainingGPRs[Dst].second)
2661               .addReg(Dst);
2662           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2663                   VSRContainingGPRs[Dst].first)
2664               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2665         } else if (VSRContainingGPRs[Dst].second == 0) {
2666           assert(Subtarget.hasP8Vector());
2667           ++NumPEReloadVSR;
2668           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2669                   VSRContainingGPRs[Dst].first)
2670               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2671         } else {
2672           llvm_unreachable("More than two GPRs spilled to a VSR!");
2673         }
2674 
2675         Restored.set(Dst);
2676 
2677       } else {
2678        // Default behavior for non-CR saves.
2679         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2680 
2681         // Functions without NoUnwind need to preserve the order of elements in
2682         // saved vector registers.
2683         if (Subtarget.needsSwapsForVSXMemOps() &&
2684             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2685           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2686                                         TRI);
2687         else
2688           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2689                                    Register());
2690 
2691         assert(I != MBB.begin() &&
2692                "loadRegFromStackSlot didn't insert any code!");
2693       }
2694     }
2695 
2696     // Insert in reverse order.
2697     if (AtStart)
2698       I = MBB.begin();
2699     else {
2700       I = BeforeI;
2701       ++I;
2702     }
2703   }
2704 
2705   // If we haven't yet spilled the CRs, do so now.
2706   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2707     assert(Subtarget.is32BitELFABI() &&
2708            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2709     bool is31 = needsFP(*MF);
2710     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2711   }
2712 
2713   return true;
2714 }
2715 
2716 uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2717   return TOCSaveOffset;
2718 }
2719 
2720 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2721   return FramePointerSaveOffset;
2722 }
2723 
2724 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2725   return BasePointerSaveOffset;
2726 }
2727 
2728 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2729   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2730     return false;
2731   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2732 }
2733 
2734 uint64_t PPCFrameLowering::getStackThreshold() const {
2735   // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
2736   // use `add r1, r1, <scratch_reg>` to release the stack frame.
2737   // Scratch register contains a signed 64-bit number, which is negative
2738   // when extending the stack and is positive when releasing the stack frame.
2739   // To make `stux` and `add` paired, the absolute value of the number contained
2740   // in the scratch register should be the same. Thus the maximum stack size
2741   // is (2^63)-1, i.e., LONG_MAX.
2742   if (Subtarget.isPPC64())
2743     return LONG_MAX;
2744 
2745   return TargetFrameLowering::getStackThreshold();
2746 }
2747