xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision ff4636a4ab00b633c15eb3942c26126ceb2662e6)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterScavenging.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/Target/TargetOptions.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "framelowering"
34 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36 STATISTIC(NumPrologProbed, "Number of prologues probed");
37 
// Command-line switch "ppc-enable-pe-vector-spills": a hidden boolean option
// that defaults to false. Per its description string, it enables spills in the
// prologue to vector registers.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
42 
43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44   if (STI.isAIXABI())
45     return STI.isPPC64() ? 16 : 8;
46   // SVR4 ABI:
47   return STI.isPPC64() ? 16 : 4;
48 }
49 
50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51   if (STI.isAIXABI())
52     return STI.isPPC64() ? 40 : 20;
53   return STI.isELFv2ABI() ? 24 : 40;
54 }
55 
56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57   // First slot in the general register save area.
58   return STI.isPPC64() ? -8U : -4U;
59 }
60 
61 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62   if (STI.isAIXABI() || STI.isPPC64())
63     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64 
65   // 32-bit SVR4 ABI:
66   return 8;
67 }
68 
69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70   // Third slot in the general purpose register save area.
71   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72     return -12U;
73 
74   // Second slot in the general purpose register save area.
75   return STI.isPPC64() ? -16U : -8U;
76 }
77 
78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80 }
81 
82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84                           STI.getPlatformStackAlignment(), 0),
85       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88       LinkageSize(computeLinkageSize(Subtarget)),
89       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
90       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91 
92 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
93 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
94     unsigned &NumEntries) const {
95 
96 // Floating-point register save area offsets.
97 #define CALLEE_SAVED_FPRS \
98       {PPC::F31, -8},     \
99       {PPC::F30, -16},    \
100       {PPC::F29, -24},    \
101       {PPC::F28, -32},    \
102       {PPC::F27, -40},    \
103       {PPC::F26, -48},    \
104       {PPC::F25, -56},    \
105       {PPC::F24, -64},    \
106       {PPC::F23, -72},    \
107       {PPC::F22, -80},    \
108       {PPC::F21, -88},    \
109       {PPC::F20, -96},    \
110       {PPC::F19, -104},   \
111       {PPC::F18, -112},   \
112       {PPC::F17, -120},   \
113       {PPC::F16, -128},   \
114       {PPC::F15, -136},   \
115       {PPC::F14, -144}
116 
117 // 32-bit general purpose register save area offsets shared by ELF and
118 // AIX. AIX has an extra CSR with r13.
119 #define CALLEE_SAVED_GPRS32 \
120       {PPC::R31, -4},       \
121       {PPC::R30, -8},       \
122       {PPC::R29, -12},      \
123       {PPC::R28, -16},      \
124       {PPC::R27, -20},      \
125       {PPC::R26, -24},      \
126       {PPC::R25, -28},      \
127       {PPC::R24, -32},      \
128       {PPC::R23, -36},      \
129       {PPC::R22, -40},      \
130       {PPC::R21, -44},      \
131       {PPC::R20, -48},      \
132       {PPC::R19, -52},      \
133       {PPC::R18, -56},      \
134       {PPC::R17, -60},      \
135       {PPC::R16, -64},      \
136       {PPC::R15, -68},      \
137       {PPC::R14, -72}
138 
139 // 64-bit general purpose register save area offsets.
140 #define CALLEE_SAVED_GPRS64 \
141       {PPC::X31, -8},       \
142       {PPC::X30, -16},      \
143       {PPC::X29, -24},      \
144       {PPC::X28, -32},      \
145       {PPC::X27, -40},      \
146       {PPC::X26, -48},      \
147       {PPC::X25, -56},      \
148       {PPC::X24, -64},      \
149       {PPC::X23, -72},      \
150       {PPC::X22, -80},      \
151       {PPC::X21, -88},      \
152       {PPC::X20, -96},      \
153       {PPC::X19, -104},     \
154       {PPC::X18, -112},     \
155       {PPC::X17, -120},     \
156       {PPC::X16, -128},     \
157       {PPC::X15, -136},     \
158       {PPC::X14, -144}
159 
160 // Vector register save area offsets.
161 #define CALLEE_SAVED_VRS \
162       {PPC::V31, -16},   \
163       {PPC::V30, -32},   \
164       {PPC::V29, -48},   \
165       {PPC::V28, -64},   \
166       {PPC::V27, -80},   \
167       {PPC::V26, -96},   \
168       {PPC::V25, -112},  \
169       {PPC::V24, -128},  \
170       {PPC::V23, -144},  \
171       {PPC::V22, -160},  \
172       {PPC::V21, -176},  \
173       {PPC::V20, -192}
174 
175   // Note that the offsets here overlap, but this is fixed up in
176   // processFunctionBeforeFrameFinalized.
177 
178   static const SpillSlot ELFOffsets32[] = {
179       CALLEE_SAVED_FPRS,
180       CALLEE_SAVED_GPRS32,
181 
182       // CR save area offset.  We map each of the nonvolatile CR fields
183       // to the slot for CR2, which is the first of the nonvolatile CR
184       // fields to be assigned, so that we only allocate one save slot.
185       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
186       {PPC::CR2, -4},
187 
188       // VRSAVE save area offset.
189       {PPC::VRSAVE, -4},
190 
191       CALLEE_SAVED_VRS,
192 
193       // SPE register save area (overlaps Vector save area).
194       {PPC::S31, -8},
195       {PPC::S30, -16},
196       {PPC::S29, -24},
197       {PPC::S28, -32},
198       {PPC::S27, -40},
199       {PPC::S26, -48},
200       {PPC::S25, -56},
201       {PPC::S24, -64},
202       {PPC::S23, -72},
203       {PPC::S22, -80},
204       {PPC::S21, -88},
205       {PPC::S20, -96},
206       {PPC::S19, -104},
207       {PPC::S18, -112},
208       {PPC::S17, -120},
209       {PPC::S16, -128},
210       {PPC::S15, -136},
211       {PPC::S14, -144}};
212 
213   static const SpillSlot ELFOffsets64[] = {
214       CALLEE_SAVED_FPRS,
215       CALLEE_SAVED_GPRS64,
216 
217       // VRSAVE save area offset.
218       {PPC::VRSAVE, -4},
219       CALLEE_SAVED_VRS
220   };
221 
222   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
223                                            CALLEE_SAVED_GPRS32,
224                                            // Add AIX's extra CSR.
225                                            {PPC::R13, -76},
226                                            CALLEE_SAVED_VRS};
227 
228   static const SpillSlot AIXOffsets64[] = {
229       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
230 
231   if (Subtarget.is64BitELFABI()) {
232     NumEntries = std::size(ELFOffsets64);
233     return ELFOffsets64;
234   }
235 
236   if (Subtarget.is32BitELFABI()) {
237     NumEntries = std::size(ELFOffsets32);
238     return ELFOffsets32;
239   }
240 
241   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
242 
243   if (Subtarget.isPPC64()) {
244     NumEntries = std::size(AIXOffsets64);
245     return AIXOffsets64;
246   }
247 
248   NumEntries = std::size(AIXOffsets32);
249   return AIXOffsets32;
250 }
251 
252 static bool spillsCR(const MachineFunction &MF) {
253   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254   return FuncInfo->isCRSpilled();
255 }
256 
257 static bool hasSpills(const MachineFunction &MF) {
258   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259   return FuncInfo->hasSpills();
260 }
261 
262 static bool hasNonRISpills(const MachineFunction &MF) {
263   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264   return FuncInfo->hasNonRISpills();
265 }
266 
267 /// MustSaveLR - Return true if this function requires that we save the LR
268 /// register onto the stack in the prolog and restore it in the epilog of the
269 /// function.
270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272 
273   // We need a save/restore of LR if there is any def of LR (which is
274   // defined by calls, including the PIC setup sequence), or if there is
275   // some use of the LR stack slot (e.g. for builtin_return_address).
276   // (LR comes in 32 and 64 bit versions.)
277   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279 }
280 
281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282 /// call frame size. Update the MachineFunction object with the stack size.
283 uint64_t
284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285                                                 bool UseEstimate) const {
286   unsigned NewMaxCallFrameSize = 0;
287   uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288                                             &NewMaxCallFrameSize);
289   MF.getFrameInfo().setStackSize(FrameSize);
290   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291   return FrameSize;
292 }
293 
294 /// determineFrameLayout - Determine the size of the frame and maximum call
295 /// frame size.
296 uint64_t
297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298                                        bool UseEstimate,
299                                        unsigned *NewMaxCallFrameSize) const {
300   const MachineFrameInfo &MFI = MF.getFrameInfo();
301   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302 
303   // Get the number of bytes to allocate from the FrameInfo
304   uint64_t FrameSize =
305     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306 
307   // Get stack alignments. The frame must be aligned to the greatest of these:
308   Align TargetAlign = getStackAlign(); // alignment required per the ABI
309   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
310   Align Alignment = std::max(TargetAlign, MaxAlign);
311 
312   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313 
314   unsigned LR = RegInfo->getRARegister();
315   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317                        !MFI.adjustsStack() &&       // No calls.
318                        !MustSaveLR(MF, LR) &&       // No need to save LR.
319                        !FI->mustSaveTOC() &&        // No need to save TOC.
320                        !RegInfo->hasBasePointer(MF); // No special alignment.
321 
322   // Note: for PPC32 SVR4ABI, we can still generate stackless
323   // code if all local vars are reg-allocated.
324   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
325 
326   // Check whether we can skip adjusting the stack pointer (by using red zone)
327   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
328     // No need for frame
329     return 0;
330   }
331 
332   // Get the maximum call frame size of all the calls.
333   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
334 
335   // Maximum call frame needs to be at least big enough for linkage area.
336   unsigned minCallFrameSize = getLinkageSize();
337   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
338 
339   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
340   // that allocations will be aligned.
341   if (MFI.hasVarSizedObjects())
342     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
343 
344   // Update the new max call frame size if the caller passes in a valid pointer.
345   if (NewMaxCallFrameSize)
346     *NewMaxCallFrameSize = maxCallFrameSize;
347 
348   // Include call frame size in total.
349   FrameSize += maxCallFrameSize;
350 
351   // Make sure the frame is aligned.
352   FrameSize = alignTo(FrameSize, Alignment);
353 
354   return FrameSize;
355 }
356 
357 // hasFP - Return true if the specified function actually has a dedicated frame
358 // pointer register.
359 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
360   const MachineFrameInfo &MFI = MF.getFrameInfo();
361   // FIXME: This is pretty much broken by design: hasFP() might be called really
362   // early, before the stack layout was calculated and thus hasFP() might return
363   // true or false here depending on the time of call.
364   return (MFI.getStackSize()) && needsFP(MF);
365 }
366 
367 // needsFP - Return true if the specified function should have a dedicated frame
368 // pointer register.  This is true if the function has variable sized allocas or
369 // if frame pointer elimination is disabled.
370 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
371   const MachineFrameInfo &MFI = MF.getFrameInfo();
372 
373   // Naked functions have no stack frame pushed, so we don't have a frame
374   // pointer.
375   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
376     return false;
377 
378   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
379          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
380          MF.exposesReturnsTwice() ||
381          (MF.getTarget().Options.GuaranteedTailCallOpt &&
382           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
383 }
384 
385 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
386   bool is31 = needsFP(MF);
387   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
388   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
389 
390   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
391   bool HasBP = RegInfo->hasBasePointer(MF);
392   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
393   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
394 
395   for (MachineBasicBlock &MBB : MF)
396     for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
397       --MBBI;
398       for (MachineOperand &MO : MBBI->operands()) {
399         if (!MO.isReg())
400           continue;
401 
402         switch (MO.getReg()) {
403         case PPC::FP:
404           MO.setReg(FPReg);
405           break;
406         case PPC::FP8:
407           MO.setReg(FP8Reg);
408           break;
409         case PPC::BP:
410           MO.setReg(BPReg);
411           break;
412         case PPC::BP8:
413           MO.setReg(BP8Reg);
414           break;
415 
416         }
417       }
418     }
419 }
420 
421 /*  This function will do the following:
422     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
423       respectively (defaults recommended by the ABI) and return true
424     - If MBB is not an entry block, initialize the register scavenger and look
425       for available registers.
426     - If the defaults (R0/R12) are available, return true
427     - If TwoUniqueRegsRequired is set to true, it looks for two unique
428       registers. Otherwise, look for a single available register.
429       - If the required registers are found, set SR1 and SR2 and return true.
430       - If the required registers are not found, set SR2 or both SR1 and SR2 to
431         PPC::NoRegister and return false.
432 
433     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
434     is not set, this function will attempt to find two different registers, but
435     still return true if only one register is available (and set SR1 == SR2).
436 */
437 bool
438 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
439                                       bool UseAtEnd,
440                                       bool TwoUniqueRegsRequired,
441                                       Register *SR1,
442                                       Register *SR2) const {
443   RegScavenger RS;
444   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
445   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
446 
447   // Set the defaults for the two scratch registers.
448   if (SR1)
449     *SR1 = R0;
450 
451   if (SR2) {
452     assert (SR1 && "Asking for the second scratch register but not the first?");
453     *SR2 = R12;
454   }
455 
456   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
457   if ((UseAtEnd && MBB->isReturnBlock()) ||
458       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
459     return true;
460 
461   if (UseAtEnd) {
462     // The scratch register will be used before the first terminator (or at the
463     // end of the block if there are no terminators).
464     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
465     if (MBBI == MBB->begin()) {
466       RS.enterBasicBlock(*MBB);
467     } else {
468       RS.enterBasicBlockEnd(*MBB);
469       RS.backward(MBBI);
470     }
471   } else {
472     // The scratch register will be used at the start of the block.
473     RS.enterBasicBlock(*MBB);
474   }
475 
476   // If the two registers are available, we're all good.
477   // Note that we only return here if both R0 and R12 are available because
478   // although the function may not require two unique registers, it may benefit
479   // from having two so we should try to provide them.
480   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
481     return true;
482 
483   // Get the list of callee-saved registers for the target.
484   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
485   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
486 
487   // Get all the available registers in the block.
488   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
489                                      &PPC::GPRCRegClass);
490 
491   // We shouldn't use callee-saved registers as scratch registers as they may be
492   // available when looking for a candidate block for shrink wrapping but not
493   // available when the actual prologue/epilogue is being emitted because they
494   // were added as live-in to the prologue block by PrologueEpilogueInserter.
495   for (int i = 0; CSRegs[i]; ++i)
496     BV.reset(CSRegs[i]);
497 
498   // Set the first scratch register to the first available one.
499   if (SR1) {
500     int FirstScratchReg = BV.find_first();
501     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
502   }
503 
504   // If there is another one available, set the second scratch register to that.
505   // Otherwise, set it to either PPC::NoRegister if this function requires two
506   // or to whatever SR1 is set to if this function doesn't require two.
507   if (SR2) {
508     int SecondScratchReg = BV.find_next(*SR1);
509     if (SecondScratchReg != -1)
510       *SR2 = SecondScratchReg;
511     else
512       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
513   }
514 
515   // Now that we've done our best to provide both registers, double check
516   // whether we were unable to provide enough.
517   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
518     return false;
519 
520   return true;
521 }
522 
523 // We need a scratch register for spilling LR and for spilling CR. By default,
524 // we use two scratch registers to hide latency. However, if only one scratch
525 // register is available, we can adjust for that by not overlapping the spill
526 // code. However, if we need to realign the stack (i.e. have a base pointer)
527 // and the stack frame is large, we need two scratch registers.
528 // Also, stack probe requires two scratch registers, one for old sp, one for
529 // large frame and large probe size.
530 bool
531 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
532   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
533   MachineFunction &MF = *(MBB->getParent());
534   bool HasBP = RegInfo->hasBasePointer(MF);
535   unsigned FrameSize = determineFrameLayout(MF);
536   int NegFrameSize = -FrameSize;
537   bool IsLargeFrame = !isInt<16>(NegFrameSize);
538   MachineFrameInfo &MFI = MF.getFrameInfo();
539   Align MaxAlign = MFI.getMaxAlign();
540   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
541   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
542 
543   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
544          TLI.hasInlineStackProbe(MF);
545 }
546 
547 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
548   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
549 
550   return findScratchRegister(TmpMBB, false,
551                              twoUniqueScratchRegsRequired(TmpMBB));
552 }
553 
554 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
555   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
556 
557   return findScratchRegister(TmpMBB, true);
558 }
559 
560 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
561   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
562   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
563 
564   // Abort if there is no register info or function info.
565   if (!RegInfo || !FI)
566     return false;
567 
568   // Only move the stack update on ELFv2 ABI and PPC64.
569   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
570     return false;
571 
572   // Check the frame size first and return false if it does not fit the
573   // requirements.
574   // We need a non-zero frame size as well as a frame that will fit in the red
575   // zone. This is because by moving the stack pointer update we are now storing
576   // to the red zone until the stack pointer is updated. If we get an interrupt
577   // inside the prologue but before the stack update we now have a number of
578   // stores to the red zone and those stores must all fit.
579   MachineFrameInfo &MFI = MF.getFrameInfo();
580   unsigned FrameSize = MFI.getStackSize();
581   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
582     return false;
583 
584   // Frame pointers and base pointers complicate matters so don't do anything
585   // if we have them. For example having a frame pointer will sometimes require
586   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
587   // difficult. Similar situation exists with setjmp.
588   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
589     return false;
590 
591   // Calls to fast_cc functions use different rules for passing parameters on
592   // the stack from the ABI and using PIC base in the function imposes
593   // similar restrictions to using the base pointer. It is not generally safe
594   // to move the stack pointer update in these situations.
595   if (FI->hasFastCall() || FI->usesPICBase())
596     return false;
597 
598   // Finally we can move the stack update if we do not require register
599   // scavenging. Register scavenging can introduce more spills and so
600   // may make the frame size larger than we have computed.
601   return !RegInfo->requiresFrameIndexScavenging(MF);
602 }
603 
604 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
605                                     MachineBasicBlock &MBB) const {
606   MachineBasicBlock::iterator MBBI = MBB.begin();
607   MachineFrameInfo &MFI = MF.getFrameInfo();
608   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
609   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
610   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
611 
612   MachineModuleInfo &MMI = MF.getMMI();
613   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
614   DebugLoc dl;
615   // AIX assembler does not support cfi directives.
616   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
617 
618   const bool HasFastMFLR = Subtarget.hasFastMFLR();
619 
620   // Get processor type.
621   bool isPPC64 = Subtarget.isPPC64();
622   // Get the ABI.
623   bool isSVR4ABI = Subtarget.isSVR4ABI();
624   bool isELFv2ABI = Subtarget.isELFv2ABI();
625   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
626 
627   // Work out frame sizes.
628   uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
629   int64_t NegFrameSize = -FrameSize;
630   if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
631     llvm_unreachable("Unhandled stack size!");
632 
633   if (MFI.isFrameAddressTaken())
634     replaceFPWithRealFP(MF);
635 
636   // Check if the link register (LR) must be saved.
637   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
638   bool MustSaveLR = FI->mustSaveLR();
639   bool MustSaveTOC = FI->mustSaveTOC();
640   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
641   bool MustSaveCR = !MustSaveCRs.empty();
642   // Do we have a frame pointer and/or base pointer for this function?
643   bool HasFP = hasFP(MF);
644   bool HasBP = RegInfo->hasBasePointer(MF);
645   bool HasRedZone = isPPC64 || !isSVR4ABI;
646   bool HasROPProtect = Subtarget.hasROPProtect();
647   bool HasPrivileged = Subtarget.hasPrivileged();
648 
649   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
650   Register BPReg = RegInfo->getBaseRegister(MF);
651   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
652   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
653   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
654   Register ScratchReg;
655   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
656   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
657   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
658                                                 : PPC::MFLR );
659   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
660                                                  : PPC::STW );
661   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
662                                                      : PPC::STWU );
663   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
664                                                         : PPC::STWUX);
665   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
666                                               : PPC::OR );
667   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
668                                                             : PPC::SUBFC);
669   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
670                                                                : PPC::SUBFIC);
671   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
672                                                            : PPC::MFCR);
673   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
674   const MCInstrDesc &HashST =
675       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
676                       : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
677 
678   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
679   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
680   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
681   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
682   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
683          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
684 
685   // Using the same bool variable as below to suppress compiler warnings.
686   bool SingleScratchReg = findScratchRegister(
687       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
688   assert(SingleScratchReg &&
689          "Required number of registers not available in this block");
690 
691   SingleScratchReg = ScratchReg == TempReg;
692 
693   int64_t LROffset = getReturnSaveOffset();
694 
695   int64_t FPOffset = 0;
696   if (HasFP) {
697     MachineFrameInfo &MFI = MF.getFrameInfo();
698     int FPIndex = FI->getFramePointerSaveIndex();
699     assert(FPIndex && "No Frame Pointer Save Slot!");
700     FPOffset = MFI.getObjectOffset(FPIndex);
701   }
702 
703   int64_t BPOffset = 0;
704   if (HasBP) {
705     MachineFrameInfo &MFI = MF.getFrameInfo();
706     int BPIndex = FI->getBasePointerSaveIndex();
707     assert(BPIndex && "No Base Pointer Save Slot!");
708     BPOffset = MFI.getObjectOffset(BPIndex);
709   }
710 
711   int64_t PBPOffset = 0;
712   if (FI->usesPICBase()) {
713     MachineFrameInfo &MFI = MF.getFrameInfo();
714     int PBPIndex = FI->getPICBasePointerSaveIndex();
715     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
716     PBPOffset = MFI.getObjectOffset(PBPIndex);
717   }
718 
719   // Get stack alignments.
720   Align MaxAlign = MFI.getMaxAlign();
721   if (HasBP && MaxAlign > 1)
722     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
723 
724   // Frames of 32KB & larger require special handling because they cannot be
725   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
726   bool isLargeFrame = !isInt<16>(NegFrameSize);
727 
728   // Check if we can move the stack update instruction (stdu) down the prologue
729   // past the callee saves. Hopefully this will avoid the situation where the
730   // saves are waiting for the update on the store with update to complete.
731   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
732   bool MovingStackUpdateDown = false;
733 
734   // Check if we can move the stack update.
735   if (stackUpdateCanBeMoved(MF)) {
736     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
737     for (CalleeSavedInfo CSI : Info) {
738       // If the callee saved register is spilled to a register instead of the
739       // stack then the spill no longer uses the stack pointer.
740       // This can lead to two consequences:
741       // 1) We no longer need to update the stack because the function does not
742       //    spill any callee saved registers to stack.
743       // 2) We have a situation where we still have to update the stack pointer
744       //    even though some registers are spilled to other registers. In
745       //    this case the current code moves the stack update to an incorrect
746       //    position.
747       // In either case we should abort moving the stack update operation.
748       if (CSI.isSpilledToReg()) {
749         StackUpdateLoc = MBBI;
750         MovingStackUpdateDown = false;
751         break;
752       }
753 
754       int FrIdx = CSI.getFrameIdx();
755       // If the frame index is not negative the callee saved info belongs to a
756       // stack object that is not a fixed stack object. We ignore non-fixed
757       // stack objects because we won't move the stack update pointer past them.
758       if (FrIdx >= 0)
759         continue;
760 
761       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
762         StackUpdateLoc++;
763         MovingStackUpdateDown = true;
764       } else {
765         // We need all of the Frame Indices to meet these conditions.
766         // If they do not, abort the whole operation.
767         StackUpdateLoc = MBBI;
768         MovingStackUpdateDown = false;
769         break;
770       }
771     }
772 
773     // If the operation was not aborted then update the object offset.
774     if (MovingStackUpdateDown) {
775       for (CalleeSavedInfo CSI : Info) {
776         int FrIdx = CSI.getFrameIdx();
777         if (FrIdx < 0)
778           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
779       }
780     }
781   }
782 
783   // Where in the prologue we move the CR fields depends on how many scratch
784   // registers we have, and if we need to save the link register or not. This
785   // lambda is to avoid duplicating the logic in 2 places.
786   auto BuildMoveFromCR = [&]() {
787     if (isELFv2ABI && MustSaveCRs.size() == 1) {
788     // In the ELFv2 ABI, we are not required to save all CR fields.
789     // If only one CR field is clobbered, it is more efficient to use
790     // mfocrf to selectively save just that field, because mfocrf has shorter
791     // latency compared to mfcr.
792       assert(isPPC64 && "V2 ABI is 64-bit only.");
793       MachineInstrBuilder MIB =
794           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
795       MIB.addReg(MustSaveCRs[0], RegState::Kill);
796     } else {
797       MachineInstrBuilder MIB =
798           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
799       for (unsigned CRfield : MustSaveCRs)
800         MIB.addReg(CRfield, RegState::ImplicitKill);
801     }
802   };
803 
804   // If we need to spill the CR and the LR but we don't have two separate
805   // registers available, we must spill them one at a time
806   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
807     BuildMoveFromCR();
808     BuildMI(MBB, MBBI, dl, StoreWordInst)
809         .addReg(TempReg, getKillRegState(true))
810         .addImm(CRSaveOffset)
811         .addReg(SPReg);
812   }
813 
814   if (MustSaveLR)
815     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
816 
817   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
818     BuildMoveFromCR();
819 
820   if (HasRedZone) {
821     if (HasFP)
822       BuildMI(MBB, MBBI, dl, StoreInst)
823         .addReg(FPReg)
824         .addImm(FPOffset)
825         .addReg(SPReg);
826     if (FI->usesPICBase())
827       BuildMI(MBB, MBBI, dl, StoreInst)
828         .addReg(PPC::R30)
829         .addImm(PBPOffset)
830         .addReg(SPReg);
831     if (HasBP)
832       BuildMI(MBB, MBBI, dl, StoreInst)
833         .addReg(BPReg)
834         .addImm(BPOffset)
835         .addReg(SPReg);
836   }
837 
838   // Generate the instruction to store the LR. In the case where ROP protection
839   // is required the register holding the LR should not be killed as it will be
840   // used by the hash store instruction.
841   auto SaveLR = [&](int64_t Offset) {
842     assert(MustSaveLR && "LR is not required to be saved!");
843     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
844         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
845         .addImm(Offset)
846         .addReg(SPReg);
847 
848     // Add the ROP protection Hash Store instruction.
849     // NOTE: This is technically a violation of the ABI. The hash can be saved
850     // up to 512 bytes into the Protected Zone. This can be outside of the
851     // initial 288 byte volatile program storage region in the Protected Zone.
852     // However, this restriction will be removed in an upcoming revision of the
853     // ABI.
854     if (HasROPProtect) {
855       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
856       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
857       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
858              "ROP hash save offset out of range.");
859       assert(((ImmOffset & 0x7) == 0) &&
860              "ROP hash save offset must be 8 byte aligned.");
861       BuildMI(MBB, StackUpdateLoc, dl, HashST)
862           .addReg(ScratchReg, getKillRegState(true))
863           .addImm(ImmOffset)
864           .addReg(SPReg);
865     }
866   };
867 
868   if (MustSaveLR && HasFastMFLR)
869       SaveLR(LROffset);
870 
871   if (MustSaveCR &&
872       !(SingleScratchReg && MustSaveLR)) {
873     assert(HasRedZone && "A red zone is always available on PPC64");
874     BuildMI(MBB, MBBI, dl, StoreWordInst)
875       .addReg(TempReg, getKillRegState(true))
876       .addImm(CRSaveOffset)
877       .addReg(SPReg);
878   }
879 
880   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
881   if (!FrameSize) {
882     if (MustSaveLR && !HasFastMFLR)
883       SaveLR(LROffset);
884     return;
885   }
886 
887   // Adjust stack pointer: r1 += NegFrameSize.
888   // If there is a preferred stack alignment, align R1 now
889 
890   if (HasBP && HasRedZone) {
891     // Save a copy of r1 as the base pointer.
892     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
893       .addReg(SPReg)
894       .addReg(SPReg);
895   }
896 
897   // Have we generated a STUX instruction to claim stack frame? If so,
898   // the negated frame size will be placed in ScratchReg.
899   bool HasSTUX =
900       (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
901       (HasBP && MaxAlign > 1) || isLargeFrame;
902 
903   // If we use STUX to update the stack pointer, we need both scratch registers
904   // TempReg and ScratchReg, so LR, which is held in ScratchReg, has to be
905   // saved here first.
906   // If the offset can not be encoded into the store instruction, we also have
907   // to save LR here.
908   if (MustSaveLR && !HasFastMFLR &&
909       (HasSTUX || !isInt<16>(FrameSize + LROffset)))
910     SaveLR(LROffset);
911 
912   // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe: the POWER
913   // ABI requires the back-chain pointer to always be stored at SP, so the
914   // required STU(X) instruction itself acts as the probe.
915   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
916     // To be consistent with other targets, a pseudo instruction is emitted and
917     // will be later expanded in `inlineStackProbe`.
918     BuildMI(MBB, MBBI, dl,
919             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
920                             : PPC::PROBED_STACKALLOC_32))
921         .addDef(TempReg)
922         .addDef(ScratchReg) // ScratchReg stores the old sp.
923         .addImm(NegFrameSize);
924     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
925     // update the ScratchReg to meet the assumption that ScratchReg contains
926     // the NegFrameSize. This solution is rather tricky.
927     if (!HasRedZone) {
928       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
929           .addReg(ScratchReg)
930           .addReg(SPReg);
931     }
932   } else {
933     // This condition must be kept in sync with canUseAsPrologue.
934     if (HasBP && MaxAlign > 1) {
935       if (isPPC64)
936         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
937             .addReg(SPReg)
938             .addImm(0)
939             .addImm(64 - Log2(MaxAlign));
940       else // PPC32...
941         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
942             .addReg(SPReg)
943             .addImm(0)
944             .addImm(32 - Log2(MaxAlign))
945             .addImm(31);
946       if (!isLargeFrame) {
947         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
948             .addReg(ScratchReg, RegState::Kill)
949             .addImm(NegFrameSize);
950       } else {
951         assert(!SingleScratchReg && "Only a single scratch reg available");
952         TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
953         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
954             .addReg(ScratchReg, RegState::Kill)
955             .addReg(TempReg, RegState::Kill);
956       }
957 
958       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
959           .addReg(SPReg, RegState::Kill)
960           .addReg(SPReg)
961           .addReg(ScratchReg);
962     } else if (!isLargeFrame) {
963       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
964           .addReg(SPReg)
965           .addImm(NegFrameSize)
966           .addReg(SPReg);
967     } else {
968       TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
969       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
970           .addReg(SPReg, RegState::Kill)
971           .addReg(SPReg)
972           .addReg(ScratchReg);
973     }
974   }
975 
976   // Save the TOC register after the stack pointer update if a prologue TOC
977   // save is required for the function.
978   if (MustSaveTOC) {
979     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
980     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
981       .addReg(TOCReg, getKillRegState(true))
982       .addImm(TOCSaveOffset)
983       .addReg(SPReg);
984   }
985 
986   if (!HasRedZone) {
987     assert(!isPPC64 && "A red zone is always available on PPC64");
988     if (HasSTUX) {
989       // The negated frame size is in ScratchReg, and the SPReg has been
990       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
991       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
992       // the stack frame (i.e. the old SP), ideally, we would put the old
993       // SP into a register and use it as the base for the stores. The
994       // problem is that the only available register may be ScratchReg,
995       // which could be R0, and R0 cannot be used as a base address.
996 
997       // First, set ScratchReg to the old SP. This may need to be modified
998       // later.
999       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1000         .addReg(ScratchReg, RegState::Kill)
1001         .addReg(SPReg);
1002 
1003       if (ScratchReg == PPC::R0) {
1004         // R0 cannot be used as a base register, but it can be used as an
1005         // index in a store-indexed.
1006         int LastOffset = 0;
1007         if (HasFP)  {
1008           // R0 += (FPOffset-LastOffset).
1009           // Need addic, since addi treats R0 as 0.
1010           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1011             .addReg(ScratchReg)
1012             .addImm(FPOffset-LastOffset);
1013           LastOffset = FPOffset;
1014           // Store FP into *R0.
1015           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1016             .addReg(FPReg, RegState::Kill)  // Save FP.
1017             .addReg(PPC::ZERO)
1018             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1019         }
1020         if (FI->usesPICBase()) {
1021           // R0 += (PBPOffset-LastOffset).
1022           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1023             .addReg(ScratchReg)
1024             .addImm(PBPOffset-LastOffset);
1025           LastOffset = PBPOffset;
1026           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1027             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1028             .addReg(PPC::ZERO)
1029             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1030         }
1031         if (HasBP) {
1032           // R0 += (BPOffset-LastOffset).
1033           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1034             .addReg(ScratchReg)
1035             .addImm(BPOffset-LastOffset);
1036           LastOffset = BPOffset;
1037           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1038             .addReg(BPReg, RegState::Kill)  // Save BP.
1039             .addReg(PPC::ZERO)
1040             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1041           // BP = R0-LastOffset
1042           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1043             .addReg(ScratchReg, RegState::Kill)
1044             .addImm(-LastOffset);
1045         }
1046       } else {
1047         // ScratchReg is not R0, so use it as the base register. It is
1048         // already set to the old SP, so we can use the offsets directly.
1049 
1050         // Now that the stack frame has been allocated, save all the necessary
1051         // registers using ScratchReg as the base address.
1052         if (HasFP)
1053           BuildMI(MBB, MBBI, dl, StoreInst)
1054             .addReg(FPReg)
1055             .addImm(FPOffset)
1056             .addReg(ScratchReg);
1057         if (FI->usesPICBase())
1058           BuildMI(MBB, MBBI, dl, StoreInst)
1059             .addReg(PPC::R30)
1060             .addImm(PBPOffset)
1061             .addReg(ScratchReg);
1062         if (HasBP) {
1063           BuildMI(MBB, MBBI, dl, StoreInst)
1064             .addReg(BPReg)
1065             .addImm(BPOffset)
1066             .addReg(ScratchReg);
1067           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1068             .addReg(ScratchReg, RegState::Kill)
1069             .addReg(ScratchReg);
1070         }
1071       }
1072     } else {
1073       // The frame size is a known 16-bit constant (fitting in the immediate
1074       // field of STWU). To be here we have to be compiling for PPC32.
1075       // Since the SPReg has been decreased by FrameSize, add it back to each
1076       // offset.
1077       if (HasFP)
1078         BuildMI(MBB, MBBI, dl, StoreInst)
1079           .addReg(FPReg)
1080           .addImm(FrameSize + FPOffset)
1081           .addReg(SPReg);
1082       if (FI->usesPICBase())
1083         BuildMI(MBB, MBBI, dl, StoreInst)
1084           .addReg(PPC::R30)
1085           .addImm(FrameSize + PBPOffset)
1086           .addReg(SPReg);
1087       if (HasBP) {
1088         BuildMI(MBB, MBBI, dl, StoreInst)
1089           .addReg(BPReg)
1090           .addImm(FrameSize + BPOffset)
1091           .addReg(SPReg);
1092         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1093           .addReg(SPReg)
1094           .addImm(FrameSize);
1095       }
1096     }
1097   }
1098 
1099   // Save the LR now.
1100   if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1101     SaveLR(LROffset + FrameSize);
1102 
1103   // Add Call Frame Information for the instructions we generated above.
1104   if (needsCFI) {
1105     unsigned CFIIndex;
1106 
1107     if (HasBP) {
1108       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1109       // because if the stack needed aligning then CFA won't be at a fixed
1110       // offset from FP/SP.
1111       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1112       CFIIndex = MF.addFrameInst(
1113           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1114     } else {
1115       // Adjust the definition of CFA to account for the change in SP.
1116       assert(NegFrameSize);
1117       CFIIndex = MF.addFrameInst(
1118           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1119     }
1120     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1121         .addCFIIndex(CFIIndex);
1122 
1123     if (HasFP) {
1124       // Describe where FP was saved, at a fixed offset from CFA.
1125       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1126       CFIIndex = MF.addFrameInst(
1127           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1128       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1129           .addCFIIndex(CFIIndex);
1130     }
1131 
1132     if (FI->usesPICBase()) {
1133       // Describe where the PIC base was saved, at a fixed offset from CFA.
1134       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1135       CFIIndex = MF.addFrameInst(
1136           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1137       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1138           .addCFIIndex(CFIIndex);
1139     }
1140 
1141     if (HasBP) {
1142       // Describe where BP was saved, at a fixed offset from CFA.
1143       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1144       CFIIndex = MF.addFrameInst(
1145           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1146       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1147           .addCFIIndex(CFIIndex);
1148     }
1149 
1150     if (MustSaveLR) {
1151       // Describe where LR was saved, at a fixed offset from CFA.
1152       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1153       CFIIndex = MF.addFrameInst(
1154           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1155       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1156           .addCFIIndex(CFIIndex);
1157     }
1158   }
1159 
1160   // If there is a frame pointer, copy R1 into R31
1161   if (HasFP) {
1162     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1163       .addReg(SPReg)
1164       .addReg(SPReg);
1165 
1166     if (!HasBP && needsCFI) {
1167       // Change the definition of CFA from SP+offset to FP+offset, because SP
1168       // will change at every alloca.
1169       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1170       unsigned CFIIndex = MF.addFrameInst(
1171           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1172 
1173       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1174           .addCFIIndex(CFIIndex);
1175     }
1176   }
1177 
1178   if (needsCFI) {
1179     // Describe where callee saved registers were saved, at fixed offsets from
1180     // CFA.
1181     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1182     for (const CalleeSavedInfo &I : CSI) {
1183       Register Reg = I.getReg();
1184       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1185 
1186       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1187       // subregisters of CR2. We just need to emit a move of CR2.
1188       if (PPC::CRBITRCRegClass.contains(Reg))
1189         continue;
1190 
1191       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1192         continue;
1193 
1194       // For SVR4, don't emit a move for the CR spill slot if we haven't
1195       // spilled CRs.
1196       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1197           && !MustSaveCR)
1198         continue;
1199 
1200       // For 64-bit SVR4 when we have spilled CRs, the spill location
1201       // is SP+8, not a frame-relative slot.
1202       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1203         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1204         // the whole CR word.  In the ELFv2 ABI, every CR that was
1205         // actually saved gets its own CFI record.
1206         Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1207         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1208             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1209         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1210             .addCFIIndex(CFIIndex);
1211         continue;
1212       }
1213 
1214       if (I.isSpilledToReg()) {
1215         unsigned SpilledReg = I.getDstReg();
1216         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1217             nullptr, MRI->getDwarfRegNum(Reg, true),
1218             MRI->getDwarfRegNum(SpilledReg, true)));
1219         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1220           .addCFIIndex(CFIRegister);
1221       } else {
1222         int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1223         // We have changed the object offset above but we do not want to change
1224         // the actual offsets in the CFI instruction so we have to undo the
1225         // offset change here.
1226         if (MovingStackUpdateDown)
1227           Offset -= NegFrameSize;
1228 
1229         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1230             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1231         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1232             .addCFIIndex(CFIIndex);
1233       }
1234     }
1235   }
1236 }
1237 
1238 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1239                                         MachineBasicBlock &PrologMBB) const {
1240   bool isPPC64 = Subtarget.isPPC64();
1241   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1242   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1243   MachineFrameInfo &MFI = MF.getFrameInfo();
1244   MachineModuleInfo &MMI = MF.getMMI();
1245   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1246   // AIX assembler does not support cfi directives.
1247   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1248   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1249     int Opc = MI.getOpcode();
1250     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1251   });
1252   if (StackAllocMIPos == PrologMBB.end())
1253     return;
1254   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1255   MachineBasicBlock *CurrentMBB = &PrologMBB;
1256   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1257   MachineInstr &MI = *StackAllocMIPos;
1258   int64_t NegFrameSize = MI.getOperand(2).getImm();
1259   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1260   int64_t NegProbeSize = -(int64_t)ProbeSize;
1261   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1262   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1263   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1264   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1265   Register ScratchReg = MI.getOperand(0).getReg();
1266   Register FPReg = MI.getOperand(1).getReg();
1267   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1268   bool HasBP = RegInfo->hasBasePointer(MF);
1269   Register BPReg = RegInfo->getBaseRegister(MF);
1270   Align MaxAlign = MFI.getMaxAlign();
1271   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1272   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1273   // Subroutines to generate .cfi_* directives.
1274   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1275                             MachineBasicBlock::iterator MBBI, Register Reg) {
1276     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1277     unsigned CFIIndex = MF.addFrameInst(
1278         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1279     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1280         .addCFIIndex(CFIIndex);
1281   };
1282   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1283                          MachineBasicBlock::iterator MBBI, Register Reg,
1284                          int Offset) {
1285     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1286     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1287         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1288     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1289         .addCFIIndex(CFIIndex);
1290   };
1291   // Subroutine to determine if we can use the Imm as part of d-form.
1292   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1293   // Subroutine to materialize the Imm into TempReg.
1294   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1295                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1296                             Register &TempReg) {
1297     assert(isInt<32>(Imm) && "Unhandled imm");
1298     if (isInt<16>(Imm))
1299       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1300           .addImm(Imm);
1301     else {
1302       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1303           .addImm(Imm >> 16);
1304       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1305           .addReg(TempReg)
1306           .addImm(Imm & 0xFFFF);
1307     }
1308   };
1309   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1310   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1311                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1312                               Register NegSizeReg, bool UseDForm,
1313                               Register StoreReg) {
1314     if (UseDForm)
1315       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1316           .addReg(StoreReg)
1317           .addImm(NegSize)
1318           .addReg(SPReg);
1319     else
1320       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1321           .addReg(StoreReg)
1322           .addReg(SPReg)
1323           .addReg(NegSizeReg);
1324   };
1325   // Used to probe stack when realignment is required.
1326   // Note that the ABI requires *sp to always equal the back-chain pointer,
1327   // so only st(w|d)u(x) can be used to update sp.
1328   // The following is pseudo code:
1329   // final_sp = (sp & align) + negframesize;
1330   // neg_gap = final_sp - sp;
1331   // while (neg_gap < negprobesize) {
1332   //   stdu fp, negprobesize(sp);
1333   //   neg_gap -= negprobesize;
1334   // }
1335   // stdux fp, sp, neg_gap
1336   //
1337   // When HasBP && HasRedZone, the back-chain pointer is already saved in BPReg
1338   // before the probe code, so we don't need to save it and get one additional
1339   // register that can be used to materialize the probe size if xform is needed.
1340   // Otherwise, we can NOT materialize the probe size, so we can only use Dform
1341   // for now.
1342   //
1343   // The allocations are:
1344   // if (HasBP && HasRedzone) {
1345   //   r0: materialize the probesize if needed so that we can use xform.
1346   //   r12: `neg_gap`
1347   // } else {
1348   //   r0: back-chain pointer
1349   //   r12: `neg_gap`.
1350   // }
1351   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1352                                  MachineBasicBlock::iterator MBBI,
1353                                  Register ScratchReg, Register TempReg) {
1354     assert(HasBP && "The function is supposed to have base pointer when its "
1355                     "stack is realigned.");
1356     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1357 
1358     // FIXME: We can eliminate this limitation if we get more information about
1359     // which parts of the red zone are already used. A used red zone can be
1360     // treated as probed, but there might be `holes' in the probed red zone,
1361     // which could complicate the implementation.
1362     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1363            "Probe size should be larger or equal to the size of red-zone so "
1364            "that red-zone is not clobbered by probing.");
1365 
1366     Register &FinalStackPtr = TempReg;
1367     // FIXME: We only support NegProbeSize materializable by DForm currently.
1368     // When HasBP && HasRedzone, we can use xform if we have an additional idle
1369     // register.
1370     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1371     assert(isInt<16>(NegProbeSize) &&
1372            "NegProbeSize should be materializable by DForm");
1373     Register CRReg = PPC::CR0;
1374     // Layout of output assembly kinda like:
1375     // bb.0:
1376     //   ...
1377     //   sub $scratchreg, $finalsp, r1
1378     //   cmpdi $scratchreg, <negprobesize>
1379     //   bge bb.2
1380     // bb.1:
1381     //   stdu <backchain>, <negprobesize>(r1)
1382     //   sub $scratchreg, $scratchreg, negprobesize
1383     //   cmpdi $scratchreg, <negprobesize>
1384     //   blt bb.1
1385     // bb.2:
1386     //   stdux <backchain>, r1, $scratchreg
1387     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1388     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1389     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1390     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1391     MF.insert(MBBInsertPoint, ProbeExitMBB);
1392     // bb.2
1393     {
1394       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1395       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1396                        BackChainPointer);
1397       if (HasRedZone)
1398         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1399         // to TempReg to satisfy it.
1400         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1401             .addReg(BPReg)
1402             .addReg(BPReg);
1403       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1404       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1405     }
1406     // bb.0
1407     {
1408       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1409           .addReg(SPReg)
1410           .addReg(FinalStackPtr);
1411       if (!HasRedZone)
1412         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1413       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1414           .addReg(ScratchReg)
1415           .addImm(NegProbeSize);
1416       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1417           .addImm(PPC::PRED_GE)
1418           .addReg(CRReg)
1419           .addMBB(ProbeExitMBB);
1420       MBB.addSuccessor(ProbeLoopBodyMBB);
1421       MBB.addSuccessor(ProbeExitMBB);
1422     }
1423     // bb.1
1424     {
1425       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1426       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1427                        0, true /*UseDForm*/, BackChainPointer);
1428       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1429               ScratchReg)
1430           .addReg(ScratchReg)
1431           .addImm(-NegProbeSize);
1432       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1433               CRReg)
1434           .addReg(ScratchReg)
1435           .addImm(NegProbeSize);
1436       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1437           .addImm(PPC::PRED_LT)
1438           .addReg(CRReg)
1439           .addMBB(ProbeLoopBodyMBB);
1440       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1441       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1442     }
1443     // Update liveins.
1444     bool anyChange = false;
1445     do {
1446       anyChange = recomputeLiveIns(*ProbeExitMBB) ||
1447                   recomputeLiveIns(*ProbeLoopBodyMBB);
1448     } while (anyChange);
1449     return ProbeExitMBB;
1450   };
1451   // When HasBP && MaxAlign > 1, we have to realign the SP by performing
1452   // SP = SP - SP % MaxAlign. This makes the probe behave like a dynamic probe,
1453   // since the offset subtracted from SP is determined by SP's runtime value.
1454   if (HasBP && MaxAlign > 1) {
1455     // Calculate final stack pointer.
1456     if (isPPC64)
1457       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1458           .addReg(SPReg)
1459           .addImm(0)
1460           .addImm(64 - Log2(MaxAlign));
1461     else
1462       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1463           .addReg(SPReg)
1464           .addImm(0)
1465           .addImm(32 - Log2(MaxAlign))
1466           .addImm(31);
1467     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1468             FPReg)
1469         .addReg(ScratchReg)
1470         .addReg(SPReg);
1471     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1472     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1473             FPReg)
1474         .addReg(ScratchReg)
1475         .addReg(FPReg);
1476     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1477     if (needsCFI)
1478       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1479   } else {
1480     // Initialize current frame pointer.
1481     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1482     // Use FPReg to calculate CFA.
1483     if (needsCFI)
1484       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1485     // Probe residual part.
1486     if (NegResidualSize) {
1487       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1488       if (!ResidualUseDForm)
1489         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1490       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1491                        ResidualUseDForm, FPReg);
1492     }
1493     bool UseDForm = CanUseDForm(NegProbeSize);
1494     // If number of blocks is small, just probe them directly.
1495     if (NumBlocks < 3) {
1496       if (!UseDForm)
1497         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1498       for (int i = 0; i < NumBlocks; ++i)
1499         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1500                          FPReg);
1501       if (needsCFI) {
1502         // Restore using SPReg to calculate CFA.
1503         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1504       }
1505     } else {
1506       // Since CTR is a volatile register and current shrinkwrap implementation
1507       // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1508       // CTR loop to probe.
1509       // Calculate trip count and stores it in CTRReg.
1510       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1511       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1512           .addReg(ScratchReg, RegState::Kill);
1513       if (!UseDForm)
1514         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1515       // Create MBBs of the loop.
1516       MachineFunction::iterator MBBInsertPoint =
1517           std::next(CurrentMBB->getIterator());
1518       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1519       MF.insert(MBBInsertPoint, LoopMBB);
1520       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1521       MF.insert(MBBInsertPoint, ExitMBB);
1522       // Synthesize the loop body.
1523       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1524                        UseDForm, FPReg);
1525       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1526           .addMBB(LoopMBB);
1527       LoopMBB->addSuccessor(ExitMBB);
1528       LoopMBB->addSuccessor(LoopMBB);
1529       // Synthesize the exit MBB.
1530       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1531                       std::next(MachineBasicBlock::iterator(MI)),
1532                       CurrentMBB->end());
1533       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1534       CurrentMBB->addSuccessor(LoopMBB);
1535       if (needsCFI) {
1536         // Restore using SPReg to calculate CFA.
1537         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1538       }
1539       // Update liveins.
1540       bool anyChange = false;
1541       do {
1542         anyChange = recomputeLiveIns(*ExitMBB) || recomputeLiveIns(*LoopMBB);
1543       } while (anyChange);
1544     }
1545   }
1546   ++NumPrologProbed;
1547   MI.eraseFromParent();
1548 }
1549 
// Emit the epilogue for MF into MBB. Instructions are inserted before the
// block's first terminator (or before StackUpdateLoc, which may be hoisted
// above the callee-saved restores). The sequence restores the stack pointer,
// reloads LR / CR / FP / PIC base / BP from their save slots as required, and
// finally handles the fastcc callee-pop adjustment or the tail-call branch.
1550 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1551                                     MachineBasicBlock &MBB) const {
1552   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1553   DebugLoc dl;
1554 
       // Borrow the debug location of the first terminator when the block has one.
1555   if (MBBI != MBB.end())
1556     dl = MBBI->getDebugLoc();
1557 
1558   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1559   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1560 
1561   // Get alignment info so we know how to restore the SP.
1562   const MachineFrameInfo &MFI = MF.getFrameInfo();
1563 
1564   // Get the number of bytes allocated from the FrameInfo.
1565   int64_t FrameSize = MFI.getStackSize();
1566 
1567   // Get processor type.
1568   bool isPPC64 = Subtarget.isPPC64();
1569 
1570   // Check if the link register (LR) has been saved.
1571   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1572   bool MustSaveLR = FI->mustSaveLR();
1573   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1574   bool MustSaveCR = !MustSaveCRs.empty();
1575   // Do we have a frame pointer and/or base pointer for this function?
1576   bool HasFP = hasFP(MF);
1577   bool HasBP = RegInfo->hasBasePointer(MF);
1578   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1579   bool HasROPProtect = Subtarget.hasROPProtect();
1580   bool HasPrivileged = Subtarget.hasPrivileged();
1581 
       // Registers and opcodes below are selected per 32-bit / 64-bit mode.
1582   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1583   Register BPReg = RegInfo->getBaseRegister(MF);
1584   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1585   Register ScratchReg;
1586   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1587   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1588                                                  : PPC::MTLR );
1589   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1590                                                  : PPC::LWZ );
1591   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1592                                                            : PPC::LIS );
1593   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1594                                               : PPC::OR );
1595   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1596                                                   : PPC::ORI );
1597   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1598                                                    : PPC::ADDI );
1599   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1600                                                 : PPC::ADD4 );
1601   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1602                                                      : PPC::LWZ);
1603   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1604                                                      : PPC::MTOCRF);
1605   const MCInstrDesc &HashChk =
1606       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1607                       : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1608   int64_t LROffset = getReturnSaveOffset();
1609 
       // Save-slot offsets for FP, BP and the PIC base. Each one stays 0 unless
       // the corresponding register is in use (they are filled in below).
1610   int64_t FPOffset = 0;
1611 
1612   // Using the same bool variable as below to suppress compiler warnings.
1613   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1614                                               &TempReg);
1615   assert(SingleScratchReg &&
1616          "Could not find an available scratch register");
1617 
       // From here on the flag means: both scratch roles map to the same register.
1618   SingleScratchReg = ScratchReg == TempReg;
1619 
1620   if (HasFP) {
1621     int FPIndex = FI->getFramePointerSaveIndex();
1622     assert(FPIndex && "No Frame Pointer Save Slot!");
1623     FPOffset = MFI.getObjectOffset(FPIndex);
1624   }
1625 
1626   int64_t BPOffset = 0;
1627   if (HasBP) {
1628       int BPIndex = FI->getBasePointerSaveIndex();
1629       assert(BPIndex && "No Base Pointer Save Slot!");
1630       BPOffset = MFI.getObjectOffset(BPIndex);
1631   }
1632 
1633   int64_t PBPOffset = 0;
1634   if (FI->usesPICBase()) {
1635     int PBPIndex = FI->getPICBasePointerSaveIndex();
1636     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1637     PBPOffset = MFI.getObjectOffset(PBPIndex);
1638   }
1639 
       // True when the block has a first terminator and it is a return.
1640   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1641 
1642   if (IsReturnBlock) {
1643     unsigned RetOpcode = MBBI->getOpcode();
1644     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1645                       RetOpcode == PPC::TCRETURNdi ||
1646                       RetOpcode == PPC::TCRETURNai ||
1647                       RetOpcode == PPC::TCRETURNri8 ||
1648                       RetOpcode == PPC::TCRETURNdi8 ||
1649                       RetOpcode == PPC::TCRETURNai8;
1650 
         // For tail-call returns, operand 1 of the TCRETURN pseudo is read as the
         // stack adjustment and folded into FrameSize.
1651     if (UsesTCRet) {
1652       int MaxTCRetDelta = FI->getTailCallSPDelta();
1653       MachineOperand &StackAdjust = MBBI->getOperand(1);
1654       assert(StackAdjust.isImm() && "Expecting immediate value.");
1655       // Adjust stack pointer.
1656       int StackAdj = StackAdjust.getImm();
1657       int Delta = StackAdj - MaxTCRetDelta;
1658       assert((Delta >= 0) && "Delta must be positive");
1659       if (MaxTCRetDelta>0)
1660         FrameSize += (StackAdj +Delta);
1661       else
1662         FrameSize += StackAdj;
1663     }
1664   }
1665 
1666   // Frames of 32KB & larger require special handling because they cannot be
1667   // indexed into with a simple LD/LWZ immediate offset operand.
1668   bool isLargeFrame = !isInt<16>(FrameSize);
1669 
1670   // On targets without red zone, the SP needs to be restored last, so that
1671   // all live contents of the stack frame are upwards of the SP. This means
1672   // that we cannot restore SP just now, since there may be more registers
1673   // to restore from the stack frame (e.g. R31). If the frame size is not
1674   // a simple immediate value, we will need a spare register to hold the
1675   // restored SP. If the frame size is known and small, we can simply adjust
1676   // the offsets of the registers to be restored, and still use SP to restore
1677   // them. In such case, the final update of SP will be to add the frame
1678   // size to it.
1679   // To simplify the code, set RBReg to the base register used to restore
1680   // values from the stack, and set SPAdd to the value that needs to be added
1681   // to the SP at the end. The default values are as if red zone was present.
1682   unsigned RBReg = SPReg;
1683   uint64_t SPAdd = 0;
1684 
1685   // Check if we can move the stack update instruction up the epilogue
1686   // past the callee saves. This will allow the move to LR instruction
1687   // to be executed before the restores of the callee saves which means
1688   // that the callee saves can hide the latency from the MTLR instruction.
1689   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1690   if (stackUpdateCanBeMoved(MF)) {
1691     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1692     for (CalleeSavedInfo CSI : Info) {
1693       // If the callee saved register is spilled to another register abort the
1694       // stack update movement.
1695       if (CSI.isSpilledToReg()) {
1696         StackUpdateLoc = MBBI;
1697         break;
1698       }
1699       int FrIdx = CSI.getFrameIdx();
1700       // If the frame index is not negative the callee saved info belongs to a
1701       // stack object that is not a fixed stack object. We ignore non-fixed
1702       // stack objects because we won't move the update of the stack pointer
1703       // past them.
1704       if (FrIdx >= 0)
1705         continue;
1706 
           // Step the insertion point back by one instruction for each fixed
           // callee-saved slot that lives at a negative offset.
1707       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1708         StackUpdateLoc--;
1709       else {
1710         // Abort the operation as we can't update all CSR restores.
1711         StackUpdateLoc = MBBI;
1712         break;
1713       }
1714     }
1715   }
1716 
1717   if (FrameSize) {
1718     // In the prologue, the loaded (or persistent) stack pointer value is
1719     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1720     // zone add this offset back now.
1721 
1722     // If the function has a base pointer, the stack pointer has been copied
1723     // to it so we can restore it by copying in the other direction.
1724     if (HasRedZone && HasBP) {
1725       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1726         addReg(BPReg).
1727         addReg(BPReg);
1728     }
1729     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1730     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1731     // call which invalidates the stack pointer value in SP(0). So we use the
1732     // value of R31 in this case. Similar situation exists with setjmp.
1733     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1734       assert(HasFP && "Expecting a valid frame pointer.");
1735       if (!HasRedZone)
1736         RBReg = FPReg;
1737       if (!isLargeFrame) {
1738         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1739           .addReg(FPReg).addImm(FrameSize);
1740       } else {
1741         TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
             // NOTE(review): unlike the AddImmInst form above, RBReg is not passed
             // to BuildMI as the destination here; it is appended with addReg()
             // like the two source operands -- confirm this is intended.
1742         BuildMI(MBB, MBBI, dl, AddInst)
1743           .addReg(RBReg)
1744           .addReg(FPReg)
1745           .addReg(ScratchReg);
1746       }
1747     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1748       if (HasRedZone) {
1749         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1750           .addReg(SPReg)
1751           .addImm(FrameSize);
1752       } else {
1753         // Make sure that adding FrameSize will not overflow the max offset
1754         // size.
1755         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1756                "Local offsets should be negative");
             // Keep SP untouched for now: bias the save-slot offsets by FrameSize
             // and remember the amount to add to SP at the very end.
1757         SPAdd = FrameSize;
1758         FPOffset += FrameSize;
1759         BPOffset += FrameSize;
1760         PBPOffset += FrameSize;
1761       }
1762     } else {
1763       // We don't want to use ScratchReg as a base register, because it
1764       // could happen to be R0. Use FP instead, but make sure to preserve it.
1765       if (!HasRedZone) {
1766         // If FP is not saved, copy it to ScratchReg.
1767         if (!HasFP)
1768           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1769             .addReg(FPReg)
1770             .addReg(FPReg);
1771         RBReg = FPReg;
1772       }
           // Load the word at offset 0 from SP into RBReg.
1773       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1774         .addImm(0)
1775         .addReg(SPReg);
1776     }
1777   }
1778   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1779   // If there is no red zone, ScratchReg may be needed for holding a useful
1780   // value (although not the base register). Make sure it is not overwritten
1781   // too early.
1782 
1783   // If we need to restore both the LR and the CR and we only have one
1784   // available scratch register, we must do them one at a time.
1785   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1786     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1787     // is live here.
1788     assert(HasRedZone && "Expecting red zone");
1789     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1790       .addImm(CRSaveOffset)
1791       .addReg(SPReg);
1792     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1793       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1794         .addReg(TempReg, getKillRegState(i == e-1));
1795   }
1796 
1797   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1798   // LR is stored in the caller's stack frame. ScratchReg will be needed
1799   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1800   // a base register anyway, because it may happen to be R0.
1801   bool LoadedLR = false;
1802   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1803     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1804       .addImm(LROffset+SPAdd)
1805       .addReg(RBReg);
1806     LoadedLR = true;
1807   }
1808 
       // With two distinct scratch registers (or no LR to restore), load the CR
       // save word into TempReg now; it is moved into the CR fields further down.
1809   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1810     assert(RBReg == SPReg && "Should be using SP as a base register");
1811     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1812       .addImm(CRSaveOffset)
1813       .addReg(RBReg);
1814   }
1815 
1816   if (HasFP) {
1817     // If there is red zone, restore FP directly, since SP has already been
1818     // restored. Otherwise, restore the value of FP into ScratchReg.
1819     if (HasRedZone || RBReg == SPReg)
1820       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1821         .addImm(FPOffset)
1822         .addReg(SPReg);
1823     else
1824       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1825         .addImm(FPOffset)
1826         .addReg(RBReg);
1827   }
1828 
       // Reload the PIC base register (R30) from its save slot.
1829   if (FI->usesPICBase())
1830     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1831       .addImm(PBPOffset)
1832       .addReg(RBReg);
1833 
       // Reload the base pointer from its save slot.
1834   if (HasBP)
1835     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1836       .addImm(BPOffset)
1837       .addReg(RBReg);
1838 
1839   // There is nothing more to be loaded from the stack, so now we can
1840   // restore SP: SP = RBReg + SPAdd.
1841   if (RBReg != SPReg || SPAdd != 0) {
1842     assert(!HasRedZone && "This should not happen with red zone");
1843     // If SPAdd is 0, generate a copy.
1844     if (SPAdd == 0)
1845       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1846         .addReg(RBReg)
1847         .addReg(RBReg);
1848     else
1849       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1850         .addReg(RBReg)
1851         .addImm(SPAdd);
1852 
1853     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
         // FP served as the base register, so copy ScratchReg back into FP now
         // that SP has been restored.
1854     if (RBReg == FPReg)
1855       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1856         .addReg(ScratchReg)
1857         .addReg(ScratchReg);
1858 
1859     // Now load the LR from the caller's stack frame.
1860     if (MustSaveLR && !LoadedLR)
1861       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1862         .addImm(LROffset)
1863         .addReg(SPReg);
1864   }
1865 
       // Move the CR save word (loaded into TempReg above under the same
       // condition) into each saved CR field; TempReg is killed on the last move.
1866   if (MustSaveCR &&
1867       !(SingleScratchReg && MustSaveLR))
1868     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1869       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1870         .addReg(TempReg, getKillRegState(i == e-1));
1871 
1872   if (MustSaveLR) {
1873     // If ROP protection is required, an extra instruction is added to compute a
1874     // hash and then compare it to the hash stored in the prologue.
1875     if (HasROPProtect) {
1876       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1877       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1878       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1879              "ROP hash check location offset out of range.");
1880       assert(((ImmOffset & 0x7) == 0) &&
1881              "ROP hash check location offset must be 8 byte aligned.");
1882       BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1883           .addReg(ScratchReg)
1884           .addImm(ImmOffset)
1885           .addReg(SPReg);
1886     }
         // Move the reloaded return address from ScratchReg into LR.
1887     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1888   }
1889 
1890   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1891   // call optimization
1892   if (IsReturnBlock) {
1893     unsigned RetOpcode = MBBI->getOpcode();
1894     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1895         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1896         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1897       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1898       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1899 
1900       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1901         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1902           .addReg(SPReg).addImm(CallerAllocatedAmt);
1903       } else {
             // NOTE(review): this branch is also reached when CallerAllocatedAmt is
             // 0, and the final add reads FPReg rather than SPReg -- confirm both
             // are intended.
1904         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1905           .addImm(CallerAllocatedAmt >> 16);
1906         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1907           .addReg(ScratchReg, RegState::Kill)
1908           .addImm(CallerAllocatedAmt & 0xFFFF);
1909         BuildMI(MBB, MBBI, dl, AddInst)
1910           .addReg(SPReg)
1911           .addReg(FPReg)
1912           .addReg(ScratchReg);
1913       }
1914     } else {
           // Otherwise emit the real branch for a TCRETURN* pseudo, if the block
           // ends in one.
1915       createTailCallBranchInstr(MBB);
1916     }
1917   }
1918 }
1919 
1920 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1921   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1922 
1923   // If we got this far a first terminator should exist.
1924   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1925 
1926   DebugLoc dl = MBBI->getDebugLoc();
1927   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1928 
1929   // Create branch instruction for pseudo tail call return instruction.
1930   // The TCRETURNdi variants are direct calls. Valid targets for those are
1931   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1932   // since we can tail call external functions with PC-Rel (i.e. we don't need
1933   // to worry about different TOC pointers). Some of the external functions will
1934   // be MO_GlobalAddress while others like memcpy for example, are going to
1935   // be MO_ExternalSymbol.
1936   unsigned RetOpcode = MBBI->getOpcode();
1937   if (RetOpcode == PPC::TCRETURNdi) {
1938     MBBI = MBB.getLastNonDebugInstr();
1939     MachineOperand &JumpTarget = MBBI->getOperand(0);
1940     if (JumpTarget.isGlobal())
1941       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1942         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1943     else if (JumpTarget.isSymbol())
1944       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1945         addExternalSymbol(JumpTarget.getSymbolName());
1946     else
1947       llvm_unreachable("Expecting Global or External Symbol");
1948   } else if (RetOpcode == PPC::TCRETURNri) {
1949     MBBI = MBB.getLastNonDebugInstr();
1950     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1951     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1952   } else if (RetOpcode == PPC::TCRETURNai) {
1953     MBBI = MBB.getLastNonDebugInstr();
1954     MachineOperand &JumpTarget = MBBI->getOperand(0);
1955     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1956   } else if (RetOpcode == PPC::TCRETURNdi8) {
1957     MBBI = MBB.getLastNonDebugInstr();
1958     MachineOperand &JumpTarget = MBBI->getOperand(0);
1959     if (JumpTarget.isGlobal())
1960       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1961         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1962     else if (JumpTarget.isSymbol())
1963       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1964         addExternalSymbol(JumpTarget.getSymbolName());
1965     else
1966       llvm_unreachable("Expecting Global or External Symbol");
1967   } else if (RetOpcode == PPC::TCRETURNri8) {
1968     MBBI = MBB.getLastNonDebugInstr();
1969     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1970     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1971   } else if (RetOpcode == PPC::TCRETURNai8) {
1972     MBBI = MBB.getLastNonDebugInstr();
1973     MachineOperand &JumpTarget = MBBI->getOperand(0);
1974     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1975   }
1976 }
1977 
1978 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1979                                             BitVector &SavedRegs,
1980                                             RegScavenger *RS) const {
1981   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1982 
1983   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1984 
1985   // Do not explicitly save the callee saved VSRp registers.
1986   // The individual VSR subregisters will be saved instead.
1987   SavedRegs.reset(PPC::VSRp26);
1988   SavedRegs.reset(PPC::VSRp27);
1989   SavedRegs.reset(PPC::VSRp28);
1990   SavedRegs.reset(PPC::VSRp29);
1991   SavedRegs.reset(PPC::VSRp30);
1992   SavedRegs.reset(PPC::VSRp31);
1993 
1994   //  Save and clear the LR state.
1995   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1996   unsigned LR = RegInfo->getRARegister();
1997   FI->setMustSaveLR(MustSaveLR(MF, LR));
1998   SavedRegs.reset(LR);
1999 
2000   //  Save R31 if necessary
2001   int FPSI = FI->getFramePointerSaveIndex();
2002   const bool isPPC64 = Subtarget.isPPC64();
2003   MachineFrameInfo &MFI = MF.getFrameInfo();
2004 
2005   // If the frame pointer save index hasn't been defined yet.
2006   if (!FPSI && needsFP(MF)) {
2007     // Find out what the fix offset of the frame pointer save area.
2008     int FPOffset = getFramePointerSaveOffset();
2009     // Allocate the frame index for frame pointer save area.
2010     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2011     // Save the result.
2012     FI->setFramePointerSaveIndex(FPSI);
2013   }
2014 
2015   int BPSI = FI->getBasePointerSaveIndex();
2016   if (!BPSI && RegInfo->hasBasePointer(MF)) {
2017     int BPOffset = getBasePointerSaveOffset();
2018     // Allocate the frame index for the base pointer save area.
2019     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2020     // Save the result.
2021     FI->setBasePointerSaveIndex(BPSI);
2022   }
2023 
2024   // Reserve stack space for the PIC Base register (R30).
2025   // Only used in SVR4 32-bit.
2026   if (FI->usesPICBase()) {
2027     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2028     FI->setPICBasePointerSaveIndex(PBPSI);
2029   }
2030 
2031   // Make sure we don't explicitly spill r31, because, for example, we have
2032   // some inline asm which explicitly clobbers it, when we otherwise have a
2033   // frame pointer and are using r31's spill slot for the prologue/epilogue
2034   // code. Same goes for the base pointer and the PIC base register.
2035   if (needsFP(MF))
2036     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2037   if (RegInfo->hasBasePointer(MF))
2038     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2039   if (FI->usesPICBase())
2040     SavedRegs.reset(PPC::R30);
2041 
2042   // Reserve stack space to move the linkage area to in case of a tail call.
2043   int TCSPDelta = 0;
2044   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2045       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2046     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2047   }
2048 
2049   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2050   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2051   // object at the offset of the CR-save slot in the linkage area. The actual
2052   // save and restore of the condition register will be created as part of the
2053   // prologue and epilogue insertion, but the FixedStack object is needed to
2054   // keep the CalleSavedInfo valid.
2055   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2056        SavedRegs.test(PPC::CR4))) {
2057     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2058     const int64_t SpillOffset =
2059         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2060     int FrameIdx =
2061         MFI.CreateFixedObject(SpillSize, SpillOffset,
2062                               /* IsImmutable */ true, /* IsAliased */ false);
2063     FI->setCRSpillFrameIndex(FrameIdx);
2064   }
2065 }
2066 
2067 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2068                                                        RegScavenger *RS) const {
2069   // Get callee saved register information.
2070   MachineFrameInfo &MFI = MF.getFrameInfo();
2071   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2072 
2073   // If the function is shrink-wrapped, and if the function has a tail call, the
2074   // tail call might not be in the new RestoreBlock, so real branch instruction
2075   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2076   // RestoreBlock. So we handle this case here.
2077   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2078     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2079     for (MachineBasicBlock &MBB : MF) {
2080       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2081         createTailCallBranchInstr(MBB);
2082     }
2083   }
2084 
2085   // Early exit if no callee saved registers are modified!
2086   if (CSI.empty() && !needsFP(MF)) {
2087     addScavengingSpillSlot(MF, RS);
2088     return;
2089   }
2090 
2091   unsigned MinGPR = PPC::R31;
2092   unsigned MinG8R = PPC::X31;
2093   unsigned MinFPR = PPC::F31;
2094   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2095 
2096   bool HasGPSaveArea = false;
2097   bool HasG8SaveArea = false;
2098   bool HasFPSaveArea = false;
2099   bool HasVRSaveArea = false;
2100 
2101   SmallVector<CalleeSavedInfo, 18> GPRegs;
2102   SmallVector<CalleeSavedInfo, 18> G8Regs;
2103   SmallVector<CalleeSavedInfo, 18> FPRegs;
2104   SmallVector<CalleeSavedInfo, 18> VRegs;
2105 
2106   for (const CalleeSavedInfo &I : CSI) {
2107     Register Reg = I.getReg();
2108     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2109             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2110            "Not expecting to try to spill R2 in a function that must save TOC");
2111     if (PPC::GPRCRegClass.contains(Reg)) {
2112       HasGPSaveArea = true;
2113 
2114       GPRegs.push_back(I);
2115 
2116       if (Reg < MinGPR) {
2117         MinGPR = Reg;
2118       }
2119     } else if (PPC::G8RCRegClass.contains(Reg)) {
2120       HasG8SaveArea = true;
2121 
2122       G8Regs.push_back(I);
2123 
2124       if (Reg < MinG8R) {
2125         MinG8R = Reg;
2126       }
2127     } else if (PPC::F8RCRegClass.contains(Reg)) {
2128       HasFPSaveArea = true;
2129 
2130       FPRegs.push_back(I);
2131 
2132       if (Reg < MinFPR) {
2133         MinFPR = Reg;
2134       }
2135     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2136                PPC::CRRCRegClass.contains(Reg)) {
2137       ; // do nothing, as we already know whether CRs are spilled
2138     } else if (PPC::VRRCRegClass.contains(Reg) ||
2139                PPC::SPERCRegClass.contains(Reg)) {
2140       // Altivec and SPE are mutually exclusive, but have the same stack
2141       // alignment requirements, so overload the save area for both cases.
2142       HasVRSaveArea = true;
2143 
2144       VRegs.push_back(I);
2145 
2146       if (Reg < MinVR) {
2147         MinVR = Reg;
2148       }
2149     } else {
2150       llvm_unreachable("Unknown RegisterClass!");
2151     }
2152   }
2153 
2154   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2155   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2156 
2157   int64_t LowerBound = 0;
2158 
2159   // Take into account stack space reserved for tail calls.
2160   int TCSPDelta = 0;
2161   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2162       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2163     LowerBound = TCSPDelta;
2164   }
2165 
2166   // The Floating-point register save area is right below the back chain word
2167   // of the previous stack frame.
2168   if (HasFPSaveArea) {
2169     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2170       int FI = FPRegs[i].getFrameIdx();
2171 
2172       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2173     }
2174 
2175     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2176   }
2177 
2178   // Check whether the frame pointer register is allocated. If so, make sure it
2179   // is spilled to the correct offset.
2180   if (needsFP(MF)) {
2181     int FI = PFI->getFramePointerSaveIndex();
2182     assert(FI && "No Frame Pointer Save Slot!");
2183     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2184     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2185     HasGPSaveArea = true;
2186   }
2187 
2188   if (PFI->usesPICBase()) {
2189     int FI = PFI->getPICBasePointerSaveIndex();
2190     assert(FI && "No PIC Base Pointer Save Slot!");
2191     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2192 
2193     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2194     HasGPSaveArea = true;
2195   }
2196 
2197   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2198   if (RegInfo->hasBasePointer(MF)) {
2199     int FI = PFI->getBasePointerSaveIndex();
2200     assert(FI && "No Base Pointer Save Slot!");
2201     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2202 
2203     Register BP = RegInfo->getBaseRegister(MF);
2204     if (PPC::G8RCRegClass.contains(BP)) {
2205       MinG8R = std::min<unsigned>(MinG8R, BP);
2206       HasG8SaveArea = true;
2207     } else if (PPC::GPRCRegClass.contains(BP)) {
2208       MinGPR = std::min<unsigned>(MinGPR, BP);
2209       HasGPSaveArea = true;
2210     }
2211   }
2212 
2213   // General register save area starts right below the Floating-point
2214   // register save area.
2215   if (HasGPSaveArea || HasG8SaveArea) {
2216     // Move general register save area spill slots down, taking into account
2217     // the size of the Floating-point register save area.
2218     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2219       if (!GPRegs[i].isSpilledToReg()) {
2220         int FI = GPRegs[i].getFrameIdx();
2221         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2222       }
2223     }
2224 
2225     // Move general register save area spill slots down, taking into account
2226     // the size of the Floating-point register save area.
2227     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2228       if (!G8Regs[i].isSpilledToReg()) {
2229         int FI = G8Regs[i].getFrameIdx();
2230         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2231       }
2232     }
2233 
2234     unsigned MinReg =
2235       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2236                          TRI->getEncodingValue(MinG8R));
2237 
2238     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2239     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2240   }
2241 
2242   // For 32-bit only, the CR save area is below the general register
2243   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2244   // to the stack pointer and hence does not need an adjustment here.
2245   // Only CR2 (the first nonvolatile spilled) has an associated frame
2246   // index so that we have a single uniform save area.
2247   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2248     // Adjust the frame index of the CR spill slot.
2249     for (const auto &CSInfo : CSI) {
2250       if (CSInfo.getReg() == PPC::CR2) {
2251         int FI = CSInfo.getFrameIdx();
2252         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2253         break;
2254       }
2255     }
2256 
2257     LowerBound -= 4; // The CR save area is always 4 bytes long.
2258   }
2259 
2260   // Both Altivec and SPE have the same alignment and padding requirements
2261   // within the stack frame.
2262   if (HasVRSaveArea) {
2263     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2264     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2265     // we are using negative number here (the stack grows downward). We should
2266     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2267     // is the alignment size ( n = 16 here) and y is the size after aligning.
2268     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2269     LowerBound &= ~(15);
2270 
2271     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2272       int FI = VRegs[i].getFrameIdx();
2273 
2274       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2275     }
2276   }
2277 
2278   addScavengingSpillSlot(MF, RS);
2279 }
2280 
2281 void
2282 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2283                                          RegScavenger *RS) const {
2284   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2285   // a large stack, which will require scavenging a register to materialize a
2286   // large offset.
2287 
2288   // We need to have a scavenger spill slot for spills if the frame size is
2289   // large. In case there is no free register for large-offset addressing,
2290   // this slot is used for the necessary emergency spill. Also, we need the
2291   // slot for dynamic stack allocations.
2292 
2293   // The scavenger might be invoked if the frame offset does not fit into
2294   // the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
2295   // We don't know the complete frame size here because we've not yet computed
2296   // callee-saved register spills or the needed alignment padding.
2297   unsigned StackSize = determineFrameLayout(MF, true);
2298   MachineFrameInfo &MFI = MF.getFrameInfo();
2299   bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
2300 
2301   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2302       (hasSpills(MF) && NeedSpills)) {
2303     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2304     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2305     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2306     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2307     unsigned Size = TRI.getSpillSize(RC);
2308     Align Alignment = TRI.getSpillAlign(RC);
2309     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2310 
2311     // Might we have over-aligned allocas?
2312     bool HasAlVars =
2313         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2314 
2315     // These kinds of spills might need two registers.
2316     if (spillsCR(MF) || HasAlVars)
2317       RS->addScavengingFrameIndex(
2318           MFI.CreateStackObject(Size, Alignment, false));
2319   }
2320 }
2321 
2322 // This function checks if a callee saved gpr can be spilled to a volatile
2323 // vector register. This occurs for leaf functions when the option
2324 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2325 // which were not spilled to vectors, return false so the target independent
2326 // code can handle them by assigning a FrameIdx to a stack slot.
2327 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2328     MachineFunction &MF, const TargetRegisterInfo *TRI,
2329     std::vector<CalleeSavedInfo> &CSI) const {
2330 
2331   if (CSI.empty())
2332     return true; // Early exit if no callee saved registers are modified!
2333 
2334   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2335   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2336   const MachineRegisterInfo &MRI = MF.getRegInfo();
2337 
2338   if (Subtarget.hasSPE()) {
2339     // In case of SPE we only have SuperRegs and CRs
2340     // in our CalleSaveInfo vector.
2341 
2342     for (auto &CalleeSaveReg : CSI) {
2343       MCPhysReg Reg = CalleeSaveReg.getReg();
2344       MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
2345       MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
2346 
2347       if ( // Check only for SuperRegs.
2348           Lower &&
2349           // Replace Reg if only lower-32 bits modified
2350           !MRI.isPhysRegModified(Higher))
2351         CalleeSaveReg = CalleeSavedInfo(Lower);
2352     }
2353   }
2354 
2355   // Early exit if cannot spill gprs to volatile vector registers.
2356   MachineFrameInfo &MFI = MF.getFrameInfo();
2357   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2358     return false;
2359 
2360   // Build a BitVector of VSRs that can be used for spilling GPRs.
2361   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2362   BitVector BVCalleeSaved(TRI->getNumRegs());
2363   for (unsigned i = 0; CSRegs[i]; ++i)
2364     BVCalleeSaved.set(CSRegs[i]);
2365 
2366   for (unsigned Reg : BVAllocatable.set_bits()) {
2367     // Set to 0 if the register is not a volatile VSX register, or if it is
2368     // used in the function.
2369     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2370         MRI.isPhysRegUsed(Reg))
2371       BVAllocatable.reset(Reg);
2372   }
2373 
2374   bool AllSpilledToReg = true;
2375   unsigned LastVSRUsedForSpill = 0;
2376   for (auto &CS : CSI) {
2377     if (BVAllocatable.none())
2378       return false;
2379 
2380     Register Reg = CS.getReg();
2381 
2382     if (!PPC::G8RCRegClass.contains(Reg)) {
2383       AllSpilledToReg = false;
2384       continue;
2385     }
2386 
2387     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2388     // into one VSR using the mtvsrdd instruction.
2389     if (LastVSRUsedForSpill != 0) {
2390       CS.setDstReg(LastVSRUsedForSpill);
2391       BVAllocatable.reset(LastVSRUsedForSpill);
2392       LastVSRUsedForSpill = 0;
2393       continue;
2394     }
2395 
2396     unsigned VolatileVFReg = BVAllocatable.find_first();
2397     if (VolatileVFReg < BVAllocatable.size()) {
2398       CS.setDstReg(VolatileVFReg);
2399       LastVSRUsedForSpill = VolatileVFReg;
2400     } else {
2401       AllSpilledToReg = false;
2402     }
2403   }
2404   return AllSpilledToReg;
2405 }
2406 
2407 bool PPCFrameLowering::spillCalleeSavedRegisters(
2408     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2409     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2410 
2411   MachineFunction *MF = MBB.getParent();
2412   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2413   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2414   bool MustSaveTOC = FI->mustSaveTOC();
2415   DebugLoc DL;
2416   bool CRSpilled = false;
2417   MachineInstrBuilder CRMIB;
2418   BitVector Spilled(TRI->getNumRegs());
2419 
2420   VSRContainingGPRs.clear();
2421 
2422   // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
2423   // or two GPRs, so we need table to record information for later save/restore.
2424   for (const CalleeSavedInfo &Info : CSI) {
2425     if (Info.isSpilledToReg()) {
2426       auto &SpilledVSR =
2427           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2428       assert(SpilledVSR.second == 0 &&
2429              "Can't spill more than two GPRs into VSR!");
2430       if (SpilledVSR.first == 0)
2431         SpilledVSR.first = Info.getReg();
2432       else
2433         SpilledVSR.second = Info.getReg();
2434     }
2435   }
2436 
2437   for (const CalleeSavedInfo &I : CSI) {
2438     Register Reg = I.getReg();
2439 
2440     // CR2 through CR4 are the nonvolatile CR fields.
2441     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2442 
2443     // Add the callee-saved register as live-in; it's killed at the spill.
2444     // Do not do this for callee-saved registers that are live-in to the
2445     // function because they will already be marked live-in and this will be
2446     // adding it for a second time. It is an error to add the same register
2447     // to the set more than once.
2448     const MachineRegisterInfo &MRI = MF->getRegInfo();
2449     bool IsLiveIn = MRI.isLiveIn(Reg);
2450     if (!IsLiveIn)
2451        MBB.addLiveIn(Reg);
2452 
2453     if (CRSpilled && IsCRField) {
2454       CRMIB.addReg(Reg, RegState::ImplicitKill);
2455       continue;
2456     }
2457 
2458     // The actual spill will happen in the prologue.
2459     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2460       continue;
2461 
2462     // Insert the spill to the stack frame.
2463     if (IsCRField) {
2464       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2465       if (!Subtarget.is32BitELFABI()) {
2466         // The actual spill will happen at the start of the prologue.
2467         FuncInfo->addMustSaveCR(Reg);
2468       } else {
2469         CRSpilled = true;
2470         FuncInfo->setSpillsCR();
2471 
2472         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2473         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2474         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2475                   .addReg(Reg, RegState::ImplicitKill);
2476 
2477         MBB.insert(MI, CRMIB);
2478         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2479                                          .addReg(PPC::R12,
2480                                                  getKillRegState(true)),
2481                                          I.getFrameIdx()));
2482       }
2483     } else {
2484       if (I.isSpilledToReg()) {
2485         unsigned Dst = I.getDstReg();
2486 
2487         if (Spilled[Dst])
2488           continue;
2489 
2490         if (VSRContainingGPRs[Dst].second != 0) {
2491           assert(Subtarget.hasP9Vector() &&
2492                  "mtvsrdd is unavailable on pre-P9 targets.");
2493 
2494           NumPESpillVSR += 2;
2495           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2496               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2497               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2498         } else if (VSRContainingGPRs[Dst].second == 0) {
2499           assert(Subtarget.hasP8Vector() &&
2500                  "Can't move GPR to VSR on pre-P8 targets.");
2501 
2502           ++NumPESpillVSR;
2503           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2504                   TRI->getSubReg(Dst, PPC::sub_64))
2505               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2506         } else {
2507           llvm_unreachable("More than two GPRs spilled to a VSR!");
2508         }
2509         Spilled.set(Dst);
2510       } else {
2511         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2512         // Use !IsLiveIn for the kill flag.
2513         // We do not want to kill registers that are live in this function
2514         // before their use because they will become undefined registers.
2515         // Functions without NoUnwind need to preserve the order of elements in
2516         // saved vector registers.
2517         if (Subtarget.needsSwapsForVSXMemOps() &&
2518             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2519           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2520                                        I.getFrameIdx(), RC, TRI);
2521         else
2522           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2523                                   TRI, Register());
2524       }
2525     }
2526   }
2527   return true;
2528 }
2529 
2530 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2531                        bool CR4Spilled, MachineBasicBlock &MBB,
2532                        MachineBasicBlock::iterator MI,
2533                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2534 
2535   MachineFunction *MF = MBB.getParent();
2536   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2537   DebugLoc DL;
2538   unsigned MoveReg = PPC::R12;
2539 
2540   // 32-bit:  FP-relative
2541   MBB.insert(MI,
2542              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2543                                CSI[CSIIndex].getFrameIdx()));
2544 
2545   unsigned RestoreOp = PPC::MTOCRF;
2546   if (CR2Spilled)
2547     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2548                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2549 
2550   if (CR3Spilled)
2551     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2552                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2553 
2554   if (CR4Spilled)
2555     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2556                .addReg(MoveReg, getKillRegState(true)));
2557 }
2558 
2559 MachineBasicBlock::iterator PPCFrameLowering::
2560 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2561                               MachineBasicBlock::iterator I) const {
2562   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2563   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2564       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2565     // Add (actually subtract) back the amount the callee popped on return.
2566     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2567       bool is64Bit = Subtarget.isPPC64();
2568       CalleeAmt *= -1;
2569       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2570       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2571       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2572       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2573       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2574       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2575       const DebugLoc &dl = I->getDebugLoc();
2576 
2577       if (isInt<16>(CalleeAmt)) {
2578         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2579           .addReg(StackReg, RegState::Kill)
2580           .addImm(CalleeAmt);
2581       } else {
2582         MachineBasicBlock::iterator MBBI = I;
2583         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2584           .addImm(CalleeAmt >> 16);
2585         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2586           .addReg(TmpReg, RegState::Kill)
2587           .addImm(CalleeAmt & 0xFFFF);
2588         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2589           .addReg(StackReg, RegState::Kill)
2590           .addReg(TmpReg);
2591       }
2592     }
2593   }
2594   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2595   return MBB.erase(I);
2596 }
2597 
2598 static bool isCalleeSavedCR(unsigned Reg) {
2599   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2600 }
2601 
2602 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2603     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2604     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2605   MachineFunction *MF = MBB.getParent();
2606   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2607   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2608   bool MustSaveTOC = FI->mustSaveTOC();
2609   bool CR2Spilled = false;
2610   bool CR3Spilled = false;
2611   bool CR4Spilled = false;
2612   unsigned CSIIndex = 0;
2613   BitVector Restored(TRI->getNumRegs());
2614 
2615   // Initialize insertion-point logic; we will be restoring in reverse
2616   // order of spill.
2617   MachineBasicBlock::iterator I = MI, BeforeI = I;
2618   bool AtStart = I == MBB.begin();
2619 
2620   if (!AtStart)
2621     --BeforeI;
2622 
2623   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2624     Register Reg = CSI[i].getReg();
2625 
2626     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2627       continue;
2628 
2629     // Restore of callee saved condition register field is handled during
2630     // epilogue insertion.
2631     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2632       continue;
2633 
2634     if (Reg == PPC::CR2) {
2635       CR2Spilled = true;
2636       // The spill slot is associated only with CR2, which is the
2637       // first nonvolatile spilled.  Save it here.
2638       CSIIndex = i;
2639       continue;
2640     } else if (Reg == PPC::CR3) {
2641       CR3Spilled = true;
2642       continue;
2643     } else if (Reg == PPC::CR4) {
2644       CR4Spilled = true;
2645       continue;
2646     } else {
2647       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2648       // least one CR register, restore all spilled CRs together.
2649       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2650         bool is31 = needsFP(*MF);
2651         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2652                    CSIIndex);
2653         CR2Spilled = CR3Spilled = CR4Spilled = false;
2654       }
2655 
2656       if (CSI[i].isSpilledToReg()) {
2657         DebugLoc DL;
2658         unsigned Dst = CSI[i].getDstReg();
2659 
2660         if (Restored[Dst])
2661           continue;
2662 
2663         if (VSRContainingGPRs[Dst].second != 0) {
2664           assert(Subtarget.hasP9Vector());
2665           NumPEReloadVSR += 2;
2666           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2667                   VSRContainingGPRs[Dst].second)
2668               .addReg(Dst);
2669           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2670                   VSRContainingGPRs[Dst].first)
2671               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2672         } else if (VSRContainingGPRs[Dst].second == 0) {
2673           assert(Subtarget.hasP8Vector());
2674           ++NumPEReloadVSR;
2675           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2676                   VSRContainingGPRs[Dst].first)
2677               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2678         } else {
2679           llvm_unreachable("More than two GPRs spilled to a VSR!");
2680         }
2681 
2682         Restored.set(Dst);
2683 
2684       } else {
2685        // Default behavior for non-CR saves.
2686         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2687 
2688         // Functions without NoUnwind need to preserve the order of elements in
2689         // saved vector registers.
2690         if (Subtarget.needsSwapsForVSXMemOps() &&
2691             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2692           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2693                                         TRI);
2694         else
2695           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2696                                    Register());
2697 
2698         assert(I != MBB.begin() &&
2699                "loadRegFromStackSlot didn't insert any code!");
2700       }
2701     }
2702 
2703     // Insert in reverse order.
2704     if (AtStart)
2705       I = MBB.begin();
2706     else {
2707       I = BeforeI;
2708       ++I;
2709     }
2710   }
2711 
2712   // If we haven't yet spilled the CRs, do so now.
2713   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2714     assert(Subtarget.is32BitELFABI() &&
2715            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2716     bool is31 = needsFP(*MF);
2717     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2718   }
2719 
2720   return true;
2721 }
2722 
2723 uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2724   return TOCSaveOffset;
2725 }
2726 
2727 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2728   return FramePointerSaveOffset;
2729 }
2730 
2731 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2732   return BasePointerSaveOffset;
2733 }
2734 
2735 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2736   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2737     return false;
2738   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2739 }
2740 
2741 uint64_t PPCFrameLowering::getStackThreshold() const {
2742   // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
2743   // use `add r1, r1, <scratch_reg>` to release the stack frame.
2744   // Scratch register contains a signed 64-bit number, which is negative
2745   // when extending the stack and is positive when releasing the stack frame.
2746   // To make `stux` and `add` paired, the absolute value of the number contained
2747   // in the scratch register should be the same. Thus the maximum stack size
2748   // is (2^63)-1, i.e., LONG_MAX.
2749   if (Subtarget.isPPC64())
2750     return LONG_MAX;
2751 
2752   return TargetFrameLowering::getStackThreshold();
2753 }
2754