xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 0f0cfcff2ca65e295cd84d3eda6f8e93b76cb3a8)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterScavenging.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/Target/TargetOptions.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "framelowering"
34 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36 STATISTIC(NumPrologProbed, "Number of prologues probed");
37 
38 static cl::opt<bool>
39 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
40                      cl::desc("Enable spills in prologue to vector registers."),
41                      cl::init(false), cl::Hidden);
42 
43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44   if (STI.isAIXABI())
45     return STI.isPPC64() ? 16 : 8;
46   // SVR4 ABI:
47   return STI.isPPC64() ? 16 : 4;
48 }
49 
50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51   if (STI.isAIXABI())
52     return STI.isPPC64() ? 40 : 20;
53   return STI.isELFv2ABI() ? 24 : 40;
54 }
55 
56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57   // First slot in the general register save area.
58   return STI.isPPC64() ? -8U : -4U;
59 }
60 
61 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62   if (STI.isAIXABI() || STI.isPPC64())
63     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64 
65   // 32-bit SVR4 ABI:
66   return 8;
67 }
68 
69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70   // Third slot in the general purpose register save area.
71   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72     return -12U;
73 
74   // Second slot in the general purpose register save area.
75   return STI.isPPC64() ? -16U : -8U;
76 }
77 
78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80 }
81 
82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84                           STI.getPlatformStackAlignment(), 0),
85       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88       LinkageSize(computeLinkageSize(Subtarget)),
89       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
90       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91 
92 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
93 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
94     unsigned &NumEntries) const {
95 
96 // Floating-point register save area offsets.
97 #define CALLEE_SAVED_FPRS \
98       {PPC::F31, -8},     \
99       {PPC::F30, -16},    \
100       {PPC::F29, -24},    \
101       {PPC::F28, -32},    \
102       {PPC::F27, -40},    \
103       {PPC::F26, -48},    \
104       {PPC::F25, -56},    \
105       {PPC::F24, -64},    \
106       {PPC::F23, -72},    \
107       {PPC::F22, -80},    \
108       {PPC::F21, -88},    \
109       {PPC::F20, -96},    \
110       {PPC::F19, -104},   \
111       {PPC::F18, -112},   \
112       {PPC::F17, -120},   \
113       {PPC::F16, -128},   \
114       {PPC::F15, -136},   \
115       {PPC::F14, -144}
116 
117 // 32-bit general purpose register save area offsets shared by ELF and
118 // AIX. AIX has an extra CSR with r13.
119 #define CALLEE_SAVED_GPRS32 \
120       {PPC::R31, -4},       \
121       {PPC::R30, -8},       \
122       {PPC::R29, -12},      \
123       {PPC::R28, -16},      \
124       {PPC::R27, -20},      \
125       {PPC::R26, -24},      \
126       {PPC::R25, -28},      \
127       {PPC::R24, -32},      \
128       {PPC::R23, -36},      \
129       {PPC::R22, -40},      \
130       {PPC::R21, -44},      \
131       {PPC::R20, -48},      \
132       {PPC::R19, -52},      \
133       {PPC::R18, -56},      \
134       {PPC::R17, -60},      \
135       {PPC::R16, -64},      \
136       {PPC::R15, -68},      \
137       {PPC::R14, -72}
138 
139 // 64-bit general purpose register save area offsets.
140 #define CALLEE_SAVED_GPRS64 \
141       {PPC::X31, -8},       \
142       {PPC::X30, -16},      \
143       {PPC::X29, -24},      \
144       {PPC::X28, -32},      \
145       {PPC::X27, -40},      \
146       {PPC::X26, -48},      \
147       {PPC::X25, -56},      \
148       {PPC::X24, -64},      \
149       {PPC::X23, -72},      \
150       {PPC::X22, -80},      \
151       {PPC::X21, -88},      \
152       {PPC::X20, -96},      \
153       {PPC::X19, -104},     \
154       {PPC::X18, -112},     \
155       {PPC::X17, -120},     \
156       {PPC::X16, -128},     \
157       {PPC::X15, -136},     \
158       {PPC::X14, -144}
159 
160 // Vector register save area offsets.
161 #define CALLEE_SAVED_VRS \
162       {PPC::V31, -16},   \
163       {PPC::V30, -32},   \
164       {PPC::V29, -48},   \
165       {PPC::V28, -64},   \
166       {PPC::V27, -80},   \
167       {PPC::V26, -96},   \
168       {PPC::V25, -112},  \
169       {PPC::V24, -128},  \
170       {PPC::V23, -144},  \
171       {PPC::V22, -160},  \
172       {PPC::V21, -176},  \
173       {PPC::V20, -192}
174 
175   // Note that the offsets here overlap, but this is fixed up in
176   // processFunctionBeforeFrameFinalized.
177 
178   static const SpillSlot ELFOffsets32[] = {
179       CALLEE_SAVED_FPRS,
180       CALLEE_SAVED_GPRS32,
181 
182       // CR save area offset.  We map each of the nonvolatile CR fields
183       // to the slot for CR2, which is the first of the nonvolatile CR
184       // fields to be assigned, so that we only allocate one save slot.
185       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
186       {PPC::CR2, -4},
187 
188       // VRSAVE save area offset.
189       {PPC::VRSAVE, -4},
190 
191       CALLEE_SAVED_VRS,
192 
193       // SPE register save area (overlaps Vector save area).
194       {PPC::S31, -8},
195       {PPC::S30, -16},
196       {PPC::S29, -24},
197       {PPC::S28, -32},
198       {PPC::S27, -40},
199       {PPC::S26, -48},
200       {PPC::S25, -56},
201       {PPC::S24, -64},
202       {PPC::S23, -72},
203       {PPC::S22, -80},
204       {PPC::S21, -88},
205       {PPC::S20, -96},
206       {PPC::S19, -104},
207       {PPC::S18, -112},
208       {PPC::S17, -120},
209       {PPC::S16, -128},
210       {PPC::S15, -136},
211       {PPC::S14, -144}};
212 
213   static const SpillSlot ELFOffsets64[] = {
214       CALLEE_SAVED_FPRS,
215       CALLEE_SAVED_GPRS64,
216 
217       // VRSAVE save area offset.
218       {PPC::VRSAVE, -4},
219       CALLEE_SAVED_VRS
220   };
221 
222   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
223                                            CALLEE_SAVED_GPRS32,
224                                            // Add AIX's extra CSR.
225                                            {PPC::R13, -76},
226                                            CALLEE_SAVED_VRS};
227 
228   static const SpillSlot AIXOffsets64[] = {
229       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
230 
231   if (Subtarget.is64BitELFABI()) {
232     NumEntries = std::size(ELFOffsets64);
233     return ELFOffsets64;
234   }
235 
236   if (Subtarget.is32BitELFABI()) {
237     NumEntries = std::size(ELFOffsets32);
238     return ELFOffsets32;
239   }
240 
241   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
242 
243   if (Subtarget.isPPC64()) {
244     NumEntries = std::size(AIXOffsets64);
245     return AIXOffsets64;
246   }
247 
248   NumEntries = std::size(AIXOffsets32);
249   return AIXOffsets32;
250 }
251 
252 static bool spillsCR(const MachineFunction &MF) {
253   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254   return FuncInfo->isCRSpilled();
255 }
256 
257 static bool hasSpills(const MachineFunction &MF) {
258   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259   return FuncInfo->hasSpills();
260 }
261 
262 static bool hasNonRISpills(const MachineFunction &MF) {
263   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264   return FuncInfo->hasNonRISpills();
265 }
266 
267 /// MustSaveLR - Return true if this function requires that we save the LR
268 /// register onto the stack in the prolog and restore it in the epilog of the
269 /// function.
270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272 
273   // We need a save/restore of LR if there is any def of LR (which is
274   // defined by calls, including the PIC setup sequence), or if there is
275   // some use of the LR stack slot (e.g. for builtin_return_address).
276   // (LR comes in 32 and 64 bit versions.)
277   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279 }
280 
281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282 /// call frame size. Update the MachineFunction object with the stack size.
283 uint64_t
284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285                                                 bool UseEstimate) const {
286   unsigned NewMaxCallFrameSize = 0;
287   uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288                                             &NewMaxCallFrameSize);
289   MF.getFrameInfo().setStackSize(FrameSize);
290   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291   return FrameSize;
292 }
293 
294 /// determineFrameLayout - Determine the size of the frame and maximum call
295 /// frame size.
296 uint64_t
297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298                                        bool UseEstimate,
299                                        unsigned *NewMaxCallFrameSize) const {
300   const MachineFrameInfo &MFI = MF.getFrameInfo();
301   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302 
303   // Get the number of bytes to allocate from the FrameInfo
304   uint64_t FrameSize =
305     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306 
307   // Get stack alignments. The frame must be aligned to the greatest of these:
308   Align TargetAlign = getStackAlign(); // alignment required per the ABI
309   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
310   Align Alignment = std::max(TargetAlign, MaxAlign);
311 
312   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313 
314   unsigned LR = RegInfo->getRARegister();
315   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317                        !MFI.adjustsStack() &&       // No calls.
318                        !MustSaveLR(MF, LR) &&       // No need to save LR.
319                        !FI->mustSaveTOC() &&        // No need to save TOC.
320                        !RegInfo->hasBasePointer(MF) && // No special alignment.
321                        !MFI.isFrameAddressTaken();
322 
323   // Note: for PPC32 SVR4ABI, we can still generate stackless
324   // code if all local vars are reg-allocated.
325   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
326 
327   // Check whether we can skip adjusting the stack pointer (by using red zone)
328   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
329     // No need for frame
330     return 0;
331   }
332 
333   // Get the maximum call frame size of all the calls.
334   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
335 
336   // Maximum call frame needs to be at least big enough for linkage area.
337   unsigned minCallFrameSize = getLinkageSize();
338   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
339 
340   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
341   // that allocations will be aligned.
342   if (MFI.hasVarSizedObjects())
343     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
344 
345   // Update the new max call frame size if the caller passes in a valid pointer.
346   if (NewMaxCallFrameSize)
347     *NewMaxCallFrameSize = maxCallFrameSize;
348 
349   // Include call frame size in total.
350   FrameSize += maxCallFrameSize;
351 
352   // Make sure the frame is aligned.
353   FrameSize = alignTo(FrameSize, Alignment);
354 
355   return FrameSize;
356 }
357 
358 // hasFP - Return true if the specified function actually has a dedicated frame
359 // pointer register.
360 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
361   const MachineFrameInfo &MFI = MF.getFrameInfo();
362   // FIXME: This is pretty much broken by design: hasFP() might be called really
363   // early, before the stack layout was calculated and thus hasFP() might return
364   // true or false here depending on the time of call.
365   return (MFI.getStackSize()) && needsFP(MF);
366 }
367 
368 // needsFP - Return true if the specified function should have a dedicated frame
369 // pointer register.  This is true if the function has variable sized allocas or
370 // if frame pointer elimination is disabled.
371 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
372   const MachineFrameInfo &MFI = MF.getFrameInfo();
373 
374   // Naked functions have no stack frame pushed, so we don't have a frame
375   // pointer.
376   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
377     return false;
378 
379   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
380          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
381          MF.exposesReturnsTwice() ||
382          (MF.getTarget().Options.GuaranteedTailCallOpt &&
383           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
384 }
385 
386 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
387   // When there is dynamic alloca in this function, we can not use the frame
388   // pointer X31/R31 for the frameaddress lowering. In this case, only X1/R1
389   // always points to the backchain.
390   bool is31 = needsFP(MF) && !MF.getFrameInfo().hasVarSizedObjects();
391   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
392   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
393 
394   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
395   bool HasBP = RegInfo->hasBasePointer(MF);
396   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
397   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
398 
399   for (MachineBasicBlock &MBB : MF)
400     for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
401       --MBBI;
402       for (MachineOperand &MO : MBBI->operands()) {
403         if (!MO.isReg())
404           continue;
405 
406         switch (MO.getReg()) {
407         case PPC::FP:
408           MO.setReg(FPReg);
409           break;
410         case PPC::FP8:
411           MO.setReg(FP8Reg);
412           break;
413         case PPC::BP:
414           MO.setReg(BPReg);
415           break;
416         case PPC::BP8:
417           MO.setReg(BP8Reg);
418           break;
419 
420         }
421       }
422     }
423 }
424 
425 /*  This function will do the following:
426     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
427       respectively (defaults recommended by the ABI) and return true
428     - If MBB is not an entry block, initialize the register scavenger and look
429       for available registers.
430     - If the defaults (R0/R12) are available, return true
431     - If TwoUniqueRegsRequired is set to true, it looks for two unique
432       registers. Otherwise, look for a single available register.
433       - If the required registers are found, set SR1 and SR2 and return true.
434       - If the required registers are not found, set SR2 or both SR1 and SR2 to
435         PPC::NoRegister and return false.
436 
437     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
438     is not set, this function will attempt to find two different registers, but
439     still return true if only one register is available (and set SR1 == SR2).
440 */
441 bool
442 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
443                                       bool UseAtEnd,
444                                       bool TwoUniqueRegsRequired,
445                                       Register *SR1,
446                                       Register *SR2) const {
447   RegScavenger RS;
448   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
449   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
450 
451   // Set the defaults for the two scratch registers.
452   if (SR1)
453     *SR1 = R0;
454 
455   if (SR2) {
456     assert (SR1 && "Asking for the second scratch register but not the first?");
457     *SR2 = R12;
458   }
459 
460   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
461   if ((UseAtEnd && MBB->isReturnBlock()) ||
462       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
463     return true;
464 
465   if (UseAtEnd) {
466     // The scratch register will be used before the first terminator (or at the
467     // end of the block if there are no terminators).
468     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
469     if (MBBI == MBB->begin()) {
470       RS.enterBasicBlock(*MBB);
471     } else {
472       RS.enterBasicBlockEnd(*MBB);
473       RS.backward(MBBI);
474     }
475   } else {
476     // The scratch register will be used at the start of the block.
477     RS.enterBasicBlock(*MBB);
478   }
479 
480   // If the two registers are available, we're all good.
481   // Note that we only return here if both R0 and R12 are available because
482   // although the function may not require two unique registers, it may benefit
483   // from having two so we should try to provide them.
484   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
485     return true;
486 
487   // Get the list of callee-saved registers for the target.
488   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
489   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
490 
491   // Get all the available registers in the block.
492   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
493                                      &PPC::GPRCRegClass);
494 
495   // We shouldn't use callee-saved registers as scratch registers as they may be
496   // available when looking for a candidate block for shrink wrapping but not
497   // available when the actual prologue/epilogue is being emitted because they
498   // were added as live-in to the prologue block by PrologueEpilogueInserter.
499   for (int i = 0; CSRegs[i]; ++i)
500     BV.reset(CSRegs[i]);
501 
502   // Set the first scratch register to the first available one.
503   if (SR1) {
504     int FirstScratchReg = BV.find_first();
505     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
506   }
507 
508   // If there is another one available, set the second scratch register to that.
509   // Otherwise, set it to either PPC::NoRegister if this function requires two
510   // or to whatever SR1 is set to if this function doesn't require two.
511   if (SR2) {
512     int SecondScratchReg = BV.find_next(*SR1);
513     if (SecondScratchReg != -1)
514       *SR2 = SecondScratchReg;
515     else
516       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
517   }
518 
519   // Now that we've done our best to provide both registers, double check
520   // whether we were unable to provide enough.
521   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
522     return false;
523 
524   return true;
525 }
526 
527 // We need a scratch register for spilling LR and for spilling CR. By default,
528 // we use two scratch registers to hide latency. However, if only one scratch
529 // register is available, we can adjust for that by not overlapping the spill
530 // code. However, if we need to realign the stack (i.e. have a base pointer)
531 // and the stack frame is large, we need two scratch registers.
532 // Also, stack probe requires two scratch registers, one for old sp, one for
533 // large frame and large probe size.
534 bool
535 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
536   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
537   MachineFunction &MF = *(MBB->getParent());
538   bool HasBP = RegInfo->hasBasePointer(MF);
539   unsigned FrameSize = determineFrameLayout(MF);
540   int NegFrameSize = -FrameSize;
541   bool IsLargeFrame = !isInt<16>(NegFrameSize);
542   MachineFrameInfo &MFI = MF.getFrameInfo();
543   Align MaxAlign = MFI.getMaxAlign();
544   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
545   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
546 
547   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
548          TLI.hasInlineStackProbe(MF);
549 }
550 
551 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
552   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
553 
554   return findScratchRegister(TmpMBB, false,
555                              twoUniqueScratchRegsRequired(TmpMBB));
556 }
557 
558 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
559   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
560 
561   return findScratchRegister(TmpMBB, true);
562 }
563 
564 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
565   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
566   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
567 
568   // Abort if there is no register info or function info.
569   if (!RegInfo || !FI)
570     return false;
571 
572   // Only move the stack update on ELFv2 ABI and PPC64.
573   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
574     return false;
575 
576   // Check the frame size first and return false if it does not fit the
577   // requirements.
578   // We need a non-zero frame size as well as a frame that will fit in the red
579   // zone. This is because by moving the stack pointer update we are now storing
580   // to the red zone until the stack pointer is updated. If we get an interrupt
581   // inside the prologue but before the stack update we now have a number of
582   // stores to the red zone and those stores must all fit.
583   MachineFrameInfo &MFI = MF.getFrameInfo();
584   unsigned FrameSize = MFI.getStackSize();
585   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
586     return false;
587 
588   // Frame pointers and base pointers complicate matters so don't do anything
589   // if we have them. For example having a frame pointer will sometimes require
590   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
591   // difficult. Similar situation exists with setjmp.
592   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
593     return false;
594 
595   // Calls to fast_cc functions use different rules for passing parameters on
596   // the stack from the ABI and using PIC base in the function imposes
597   // similar restrictions to using the base pointer. It is not generally safe
598   // to move the stack pointer update in these situations.
599   if (FI->hasFastCall() || FI->usesPICBase())
600     return false;
601 
602   // Finally we can move the stack update if we do not require register
603   // scavenging. Register scavenging can introduce more spills and so
604   // may make the frame size larger than we have computed.
605   return !RegInfo->requiresFrameIndexScavenging(MF);
606 }
607 
608 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
609                                     MachineBasicBlock &MBB) const {
610   MachineBasicBlock::iterator MBBI = MBB.begin();
611   MachineFrameInfo &MFI = MF.getFrameInfo();
612   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
613   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
614   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
615 
616   const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
617   DebugLoc dl;
618   // AIX assembler does not support cfi directives.
619   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
620 
621   const bool HasFastMFLR = Subtarget.hasFastMFLR();
622 
623   // Get processor type.
624   bool isPPC64 = Subtarget.isPPC64();
625   // Get the ABI.
626   bool isSVR4ABI = Subtarget.isSVR4ABI();
627   bool isELFv2ABI = Subtarget.isELFv2ABI();
628   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
629 
630   // Work out frame sizes.
631   uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
632   int64_t NegFrameSize = -FrameSize;
633   if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
634     llvm_unreachable("Unhandled stack size!");
635 
636   if (MFI.isFrameAddressTaken())
637     replaceFPWithRealFP(MF);
638 
639   // Check if the link register (LR) must be saved.
640   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
641   bool MustSaveLR = FI->mustSaveLR();
642   bool MustSaveTOC = FI->mustSaveTOC();
643   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
644   bool MustSaveCR = !MustSaveCRs.empty();
645   // Do we have a frame pointer and/or base pointer for this function?
646   bool HasFP = hasFP(MF);
647   bool HasBP = RegInfo->hasBasePointer(MF);
648   bool HasRedZone = isPPC64 || !isSVR4ABI;
649   bool HasROPProtect = Subtarget.hasROPProtect();
650   bool HasPrivileged = Subtarget.hasPrivileged();
651 
652   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
653   Register BPReg = RegInfo->getBaseRegister(MF);
654   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
655   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
656   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
657   Register ScratchReg;
658   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
659   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
660   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
661                                                 : PPC::MFLR );
662   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
663                                                  : PPC::STW );
664   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
665                                                      : PPC::STWU );
666   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
667                                                         : PPC::STWUX);
668   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
669                                               : PPC::OR );
670   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
671                                                             : PPC::SUBFC);
672   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
673                                                                : PPC::SUBFIC);
674   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
675                                                            : PPC::MFCR);
676   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
677   const MCInstrDesc &HashST =
678       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
679                       : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
680 
681   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
682   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
683   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
684   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
685   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
686          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
687 
688   // Using the same bool variable as below to suppress compiler warnings.
689   bool SingleScratchReg = findScratchRegister(
690       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
691   assert(SingleScratchReg &&
692          "Required number of registers not available in this block");
693 
694   SingleScratchReg = ScratchReg == TempReg;
695 
696   int64_t LROffset = getReturnSaveOffset();
697 
698   int64_t FPOffset = 0;
699   if (HasFP) {
700     MachineFrameInfo &MFI = MF.getFrameInfo();
701     int FPIndex = FI->getFramePointerSaveIndex();
702     assert(FPIndex && "No Frame Pointer Save Slot!");
703     FPOffset = MFI.getObjectOffset(FPIndex);
704   }
705 
706   int64_t BPOffset = 0;
707   if (HasBP) {
708     MachineFrameInfo &MFI = MF.getFrameInfo();
709     int BPIndex = FI->getBasePointerSaveIndex();
710     assert(BPIndex && "No Base Pointer Save Slot!");
711     BPOffset = MFI.getObjectOffset(BPIndex);
712   }
713 
714   int64_t PBPOffset = 0;
715   if (FI->usesPICBase()) {
716     MachineFrameInfo &MFI = MF.getFrameInfo();
717     int PBPIndex = FI->getPICBasePointerSaveIndex();
718     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
719     PBPOffset = MFI.getObjectOffset(PBPIndex);
720   }
721 
722   // Get stack alignments.
723   Align MaxAlign = MFI.getMaxAlign();
724   if (HasBP && MaxAlign > 1)
725     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
726 
727   // Frames of 32KB & larger require special handling because they cannot be
728   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
729   bool isLargeFrame = !isInt<16>(NegFrameSize);
730 
731   // Check if we can move the stack update instruction (stdu) down the prologue
732   // past the callee saves. Hopefully this will avoid the situation where the
733   // saves are waiting for the update on the store with update to complete.
734   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
735   bool MovingStackUpdateDown = false;
736 
737   // Check if we can move the stack update.
738   if (stackUpdateCanBeMoved(MF)) {
739     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
740     for (CalleeSavedInfo CSI : Info) {
741       // If the callee saved register is spilled to a register instead of the
742       // stack then the spill no longer uses the stack pointer.
743       // This can lead to two consequences:
744       // 1) We no longer need to update the stack because the function does not
745       //    spill any callee saved registers to stack.
746       // 2) We have a situation where we still have to update the stack pointer
747       //    even though some registers are spilled to other registers. In
748       //    this case the current code moves the stack update to an incorrect
749       //    position.
750       // In either case we should abort moving the stack update operation.
751       if (CSI.isSpilledToReg()) {
752         StackUpdateLoc = MBBI;
753         MovingStackUpdateDown = false;
754         break;
755       }
756 
757       int FrIdx = CSI.getFrameIdx();
758       // If the frame index is not negative the callee saved info belongs to a
759       // stack object that is not a fixed stack object. We ignore non-fixed
760       // stack objects because we won't move the stack update pointer past them.
761       if (FrIdx >= 0)
762         continue;
763 
764       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
765         StackUpdateLoc++;
766         MovingStackUpdateDown = true;
767       } else {
768         // We need all of the Frame Indices to meet these conditions.
769         // If they do not, abort the whole operation.
770         StackUpdateLoc = MBBI;
771         MovingStackUpdateDown = false;
772         break;
773       }
774     }
775 
776     // If the operation was not aborted then update the object offset.
777     if (MovingStackUpdateDown) {
778       for (CalleeSavedInfo CSI : Info) {
779         int FrIdx = CSI.getFrameIdx();
780         if (FrIdx < 0)
781           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
782       }
783     }
784   }
785 
786   // Where in the prologue we move the CR fields depends on how many scratch
787   // registers we have, and if we need to save the link register or not. This
788   // lambda is to avoid duplicating the logic in 2 places.
789   auto BuildMoveFromCR = [&]() {
790     if (isELFv2ABI && MustSaveCRs.size() == 1) {
791     // In the ELFv2 ABI, we are not required to save all CR fields.
792     // If only one CR field is clobbered, it is more efficient to use
793     // mfocrf to selectively save just that field, because mfocrf has shorter
794     // latency compared to mfcr.
795       assert(isPPC64 && "V2 ABI is 64-bit only.");
796       MachineInstrBuilder MIB =
797           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
798       MIB.addReg(MustSaveCRs[0], RegState::Kill);
799     } else {
800       MachineInstrBuilder MIB =
801           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
802       for (unsigned CRfield : MustSaveCRs)
803         MIB.addReg(CRfield, RegState::ImplicitKill);
804     }
805   };
806 
807   // If we need to spill the CR and the LR but we don't have two separate
808   // registers available, we must spill them one at a time
809   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
810     BuildMoveFromCR();
811     BuildMI(MBB, MBBI, dl, StoreWordInst)
812         .addReg(TempReg, getKillRegState(true))
813         .addImm(CRSaveOffset)
814         .addReg(SPReg);
815   }
816 
817   if (MustSaveLR)
818     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
819 
820   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
821     BuildMoveFromCR();
822 
823   if (HasRedZone) {
824     if (HasFP)
825       BuildMI(MBB, MBBI, dl, StoreInst)
826         .addReg(FPReg)
827         .addImm(FPOffset)
828         .addReg(SPReg);
829     if (FI->usesPICBase())
830       BuildMI(MBB, MBBI, dl, StoreInst)
831         .addReg(PPC::R30)
832         .addImm(PBPOffset)
833         .addReg(SPReg);
834     if (HasBP)
835       BuildMI(MBB, MBBI, dl, StoreInst)
836         .addReg(BPReg)
837         .addImm(BPOffset)
838         .addReg(SPReg);
839   }
840 
841   // Generate the instruction to store the LR. In the case where ROP protection
842   // is required the register holding the LR should not be killed as it will be
843   // used by the hash store instruction.
844   auto SaveLR = [&](int64_t Offset) {
845     assert(MustSaveLR && "LR is not required to be saved!");
846     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
847         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
848         .addImm(Offset)
849         .addReg(SPReg);
850 
851     // Add the ROP protection Hash Store instruction.
852     // NOTE: This is technically a violation of the ABI. The hash can be saved
853     // up to 512 bytes into the Protected Zone. This can be outside of the
854     // initial 288 byte volatile program storage region in the Protected Zone.
855     // However, this restriction will be removed in an upcoming revision of the
856     // ABI.
857     if (HasROPProtect) {
858       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
859       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
860       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
861              "ROP hash save offset out of range.");
862       assert(((ImmOffset & 0x7) == 0) &&
863              "ROP hash save offset must be 8 byte aligned.");
864       BuildMI(MBB, StackUpdateLoc, dl, HashST)
865           .addReg(ScratchReg, getKillRegState(true))
866           .addImm(ImmOffset)
867           .addReg(SPReg);
868     }
869   };
870 
871   if (MustSaveLR && HasFastMFLR)
872       SaveLR(LROffset);
873 
874   if (MustSaveCR &&
875       !(SingleScratchReg && MustSaveLR)) {
876     assert(HasRedZone && "A red zone is always available on PPC64");
877     BuildMI(MBB, MBBI, dl, StoreWordInst)
878       .addReg(TempReg, getKillRegState(true))
879       .addImm(CRSaveOffset)
880       .addReg(SPReg);
881   }
882 
883   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
884   if (!FrameSize) {
885     if (MustSaveLR && !HasFastMFLR)
886       SaveLR(LROffset);
887     return;
888   }
889 
890   // Adjust stack pointer: r1 += NegFrameSize.
891   // If there is a preferred stack alignment, align R1 now
892 
893   if (HasBP && HasRedZone) {
894     // Save a copy of r1 as the base pointer.
895     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
896       .addReg(SPReg)
897       .addReg(SPReg);
898   }
899 
900   // Have we generated a STUX instruction to claim stack frame? If so,
901   // the negated frame size will be placed in ScratchReg.
902   bool HasSTUX =
903       (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
904       (HasBP && MaxAlign > 1) || isLargeFrame;
905 
906   // If we use STUX to update the stack pointer, we need the two scratch
907   // registers TempReg and ScratchReg, we have to save LR here which is stored
908   // in ScratchReg.
909   // If the offset can not be encoded into the store instruction, we also have
910   // to save LR here.
911   if (MustSaveLR && !HasFastMFLR &&
912       (HasSTUX || !isInt<16>(FrameSize + LROffset)))
913     SaveLR(LROffset);
914 
915   // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
916   // pointer is always stored at SP, we will get a free probe due to an essential
917   // STU(X) instruction.
918   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
919     // To be consistent with other targets, a pseudo instruction is emitted and
920     // will be later expanded in `inlineStackProbe`.
921     BuildMI(MBB, MBBI, dl,
922             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
923                             : PPC::PROBED_STACKALLOC_32))
924         .addDef(TempReg)
925         .addDef(ScratchReg) // ScratchReg stores the old sp.
926         .addImm(NegFrameSize);
927     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
928     // update the ScratchReg to meet the assumption that ScratchReg contains
929     // the NegFrameSize. This solution is rather tricky.
930     if (!HasRedZone) {
931       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
932           .addReg(ScratchReg)
933           .addReg(SPReg);
934     }
935   } else {
936     // This condition must be kept in sync with canUseAsPrologue.
937     if (HasBP && MaxAlign > 1) {
938       if (isPPC64)
939         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
940             .addReg(SPReg)
941             .addImm(0)
942             .addImm(64 - Log2(MaxAlign));
943       else // PPC32...
944         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
945             .addReg(SPReg)
946             .addImm(0)
947             .addImm(32 - Log2(MaxAlign))
948             .addImm(31);
949       if (!isLargeFrame) {
950         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
951             .addReg(ScratchReg, RegState::Kill)
952             .addImm(NegFrameSize);
953       } else {
954         assert(!SingleScratchReg && "Only a single scratch reg available");
955         TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
956         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
957             .addReg(ScratchReg, RegState::Kill)
958             .addReg(TempReg, RegState::Kill);
959       }
960 
961       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
962           .addReg(SPReg, RegState::Kill)
963           .addReg(SPReg)
964           .addReg(ScratchReg);
965     } else if (!isLargeFrame) {
966       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
967           .addReg(SPReg)
968           .addImm(NegFrameSize)
969           .addReg(SPReg);
970     } else {
971       TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
972       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
973           .addReg(SPReg, RegState::Kill)
974           .addReg(SPReg)
975           .addReg(ScratchReg);
976     }
977   }
978 
979   // Save the TOC register after the stack pointer update if a prologue TOC
980   // save is required for the function.
981   if (MustSaveTOC) {
982     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
983     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
984       .addReg(TOCReg, getKillRegState(true))
985       .addImm(TOCSaveOffset)
986       .addReg(SPReg);
987   }
988 
989   if (!HasRedZone) {
990     assert(!isPPC64 && "A red zone is always available on PPC64");
991     if (HasSTUX) {
992       // The negated frame size is in ScratchReg, and the SPReg has been
993       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
994       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
995       // the stack frame (i.e. the old SP), ideally, we would put the old
996       // SP into a register and use it as the base for the stores. The
997       // problem is that the only available register may be ScratchReg,
998       // which could be R0, and R0 cannot be used as a base address.
999 
1000       // First, set ScratchReg to the old SP. This may need to be modified
1001       // later.
1002       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1003         .addReg(ScratchReg, RegState::Kill)
1004         .addReg(SPReg);
1005 
1006       if (ScratchReg == PPC::R0) {
1007         // R0 cannot be used as a base register, but it can be used as an
1008         // index in a store-indexed.
1009         int LastOffset = 0;
1010         if (HasFP)  {
1011           // R0 += (FPOffset-LastOffset).
1012           // Need addic, since addi treats R0 as 0.
1013           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1014             .addReg(ScratchReg)
1015             .addImm(FPOffset-LastOffset);
1016           LastOffset = FPOffset;
1017           // Store FP into *R0.
1018           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1019             .addReg(FPReg, RegState::Kill)  // Save FP.
1020             .addReg(PPC::ZERO)
1021             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1022         }
1023         if (FI->usesPICBase()) {
1024           // R0 += (PBPOffset-LastOffset).
1025           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1026             .addReg(ScratchReg)
1027             .addImm(PBPOffset-LastOffset);
1028           LastOffset = PBPOffset;
1029           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1030             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1031             .addReg(PPC::ZERO)
1032             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1033         }
1034         if (HasBP) {
1035           // R0 += (BPOffset-LastOffset).
1036           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1037             .addReg(ScratchReg)
1038             .addImm(BPOffset-LastOffset);
1039           LastOffset = BPOffset;
1040           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1041             .addReg(BPReg, RegState::Kill)  // Save BP.
1042             .addReg(PPC::ZERO)
1043             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1044           // BP = R0-LastOffset
1045           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1046             .addReg(ScratchReg, RegState::Kill)
1047             .addImm(-LastOffset);
1048         }
1049       } else {
1050         // ScratchReg is not R0, so use it as the base register. It is
1051         // already set to the old SP, so we can use the offsets directly.
1052 
1053         // Now that the stack frame has been allocated, save all the necessary
1054         // registers using ScratchReg as the base address.
1055         if (HasFP)
1056           BuildMI(MBB, MBBI, dl, StoreInst)
1057             .addReg(FPReg)
1058             .addImm(FPOffset)
1059             .addReg(ScratchReg);
1060         if (FI->usesPICBase())
1061           BuildMI(MBB, MBBI, dl, StoreInst)
1062             .addReg(PPC::R30)
1063             .addImm(PBPOffset)
1064             .addReg(ScratchReg);
1065         if (HasBP) {
1066           BuildMI(MBB, MBBI, dl, StoreInst)
1067             .addReg(BPReg)
1068             .addImm(BPOffset)
1069             .addReg(ScratchReg);
1070           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1071             .addReg(ScratchReg, RegState::Kill)
1072             .addReg(ScratchReg);
1073         }
1074       }
1075     } else {
1076       // The frame size is a known 16-bit constant (fitting in the immediate
1077       // field of STWU). To be here we have to be compiling for PPC32.
1078       // Since the SPReg has been decreased by FrameSize, add it back to each
1079       // offset.
1080       if (HasFP)
1081         BuildMI(MBB, MBBI, dl, StoreInst)
1082           .addReg(FPReg)
1083           .addImm(FrameSize + FPOffset)
1084           .addReg(SPReg);
1085       if (FI->usesPICBase())
1086         BuildMI(MBB, MBBI, dl, StoreInst)
1087           .addReg(PPC::R30)
1088           .addImm(FrameSize + PBPOffset)
1089           .addReg(SPReg);
1090       if (HasBP) {
1091         BuildMI(MBB, MBBI, dl, StoreInst)
1092           .addReg(BPReg)
1093           .addImm(FrameSize + BPOffset)
1094           .addReg(SPReg);
1095         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1096           .addReg(SPReg)
1097           .addImm(FrameSize);
1098       }
1099     }
1100   }
1101 
1102   // Save the LR now.
1103   if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1104     SaveLR(LROffset + FrameSize);
1105 
1106   // Add Call Frame Information for the instructions we generated above.
1107   if (needsCFI) {
1108     unsigned CFIIndex;
1109 
1110     if (HasBP) {
1111       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1112       // because if the stack needed aligning then CFA won't be at a fixed
1113       // offset from FP/SP.
1114       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1115       CFIIndex = MF.addFrameInst(
1116           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1117     } else {
1118       // Adjust the definition of CFA to account for the change in SP.
1119       assert(NegFrameSize);
1120       CFIIndex = MF.addFrameInst(
1121           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1122     }
1123     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1124         .addCFIIndex(CFIIndex);
1125 
1126     if (HasFP) {
1127       // Describe where FP was saved, at a fixed offset from CFA.
1128       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1129       CFIIndex = MF.addFrameInst(
1130           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1131       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1132           .addCFIIndex(CFIIndex);
1133     }
1134 
1135     if (FI->usesPICBase()) {
1136       // Describe where R30 (PIC base) was saved, at a fixed offset from CFA.
1137       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1138       CFIIndex = MF.addFrameInst(
1139           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1140       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1141           .addCFIIndex(CFIIndex);
1142     }
1143 
1144     if (HasBP) {
1145       // Describe where BP was saved, at a fixed offset from CFA.
1146       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1147       CFIIndex = MF.addFrameInst(
1148           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1149       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1150           .addCFIIndex(CFIIndex);
1151     }
1152 
1153     if (MustSaveLR) {
1154       // Describe where LR was saved, at a fixed offset from CFA.
1155       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1156       CFIIndex = MF.addFrameInst(
1157           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1158       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1159           .addCFIIndex(CFIIndex);
1160     }
1161   }
1162 
1163   // If there is a frame pointer, copy R1 into R31
1164   if (HasFP) {
1165     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1166       .addReg(SPReg)
1167       .addReg(SPReg);
1168 
1169     if (!HasBP && needsCFI) {
1170       // Change the definition of CFA from SP+offset to FP+offset, because SP
1171       // will change at every alloca.
1172       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1173       unsigned CFIIndex = MF.addFrameInst(
1174           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1175 
1176       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1177           .addCFIIndex(CFIIndex);
1178     }
1179   }
1180 
1181   if (needsCFI) {
1182     // Describe where callee saved registers were saved, at fixed offsets from
1183     // CFA.
1184     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1185     for (const CalleeSavedInfo &I : CSI) {
1186       Register Reg = I.getReg();
1187       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1188 
1189       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1190       // subregisters of CR2. We just need to emit a move of CR2.
1191       if (PPC::CRBITRCRegClass.contains(Reg))
1192         continue;
1193 
1194       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1195         continue;
1196 
1197       // For 64-bit SVR4 when we have spilled CRs, the spill location
1198       // is SP+8, not a frame-relative slot.
1199       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1200         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1201         // the whole CR word.  In the ELFv2 ABI, every CR that was
1202         // actually saved gets its own CFI record.
1203         Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1204         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1205             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1206         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1207             .addCFIIndex(CFIIndex);
1208         continue;
1209       }
1210 
1211       if (I.isSpilledToReg()) {
1212         unsigned SpilledReg = I.getDstReg();
1213         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1214             nullptr, MRI->getDwarfRegNum(Reg, true),
1215             MRI->getDwarfRegNum(SpilledReg, true)));
1216         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1217           .addCFIIndex(CFIRegister);
1218       } else {
1219         int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1220         // We have changed the object offset above but we do not want to change
1221         // the actual offsets in the CFI instruction so we have to undo the
1222         // offset change here.
1223         if (MovingStackUpdateDown)
1224           Offset -= NegFrameSize;
1225 
1226         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1227             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1228         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1229             .addCFIIndex(CFIIndex);
1230       }
1231     }
1232   }
1233 }
1234 
1235 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1236                                         MachineBasicBlock &PrologMBB) const {
1237   bool isPPC64 = Subtarget.isPPC64();
1238   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1239   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1240   MachineFrameInfo &MFI = MF.getFrameInfo();
1241   const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
1242   // AIX assembler does not support cfi directives.
1243   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1244   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1245     int Opc = MI.getOpcode();
1246     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1247   });
1248   if (StackAllocMIPos == PrologMBB.end())
1249     return;
1250   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1251   MachineBasicBlock *CurrentMBB = &PrologMBB;
1252   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1253   MachineInstr &MI = *StackAllocMIPos;
1254   int64_t NegFrameSize = MI.getOperand(2).getImm();
1255   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1256   int64_t NegProbeSize = -(int64_t)ProbeSize;
1257   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1258   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1259   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1260   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1261   Register ScratchReg = MI.getOperand(0).getReg();
1262   Register FPReg = MI.getOperand(1).getReg();
1263   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1264   bool HasBP = RegInfo->hasBasePointer(MF);
1265   Register BPReg = RegInfo->getBaseRegister(MF);
1266   Align MaxAlign = MFI.getMaxAlign();
1267   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1268   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1269   // Subroutines to generate .cfi_* directives.
1270   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1271                             MachineBasicBlock::iterator MBBI, Register Reg) {
1272     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1273     unsigned CFIIndex = MF.addFrameInst(
1274         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1275     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1276         .addCFIIndex(CFIIndex);
1277   };
1278   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1279                          MachineBasicBlock::iterator MBBI, Register Reg,
1280                          int Offset) {
1281     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1282     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1283         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1284     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1285         .addCFIIndex(CFIIndex);
1286   };
1287   // Subroutine to determine if we can use the Imm as part of d-form.
1288   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1289   // Subroutine to materialize the Imm into TempReg.
1290   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1291                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1292                             Register &TempReg) {
1293     assert(isInt<32>(Imm) && "Unhandled imm");
1294     if (isInt<16>(Imm))
1295       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1296           .addImm(Imm);
1297     else {
1298       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1299           .addImm(Imm >> 16);
1300       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1301           .addReg(TempReg)
1302           .addImm(Imm & 0xFFFF);
1303     }
1304   };
1305   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1306   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1307                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1308                               Register NegSizeReg, bool UseDForm,
1309                               Register StoreReg) {
1310     if (UseDForm)
1311       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1312           .addReg(StoreReg)
1313           .addImm(NegSize)
1314           .addReg(SPReg);
1315     else
1316       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1317           .addReg(StoreReg)
1318           .addReg(SPReg)
1319           .addReg(NegSizeReg);
1320   };
1321   // Used to probe stack when realignment is required.
1322   // Note that, according to ABI's requirement, *sp must always equal the
1323   // value of the back-chain pointer; only st(w|d)u(x) can be used to update sp.
1324   // Following is pseudo code:
1325   // final_sp = (sp & align) + negframesize;
1326   // neg_gap = final_sp - sp;
1327   // while (neg_gap < negprobesize) {
1328   //   stdu fp, negprobesize(sp);
1329   //   neg_gap -= negprobesize;
1330   // }
1331   // stdux fp, sp, neg_gap
1332   //
1333   // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
1334   // before probe code, we don't need to save it, so we get one additional reg
1335   // that can be used to materialize the probesize if needed to use xform.
1336   // Otherwise, we can NOT materialize probesize, so we can only use Dform for
1337   // now.
1338   //
1339   // The allocations are:
1340   // if (HasBP && HasRedzone) {
1341   //   r0: materialize the probesize if needed so that we can use xform.
1342   //   r12: `neg_gap`
1343   // } else {
1344   //   r0: back-chain pointer
1345   //   r12: `neg_gap`.
1346   // }
1347   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1348                                  MachineBasicBlock::iterator MBBI,
1349                                  Register ScratchReg, Register TempReg) {
1350     assert(HasBP && "The function is supposed to have base pointer when its "
1351                     "stack is realigned.");
1352     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1353 
1354     // FIXME: We can eliminate this limitation if we get more information about
1355     // which parts of the red zone are already used. A used red zone can be
1356     // treated as probed. But there might be `holes' in the probed red zone,
1357     // which could complicate the implementation.
1358     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1359            "Probe size should be larger or equal to the size of red-zone so "
1360            "that red-zone is not clobbered by probing.");
1361 
1362     Register &FinalStackPtr = TempReg;
1363     // FIXME: We only support NegProbeSize materializable by DForm currently.
1364     // When HasBP && HasRedzone, we can use xform if we have an additional idle
1365     // register.
1366     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1367     assert(isInt<16>(NegProbeSize) &&
1368            "NegProbeSize should be materializable by DForm");
1369     Register CRReg = PPC::CR0;
1370     // Layout of output assembly kinda like:
1371     // bb.0:
1372     //   ...
1373     //   sub $scratchreg, $finalsp, r1
1374     //   cmpdi $scratchreg, <negprobesize>
1375     //   bge bb.2
1376     // bb.1:
1377     //   stdu <backchain>, <negprobesize>(r1)
1378     //   sub $scratchreg, $scratchreg, negprobesize
1379     //   cmpdi $scratchreg, <negprobesize>
1380     //   blt bb.1
1381     // bb.2:
1382     //   stdux <backchain>, r1, $scratchreg
1383     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1384     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1385     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1386     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1387     MF.insert(MBBInsertPoint, ProbeExitMBB);
1388     // bb.2
1389     {
1390       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1391       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1392                        BackChainPointer);
1393       if (HasRedZone)
1394         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1395         // to TempReg to satisfy it.
1396         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1397             .addReg(BPReg)
1398             .addReg(BPReg);
1399       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1400       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1401     }
1402     // bb.0
1403     {
1404       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1405           .addReg(SPReg)
1406           .addReg(FinalStackPtr);
1407       if (!HasRedZone)
1408         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1409       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1410           .addReg(ScratchReg)
1411           .addImm(NegProbeSize);
1412       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1413           .addImm(PPC::PRED_GE)
1414           .addReg(CRReg)
1415           .addMBB(ProbeExitMBB);
1416       MBB.addSuccessor(ProbeLoopBodyMBB);
1417       MBB.addSuccessor(ProbeExitMBB);
1418     }
1419     // bb.1
1420     {
1421       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1422       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1423                        0, true /*UseDForm*/, BackChainPointer);
1424       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1425               ScratchReg)
1426           .addReg(ScratchReg)
1427           .addImm(-NegProbeSize);
1428       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1429               CRReg)
1430           .addReg(ScratchReg)
1431           .addImm(NegProbeSize);
1432       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1433           .addImm(PPC::PRED_LT)
1434           .addReg(CRReg)
1435           .addMBB(ProbeLoopBodyMBB);
1436       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1437       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1438     }
1439     // Update liveins.
1440     fullyRecomputeLiveIns({ProbeExitMBB, ProbeLoopBodyMBB});
1441     return ProbeExitMBB;
1442   };
1443   // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1444   // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
1445   // the offset subtracted from SP is determined by SP's runtime value.
1446   if (HasBP && MaxAlign > 1) {
1447     // Calculate final stack pointer.
1448     if (isPPC64)
1449       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1450           .addReg(SPReg)
1451           .addImm(0)
1452           .addImm(64 - Log2(MaxAlign));
1453     else
1454       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1455           .addReg(SPReg)
1456           .addImm(0)
1457           .addImm(32 - Log2(MaxAlign))
1458           .addImm(31);
1459     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1460             FPReg)
1461         .addReg(ScratchReg)
1462         .addReg(SPReg);
1463     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1464     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1465             FPReg)
1466         .addReg(ScratchReg)
1467         .addReg(FPReg);
1468     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1469     if (needsCFI)
1470       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1471   } else {
1472     // Initialize current frame pointer.
1473     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1474     // Use FPReg to calculate CFA.
1475     if (needsCFI)
1476       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1477     // Probe residual part.
1478     if (NegResidualSize) {
1479       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1480       if (!ResidualUseDForm)
1481         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1482       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1483                        ResidualUseDForm, FPReg);
1484     }
1485     bool UseDForm = CanUseDForm(NegProbeSize);
1486     // If number of blocks is small, just probe them directly.
1487     if (NumBlocks < 3) {
1488       if (!UseDForm)
1489         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1490       for (int i = 0; i < NumBlocks; ++i)
1491         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1492                          FPReg);
1493       if (needsCFI) {
1494         // Restore using SPReg to calculate CFA.
1495         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1496       }
1497     } else {
1498       // Since CTR is a volatile register and current shrinkwrap implementation
1499       // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1500       // CTR loop to probe.
1501       // Calculate trip count and stores it in CTRReg.
1502       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1503       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1504           .addReg(ScratchReg, RegState::Kill);
1505       if (!UseDForm)
1506         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1507       // Create MBBs of the loop.
1508       MachineFunction::iterator MBBInsertPoint =
1509           std::next(CurrentMBB->getIterator());
1510       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1511       MF.insert(MBBInsertPoint, LoopMBB);
1512       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1513       MF.insert(MBBInsertPoint, ExitMBB);
1514       // Synthesize the loop body.
1515       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1516                        UseDForm, FPReg);
1517       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1518           .addMBB(LoopMBB);
1519       LoopMBB->addSuccessor(ExitMBB);
1520       LoopMBB->addSuccessor(LoopMBB);
1521       // Synthesize the exit MBB.
1522       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1523                       std::next(MachineBasicBlock::iterator(MI)),
1524                       CurrentMBB->end());
1525       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1526       CurrentMBB->addSuccessor(LoopMBB);
1527       if (needsCFI) {
1528         // Restore using SPReg to calculate CFA.
1529         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1530       }
1531       // Update liveins.
1532       fullyRecomputeLiveIns({ExitMBB, LoopMBB});
1533     }
1534   }
1535   ++NumPrologProbed;
1536   MI.eraseFromParent();
1537 }
1538 
/// Emit the function epilogue into \p MBB.
///
/// In order, the code below:
///   1. Reads the frame size and the save-slot offsets for FP, BP and the PIC
///      base, and folds the stack adjustment of a TCRETURN* pseudo into
///      FrameSize.
///   2. Chooses StackUpdateLoc, the insertion point used for the SP update,
///      the early LR load, the ROP hash check and the move to LR. It starts
///      at the first terminator and may be moved up past callee-save
///      restores when stackUpdateCanBeMoved() allows it.
///   3. Restores the stack pointer, or sets up RBReg/SPAdd so that it can be
///      restored after the remaining loads.
///   4. Reloads CR fields, LR, FP, the PIC base register and BP as required.
///   5. Writes the final SP value when RBReg is not SP or SPAdd is non-zero.
///   6. Emits the optional ROP hash check and the move to LR.
///   7. For return blocks, either pops the caller-allocated area (fastcc
///      under GuaranteedTailCallOpt ending in BLR/BLR8) or calls
///      createTailCallBranchInstr().
///
/// All other instructions are inserted before the first terminator (MBBI).
1539 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1540                                     MachineBasicBlock &MBB) const {
1541   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1542   DebugLoc dl;
1543 
       // Use the first terminator's debug location when the block has one;
       // otherwise dl stays default-constructed.
1544   if (MBBI != MBB.end())
1545     dl = MBBI->getDebugLoc();
1546 
1547   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1548   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1549 
1550   // Get alignment info so we know how to restore the SP.
1551   const MachineFrameInfo &MFI = MF.getFrameInfo();
1552 
1553   // Get the number of bytes allocated from the FrameInfo.
1554   int64_t FrameSize = MFI.getStackSize();
1555 
1556   // Get processor type.
1557   bool isPPC64 = Subtarget.isPPC64();
1558 
1559   // Check if the link register (LR) has been saved.
1560   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1561   bool MustSaveLR = FI->mustSaveLR();
1562   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1563   bool MustSaveCR = !MustSaveCRs.empty();
1564   // Do we have a frame pointer and/or base pointer for this function?
1565   bool HasFP = hasFP(MF);
1566   bool HasBP = RegInfo->hasBasePointer(MF);
1567   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1568   bool HasROPProtect = Subtarget.hasROPProtect();
1569   bool HasPrivileged = Subtarget.hasPrivileged();
1570 
       // Registers and opcodes below are picked once per function, selecting the
       // 64-bit or 32-bit variant according to isPPC64.
1571   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1572   Register BPReg = RegInfo->getBaseRegister(MF);
1573   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1574   Register ScratchReg;
1575   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1576   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1577                                                  : PPC::MTLR );
1578   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1579                                                  : PPC::LWZ );
1580   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1581                                                            : PPC::LIS );
1582   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1583                                               : PPC::OR );
1584   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1585                                                   : PPC::ORI );
1586   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1587                                                    : PPC::ADDI );
1588   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1589                                                 : PPC::ADD4 );
1590   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1591                                                      : PPC::LWZ);
1592   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1593                                                      : PPC::MTOCRF);
       // The hash-check opcode additionally depends on HasPrivileged.
1594   const MCInstrDesc &HashChk =
1595       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1596                       : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1597   int64_t LROffset = getReturnSaveOffset();
1598 
1599   int64_t FPOffset = 0;
1600 
1601   // Using the same bool variable as below to suppress compiler warnings.
1602   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1603                                               &TempReg);
1604   assert(SingleScratchReg &&
1605          "Could not find an available scratch register");
1606 
       // From here on SingleScratchReg means "ScratchReg and TempReg are the same
       // register", not "the search succeeded".
1607   SingleScratchReg = ScratchReg == TempReg;
1608 
       // The three lookups below treat a save index of 0 as "no slot" (see the
       // asserts) and otherwise read the slot's offset from the frame info.
1609   if (HasFP) {
1610     int FPIndex = FI->getFramePointerSaveIndex();
1611     assert(FPIndex && "No Frame Pointer Save Slot!");
1612     FPOffset = MFI.getObjectOffset(FPIndex);
1613   }
1614 
1615   int64_t BPOffset = 0;
1616   if (HasBP) {
1617       int BPIndex = FI->getBasePointerSaveIndex();
1618       assert(BPIndex && "No Base Pointer Save Slot!");
1619       BPOffset = MFI.getObjectOffset(BPIndex);
1620   }
1621 
1622   int64_t PBPOffset = 0;
1623   if (FI->usesPICBase()) {
1624     int PBPIndex = FI->getPICBasePointerSaveIndex();
1625     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1626     PBPOffset = MFI.getObjectOffset(PBPIndex);
1627   }
1628 
1629   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1630 
1631   if (IsReturnBlock) {
1632     unsigned RetOpcode = MBBI->getOpcode();
1633     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1634                       RetOpcode == PPC::TCRETURNdi ||
1635                       RetOpcode == PPC::TCRETURNai ||
1636                       RetOpcode == PPC::TCRETURNri8 ||
1637                       RetOpcode == PPC::TCRETURNdi8 ||
1638                       RetOpcode == PPC::TCRETURNai8;
1639 
         // For a TCRETURN* pseudo, operand 1 carries the stack adjustment as an
         // immediate; fold it into FrameSize before any restore code is built.
1640     if (UsesTCRet) {
1641       int MaxTCRetDelta = FI->getTailCallSPDelta();
1642       MachineOperand &StackAdjust = MBBI->getOperand(1);
1643       assert(StackAdjust.isImm() && "Expecting immediate value.");
1644       // Adjust stack pointer.
1645       int StackAdj = StackAdjust.getImm();
1646       int Delta = StackAdj - MaxTCRetDelta;
1647       assert((Delta >= 0) && "Delta must be positive");
1648       if (MaxTCRetDelta>0)
1649         FrameSize += (StackAdj +Delta);
1650       else
1651         FrameSize += StackAdj;
1652     }
1653   }
1654 
1655   // Frames of 32KB & larger require special handling because they cannot be
1656   // indexed into with a simple LD/LWZ immediate offset operand.
1657   bool isLargeFrame = !isInt<16>(FrameSize);
1658 
1659   // On targets without red zone, the SP needs to be restored last, so that
1660   // all live contents of the stack frame are upwards of the SP. This means
1661   // that we cannot restore SP just now, since there may be more registers
1662   // to restore from the stack frame (e.g. R31). If the frame size is not
1663   // a simple immediate value, we will need a spare register to hold the
1664   // restored SP. If the frame size is known and small, we can simply adjust
1665   // the offsets of the registers to be restored, and still use SP to restore
1666   // them. In such case, the final update of SP will be to add the frame
1667   // size to it.
1668   // To simplify the code, set RBReg to the base register used to restore
1669   // values from the stack, and set SPAdd to the value that needs to be added
1670   // to the SP at the end. The default values are as if red zone was present.
1671   unsigned RBReg = SPReg;
1672   uint64_t SPAdd = 0;
1673 
1674   // Check if we can move the stack update instruction up the epilogue
1675   // past the callee saves. This will allow the move to LR instruction
1676   // to be executed before the restores of the callee saves which means
1677   // that the callee saves can hide the latency from the MTLR instruction.
1678   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1679   if (stackUpdateCanBeMoved(MF)) {
1680     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
         // Step StackUpdateLoc back by one instruction per qualifying callee-saved
         // entry; any entry that does not qualify resets it to MBBI and stops.
1681     for (CalleeSavedInfo CSI : Info) {
1682       // If the callee saved register is spilled to another register abort the
1683       // stack update movement.
1684       if (CSI.isSpilledToReg()) {
1685         StackUpdateLoc = MBBI;
1686         break;
1687       }
1688       int FrIdx = CSI.getFrameIdx();
1689       // If the frame index is not negative the callee saved info belongs to a
1690       // stack object that is not a fixed stack object. We ignore non-fixed
1691       // stack objects because we won't move the update of the stack pointer
1692       // past them.
1693       if (FrIdx >= 0)
1694         continue;
1695 
1696       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1697         StackUpdateLoc--;
1698       else {
1699         // Abort the operation as we can't update all CSR restores.
1700         StackUpdateLoc = MBBI;
1701         break;
1702       }
1703     }
1704   }
1705 
1706   if (FrameSize) {
1707     // In the prologue, the loaded (or persistent) stack pointer value is
1708     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1709     // zone add this offset back now.
1710 
1711     // If the function has a base pointer, the stack pointer has been copied
1712     // to it so we can restore it by copying in the other direction.
1713     if (HasRedZone && HasBP) {
1714       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1715         addReg(BPReg).
1716         addReg(BPReg);
1717     }
1718     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1719     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1720     // call which invalidates the stack pointer value in SP(0). So we use the
1721     // value of R31 in this case. Similar situation exists with setjmp.
1722     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1723       assert(HasFP && "Expecting a valid frame pointer.");
1724       if (!HasRedZone)
1725         RBReg = FPReg;
1726       if (!isLargeFrame) {
1727         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1728           .addReg(FPReg).addImm(FrameSize);
1729       } else {
             // FrameSize does not fit a 16-bit immediate: materialize it in
             // ScratchReg and add it to FPReg.
             // NOTE(review): no destination register is passed to BuildMI here;
             // RBReg is supplied through addReg() without explicit flags, unlike
             // the other BuildMI calls in this function -- confirm.
1730         TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1731         BuildMI(MBB, MBBI, dl, AddInst)
1732           .addReg(RBReg)
1733           .addReg(FPReg)
1734           .addReg(ScratchReg);
1735       }
1736     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1737       if (HasRedZone) {
1738         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1739           .addReg(SPReg)
1740           .addImm(FrameSize);
1741       } else {
1742         // Make sure that adding FrameSize will not overflow the max offset
1743         // size.
1744         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1745                "Local offsets should be negative");
             // SP is left untouched for now: rebase the save-slot offsets by
             // FrameSize and remember to add FrameSize to SP at the end.
1746         SPAdd = FrameSize;
1747         FPOffset += FrameSize;
1748         BPOffset += FrameSize;
1749         PBPOffset += FrameSize;
1750       }
1751     } else {
1752       // We don't want to use ScratchReg as a base register, because it
1753       // could happen to be R0. Use FP instead, but make sure to preserve it.
1754       if (!HasRedZone) {
1755         // If FP is not saved, copy it to ScratchReg.
1756         if (!HasFP)
1757           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1758             .addReg(FPReg)
1759             .addReg(FPReg);
1760         RBReg = FPReg;
1761       }
           // Load the word at offset 0 from SP into RBReg.
1762       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1763         .addImm(0)
1764         .addReg(SPReg);
1765     }
1766   }
1767   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1768   // If there is no red zone, ScratchReg may be needed for holding a useful
1769   // value (although not the base register). Make sure it is not overwritten
1770   // too early.
1771 
1772   // If we need to restore both the LR and the CR and we only have one
1773   // available scratch register, we must do them one at a time.
1774   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1775     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1776     // is live here.
1777     assert(HasRedZone && "Expecting red zone");
1778     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1779       .addImm(CRSaveOffset)
1780       .addReg(SPReg);
         // TempReg is marked killed only on the last move-to-CR.
1781     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1782       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1783         .addReg(TempReg, getKillRegState(i == e-1));
1784   }
1785 
1786   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1787   // LR is stored in the caller's stack frame. ScratchReg will be needed
1788   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1789   // a base register anyway, because it may happen to be R0.
1790   bool LoadedLR = false;
1791   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1792     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1793       .addImm(LROffset+SPAdd)
1794       .addReg(RBReg);
1795     LoadedLR = true;
1796   }
1797 
       // Two-scratch-register (or no-LR) case: load the saved CR word now; the
       // moves into the CR fields are emitted further down.
1798   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1799     assert(RBReg == SPReg && "Should be using SP as a base register");
1800     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1801       .addImm(CRSaveOffset)
1802       .addReg(RBReg);
1803   }
1804 
1805   if (HasFP) {
1806     // If there is red zone, restore FP directly, since SP has already been
1807     // restored. Otherwise, restore the value of FP into ScratchReg.
1808     if (HasRedZone || RBReg == SPReg)
1809       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1810         .addImm(FPOffset)
1811         .addReg(SPReg);
1812     else
1813       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1814         .addImm(FPOffset)
1815         .addReg(RBReg);
1816   }
1817 
       // Reload the PIC base register (R30) from its save slot.
1818   if (FI->usesPICBase())
1819     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1820       .addImm(PBPOffset)
1821       .addReg(RBReg);
1822 
       // Reload the base pointer from its save slot.
1823   if (HasBP)
1824     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1825       .addImm(BPOffset)
1826       .addReg(RBReg);
1827 
1828   // There is nothing more to be loaded from the stack, so now we can
1829   // restore SP: SP = RBReg + SPAdd.
1830   if (RBReg != SPReg || SPAdd != 0) {
1831     assert(!HasRedZone && "This should not happen with red zone");
1832     // If SPAdd is 0, generate a copy.
1833     if (SPAdd == 0)
1834       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1835         .addReg(RBReg)
1836         .addReg(RBReg);
1837     else
1838       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1839         .addReg(RBReg)
1840         .addImm(SPAdd);
1841 
1842     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
         // FP was used as the base register; its own value was parked in
         // ScratchReg above (copied or reloaded), so copy it back now.
1843     if (RBReg == FPReg)
1844       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1845         .addReg(ScratchReg)
1846         .addReg(ScratchReg);
1847 
1848     // Now load the LR from the caller's stack frame.
1849     if (MustSaveLR && !LoadedLR)
1850       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1851         .addImm(LROffset)
1852         .addReg(SPReg);
1853   }
1854 
       // Second half of the two-scratch-register CR restore: move the word
       // loaded into TempReg earlier into each saved CR field.
1855   if (MustSaveCR &&
1856       !(SingleScratchReg && MustSaveLR))
1857     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1858       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1859         .addReg(TempReg, getKillRegState(i == e-1));
1860 
1861   if (MustSaveLR) {
1862     // If ROP protection is required, an extra instruction is added to compute a
1863     // hash and then compare it to the hash stored in the prologue.
1864     if (HasROPProtect) {
1865       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1866       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1867       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1868              "ROP hash check location offset out of range.");
1869       assert(((ImmOffset & 0x7) == 0) &&
1870              "ROP hash check location offset must be 8 byte aligned.");
1871       BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1872           .addReg(ScratchReg)
1873           .addImm(ImmOffset)
1874           .addReg(SPReg);
1875     }
         // ScratchReg holds the reloaded return address at this point.
1876     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1877   }
1878 
1879   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1880   // call optimization
1881   if (IsReturnBlock) {
1882     unsigned RetOpcode = MBBI->getOpcode();
1883     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1884         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1885         MF.getFunction().getCallingConv() == CallingConv::Fast) {
           // Note: this FI shadows the outer FI; both come from
           // MF.getInfo<PPCFunctionInfo>().
1886       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1887       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1888 
           // NOTE(review): a CallerAllocatedAmt of 0 makes this condition false,
           // so the LIS/ORI/ADD sequence in the else branch is emitted for it --
           // confirm that is intended.
1889       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1890         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1891           .addReg(SPReg).addImm(CallerAllocatedAmt);
1892       } else {
             // Build the amount in ScratchReg from its upper and lower 16-bit
             // halves, then add.
             // NOTE(review): the branch above adds to SPReg, whereas this add
             // uses FPReg as its source operand and supplies SPReg through
             // addReg() rather than as BuildMI's destination -- confirm.
1893         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1894           .addImm(CallerAllocatedAmt >> 16);
1895         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1896           .addReg(ScratchReg, RegState::Kill)
1897           .addImm(CallerAllocatedAmt & 0xFFFF);
1898         BuildMI(MBB, MBBI, dl, AddInst)
1899           .addReg(SPReg)
1900           .addReg(FPReg)
1901           .addReg(ScratchReg);
1902       }
1903     } else {
           // Every other return block goes through createTailCallBranchInstr(),
           // which emits a branch only for TCRETURN* opcodes.
1904       createTailCallBranchInstr(MBB);
1905     }
1906   }
1907 }
1908 
1909 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1910   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1911 
1912   // If we got this far a first terminator should exist.
1913   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1914 
1915   DebugLoc dl = MBBI->getDebugLoc();
1916   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1917 
1918   // Create branch instruction for pseudo tail call return instruction.
1919   // The TCRETURNdi variants are direct calls. Valid targets for those are
1920   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1921   // since we can tail call external functions with PC-Rel (i.e. we don't need
1922   // to worry about different TOC pointers). Some of the external functions will
1923   // be MO_GlobalAddress while others like memcpy for example, are going to
1924   // be MO_ExternalSymbol.
1925   unsigned RetOpcode = MBBI->getOpcode();
1926   if (RetOpcode == PPC::TCRETURNdi) {
1927     MBBI = MBB.getLastNonDebugInstr();
1928     MachineOperand &JumpTarget = MBBI->getOperand(0);
1929     if (JumpTarget.isGlobal())
1930       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1931         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1932     else if (JumpTarget.isSymbol())
1933       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1934         addExternalSymbol(JumpTarget.getSymbolName());
1935     else
1936       llvm_unreachable("Expecting Global or External Symbol");
1937   } else if (RetOpcode == PPC::TCRETURNri) {
1938     MBBI = MBB.getLastNonDebugInstr();
1939     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1940     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1941   } else if (RetOpcode == PPC::TCRETURNai) {
1942     MBBI = MBB.getLastNonDebugInstr();
1943     MachineOperand &JumpTarget = MBBI->getOperand(0);
1944     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1945   } else if (RetOpcode == PPC::TCRETURNdi8) {
1946     MBBI = MBB.getLastNonDebugInstr();
1947     MachineOperand &JumpTarget = MBBI->getOperand(0);
1948     if (JumpTarget.isGlobal())
1949       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1950         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1951     else if (JumpTarget.isSymbol())
1952       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1953         addExternalSymbol(JumpTarget.getSymbolName());
1954     else
1955       llvm_unreachable("Expecting Global or External Symbol");
1956   } else if (RetOpcode == PPC::TCRETURNri8) {
1957     MBBI = MBB.getLastNonDebugInstr();
1958     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1959     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1960   } else if (RetOpcode == PPC::TCRETURNai8) {
1961     MBBI = MBB.getLastNonDebugInstr();
1962     MachineOperand &JumpTarget = MBBI->getOperand(0);
1963     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1964   }
1965 }
1966 
1967 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1968                                             BitVector &SavedRegs,
1969                                             RegScavenger *RS) const {
1970   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1971   if (Subtarget.isAIXABI())
1972     updateCalleeSaves(MF, SavedRegs);
1973 
1974   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1975 
1976   // Do not explicitly save the callee saved VSRp registers.
1977   // The individual VSR subregisters will be saved instead.
1978   SavedRegs.reset(PPC::VSRp26);
1979   SavedRegs.reset(PPC::VSRp27);
1980   SavedRegs.reset(PPC::VSRp28);
1981   SavedRegs.reset(PPC::VSRp29);
1982   SavedRegs.reset(PPC::VSRp30);
1983   SavedRegs.reset(PPC::VSRp31);
1984 
1985   //  Save and clear the LR state.
1986   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1987   unsigned LR = RegInfo->getRARegister();
1988   FI->setMustSaveLR(MustSaveLR(MF, LR));
1989   SavedRegs.reset(LR);
1990 
1991   //  Save R31 if necessary
1992   int FPSI = FI->getFramePointerSaveIndex();
1993   const bool isPPC64 = Subtarget.isPPC64();
1994   MachineFrameInfo &MFI = MF.getFrameInfo();
1995 
1996   // If the frame pointer save index hasn't been defined yet.
1997   if (!FPSI && needsFP(MF)) {
1998     // Find out what the fix offset of the frame pointer save area.
1999     int FPOffset = getFramePointerSaveOffset();
2000     // Allocate the frame index for frame pointer save area.
2001     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2002     // Save the result.
2003     FI->setFramePointerSaveIndex(FPSI);
2004   }
2005 
2006   int BPSI = FI->getBasePointerSaveIndex();
2007   if (!BPSI && RegInfo->hasBasePointer(MF)) {
2008     int BPOffset = getBasePointerSaveOffset();
2009     // Allocate the frame index for the base pointer save area.
2010     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2011     // Save the result.
2012     FI->setBasePointerSaveIndex(BPSI);
2013   }
2014 
2015   // Reserve stack space for the PIC Base register (R30).
2016   // Only used in SVR4 32-bit.
2017   if (FI->usesPICBase()) {
2018     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2019     FI->setPICBasePointerSaveIndex(PBPSI);
2020   }
2021 
2022   // Make sure we don't explicitly spill r31, because, for example, we have
2023   // some inline asm which explicitly clobbers it, when we otherwise have a
2024   // frame pointer and are using r31's spill slot for the prologue/epilogue
2025   // code. Same goes for the base pointer and the PIC base register.
2026   if (needsFP(MF))
2027     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2028   if (RegInfo->hasBasePointer(MF))
2029     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2030   if (FI->usesPICBase())
2031     SavedRegs.reset(PPC::R30);
2032 
2033   // Reserve stack space to move the linkage area to in case of a tail call.
2034   int TCSPDelta = 0;
2035   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2036       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2037     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2038   }
2039 
2040   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2041   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2042   // object at the offset of the CR-save slot in the linkage area. The actual
2043   // save and restore of the condition register will be created as part of the
2044   // prologue and epilogue insertion, but the FixedStack object is needed to
2045   // keep the CalleSavedInfo valid.
2046   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2047        SavedRegs.test(PPC::CR4))) {
2048     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2049     const int64_t SpillOffset =
2050         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2051     int FrameIdx =
2052         MFI.CreateFixedObject(SpillSize, SpillOffset,
2053                               /* IsImmutable */ true, /* IsAliased */ false);
2054     FI->setCRSpillFrameIndex(FrameIdx);
2055   }
2056 }
2057 
2058 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2059                                                        RegScavenger *RS) const {
2060   // Get callee saved register information.
2061   MachineFrameInfo &MFI = MF.getFrameInfo();
2062   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2063 
2064   // If the function is shrink-wrapped, and if the function has a tail call, the
2065   // tail call might not be in the new RestoreBlock, so real branch instruction
2066   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2067   // RestoreBlock. So we handle this case here.
2068   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2069     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2070     for (MachineBasicBlock &MBB : MF) {
2071       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2072         createTailCallBranchInstr(MBB);
2073     }
2074   }
2075 
2076   // Early exit if no callee saved registers are modified!
2077   if (CSI.empty() && !needsFP(MF)) {
2078     addScavengingSpillSlot(MF, RS);
2079     return;
2080   }
2081 
2082   unsigned MinGPR = PPC::R31;
2083   unsigned MinG8R = PPC::X31;
2084   unsigned MinFPR = PPC::F31;
2085   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2086 
2087   bool HasGPSaveArea = false;
2088   bool HasG8SaveArea = false;
2089   bool HasFPSaveArea = false;
2090   bool HasVRSaveArea = false;
2091 
2092   SmallVector<CalleeSavedInfo, 18> GPRegs;
2093   SmallVector<CalleeSavedInfo, 18> G8Regs;
2094   SmallVector<CalleeSavedInfo, 18> FPRegs;
2095   SmallVector<CalleeSavedInfo, 18> VRegs;
2096 
2097   for (const CalleeSavedInfo &I : CSI) {
2098     Register Reg = I.getReg();
2099     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2100             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2101            "Not expecting to try to spill R2 in a function that must save TOC");
2102     if (PPC::GPRCRegClass.contains(Reg)) {
2103       HasGPSaveArea = true;
2104 
2105       GPRegs.push_back(I);
2106 
2107       if (Reg < MinGPR) {
2108         MinGPR = Reg;
2109       }
2110     } else if (PPC::G8RCRegClass.contains(Reg)) {
2111       HasG8SaveArea = true;
2112 
2113       G8Regs.push_back(I);
2114 
2115       if (Reg < MinG8R) {
2116         MinG8R = Reg;
2117       }
2118     } else if (PPC::F8RCRegClass.contains(Reg)) {
2119       HasFPSaveArea = true;
2120 
2121       FPRegs.push_back(I);
2122 
2123       if (Reg < MinFPR) {
2124         MinFPR = Reg;
2125       }
2126     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2127                PPC::CRRCRegClass.contains(Reg)) {
2128       ; // do nothing, as we already know whether CRs are spilled
2129     } else if (PPC::VRRCRegClass.contains(Reg) ||
2130                PPC::SPERCRegClass.contains(Reg)) {
2131       // Altivec and SPE are mutually exclusive, but have the same stack
2132       // alignment requirements, so overload the save area for both cases.
2133       HasVRSaveArea = true;
2134 
2135       VRegs.push_back(I);
2136 
2137       if (Reg < MinVR) {
2138         MinVR = Reg;
2139       }
2140     } else {
2141       llvm_unreachable("Unknown RegisterClass!");
2142     }
2143   }
2144 
2145   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2146   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2147 
2148   int64_t LowerBound = 0;
2149 
2150   // Take into account stack space reserved for tail calls.
2151   int TCSPDelta = 0;
2152   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2153       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2154     LowerBound = TCSPDelta;
2155   }
2156 
2157   // The Floating-point register save area is right below the back chain word
2158   // of the previous stack frame.
2159   if (HasFPSaveArea) {
2160     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2161       int FI = FPRegs[i].getFrameIdx();
2162 
2163       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2164     }
2165 
2166     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2167   }
2168 
2169   // Check whether the frame pointer register is allocated. If so, make sure it
2170   // is spilled to the correct offset.
2171   if (needsFP(MF)) {
2172     int FI = PFI->getFramePointerSaveIndex();
2173     assert(FI && "No Frame Pointer Save Slot!");
2174     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2175     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2176     HasGPSaveArea = true;
2177   }
2178 
2179   if (PFI->usesPICBase()) {
2180     int FI = PFI->getPICBasePointerSaveIndex();
2181     assert(FI && "No PIC Base Pointer Save Slot!");
2182     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2183 
2184     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2185     HasGPSaveArea = true;
2186   }
2187 
2188   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2189   if (RegInfo->hasBasePointer(MF)) {
2190     int FI = PFI->getBasePointerSaveIndex();
2191     assert(FI && "No Base Pointer Save Slot!");
2192     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2193 
2194     Register BP = RegInfo->getBaseRegister(MF);
2195     if (PPC::G8RCRegClass.contains(BP)) {
2196       MinG8R = std::min<unsigned>(MinG8R, BP);
2197       HasG8SaveArea = true;
2198     } else if (PPC::GPRCRegClass.contains(BP)) {
2199       MinGPR = std::min<unsigned>(MinGPR, BP);
2200       HasGPSaveArea = true;
2201     }
2202   }
2203 
2204   // General register save area starts right below the Floating-point
2205   // register save area.
2206   if (HasGPSaveArea || HasG8SaveArea) {
2207     // Move general register save area spill slots down, taking into account
2208     // the size of the Floating-point register save area.
2209     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2210       if (!GPRegs[i].isSpilledToReg()) {
2211         int FI = GPRegs[i].getFrameIdx();
2212         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2213       }
2214     }
2215 
2216     // Move general register save area spill slots down, taking into account
2217     // the size of the Floating-point register save area.
2218     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2219       if (!G8Regs[i].isSpilledToReg()) {
2220         int FI = G8Regs[i].getFrameIdx();
2221         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2222       }
2223     }
2224 
2225     unsigned MinReg =
2226       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2227                          TRI->getEncodingValue(MinG8R));
2228 
2229     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2230     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2231   }
2232 
2233   // For 32-bit only, the CR save area is below the general register
2234   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2235   // to the stack pointer and hence does not need an adjustment here.
2236   // Only CR2 (the first nonvolatile spilled) has an associated frame
2237   // index so that we have a single uniform save area.
2238   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2239     // Adjust the frame index of the CR spill slot.
2240     for (const auto &CSInfo : CSI) {
2241       if (CSInfo.getReg() == PPC::CR2) {
2242         int FI = CSInfo.getFrameIdx();
2243         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2244         break;
2245       }
2246     }
2247 
2248     LowerBound -= 4; // The CR save area is always 4 bytes long.
2249   }
2250 
2251   // Both Altivec and SPE have the same alignment and padding requirements
2252   // within the stack frame.
2253   if (HasVRSaveArea) {
2254     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2255     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2256     // we are using negative number here (the stack grows downward). We should
2257     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2258     // is the alignment size ( n = 16 here) and y is the size after aligning.
2259     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2260     LowerBound &= ~(15);
2261 
2262     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2263       int FI = VRegs[i].getFrameIdx();
2264 
2265       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2266     }
2267   }
2268 
2269   addScavengingSpillSlot(MF, RS);
2270 }
2271 
2272 void
2273 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2274                                          RegScavenger *RS) const {
2275   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2276   // a large stack, which will require scavenging a register to materialize a
2277   // large offset.
2278 
2279   // We need to have a scavenger spill slot for spills if the frame size is
2280   // large. In case there is no free register for large-offset addressing,
2281   // this slot is used for the necessary emergency spill. Also, we need the
2282   // slot for dynamic stack allocations.
2283 
2284   // The scavenger might be invoked if the frame offset does not fit into
2285   // the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
2286   // We don't know the complete frame size here because we've not yet computed
2287   // callee-saved register spills or the needed alignment padding.
2288   unsigned StackSize = determineFrameLayout(MF, true);
2289   MachineFrameInfo &MFI = MF.getFrameInfo();
2290   bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
2291 
2292   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2293       (hasSpills(MF) && NeedSpills)) {
2294     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2295     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2296     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2297     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2298     unsigned Size = TRI.getSpillSize(RC);
2299     Align Alignment = TRI.getSpillAlign(RC);
2300     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2301 
2302     // Might we have over-aligned allocas?
2303     bool HasAlVars =
2304         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2305 
2306     // These kinds of spills might need two registers.
2307     if (spillsCR(MF) || HasAlVars)
2308       RS->addScavengingFrameIndex(
2309           MFI.CreateStackObject(Size, Alignment, false));
2310   }
2311 }
2312 
2313 // This function checks if a callee saved gpr can be spilled to a volatile
2314 // vector register. This occurs for leaf functions when the option
2315 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2316 // which were not spilled to vectors, return false so the target independent
2317 // code can handle them by assigning a FrameIdx to a stack slot.
2318 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2319     MachineFunction &MF, const TargetRegisterInfo *TRI,
2320     std::vector<CalleeSavedInfo> &CSI) const {
2321 
2322   if (CSI.empty())
2323     return true; // Early exit if no callee saved registers are modified!
2324 
2325   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2326   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2327   const MachineRegisterInfo &MRI = MF.getRegInfo();
2328 
2329   if (Subtarget.hasSPE()) {
2330     // In case of SPE we only have SuperRegs and CRs
2331     // in our CalleSaveInfo vector.
2332 
2333     for (auto &CalleeSaveReg : CSI) {
2334       MCPhysReg Reg = CalleeSaveReg.getReg();
2335       MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
2336       MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
2337 
2338       if ( // Check only for SuperRegs.
2339           Lower &&
2340           // Replace Reg if only lower-32 bits modified
2341           !MRI.isPhysRegModified(Higher))
2342         CalleeSaveReg = CalleeSavedInfo(Lower);
2343     }
2344   }
2345 
2346   // Early exit if cannot spill gprs to volatile vector registers.
2347   MachineFrameInfo &MFI = MF.getFrameInfo();
2348   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2349     return false;
2350 
2351   // Build a BitVector of VSRs that can be used for spilling GPRs.
2352   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2353   BitVector BVCalleeSaved(TRI->getNumRegs());
2354   for (unsigned i = 0; CSRegs[i]; ++i)
2355     BVCalleeSaved.set(CSRegs[i]);
2356 
2357   for (unsigned Reg : BVAllocatable.set_bits()) {
2358     // Set to 0 if the register is not a volatile VSX register, or if it is
2359     // used in the function.
2360     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2361         MRI.isPhysRegUsed(Reg))
2362       BVAllocatable.reset(Reg);
2363   }
2364 
2365   bool AllSpilledToReg = true;
2366   unsigned LastVSRUsedForSpill = 0;
2367   for (auto &CS : CSI) {
2368     if (BVAllocatable.none())
2369       return false;
2370 
2371     Register Reg = CS.getReg();
2372 
2373     if (!PPC::G8RCRegClass.contains(Reg)) {
2374       AllSpilledToReg = false;
2375       continue;
2376     }
2377 
2378     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2379     // into one VSR using the mtvsrdd instruction.
2380     if (LastVSRUsedForSpill != 0) {
2381       CS.setDstReg(LastVSRUsedForSpill);
2382       BVAllocatable.reset(LastVSRUsedForSpill);
2383       LastVSRUsedForSpill = 0;
2384       continue;
2385     }
2386 
2387     unsigned VolatileVFReg = BVAllocatable.find_first();
2388     if (VolatileVFReg < BVAllocatable.size()) {
2389       CS.setDstReg(VolatileVFReg);
2390       LastVSRUsedForSpill = VolatileVFReg;
2391     } else {
2392       AllSpilledToReg = false;
2393     }
2394   }
2395   return AllSpilledToReg;
2396 }
2397 
2398 bool PPCFrameLowering::spillCalleeSavedRegisters(
2399     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2400     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2401 
2402   MachineFunction *MF = MBB.getParent();
2403   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2404   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2405   bool MustSaveTOC = FI->mustSaveTOC();
2406   DebugLoc DL;
2407   bool CRSpilled = false;
2408   MachineInstrBuilder CRMIB;
2409   BitVector Spilled(TRI->getNumRegs());
2410 
2411   VSRContainingGPRs.clear();
2412 
2413   // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
2414   // or two GPRs, so we need table to record information for later save/restore.
2415   for (const CalleeSavedInfo &Info : CSI) {
2416     if (Info.isSpilledToReg()) {
2417       auto &SpilledVSR =
2418           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2419       assert(SpilledVSR.second == 0 &&
2420              "Can't spill more than two GPRs into VSR!");
2421       if (SpilledVSR.first == 0)
2422         SpilledVSR.first = Info.getReg();
2423       else
2424         SpilledVSR.second = Info.getReg();
2425     }
2426   }
2427 
2428   for (const CalleeSavedInfo &I : CSI) {
2429     Register Reg = I.getReg();
2430 
2431     // CR2 through CR4 are the nonvolatile CR fields.
2432     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2433 
2434     // Add the callee-saved register as live-in; it's killed at the spill.
2435     // Do not do this for callee-saved registers that are live-in to the
2436     // function because they will already be marked live-in and this will be
2437     // adding it for a second time. It is an error to add the same register
2438     // to the set more than once.
2439     const MachineRegisterInfo &MRI = MF->getRegInfo();
2440     bool IsLiveIn = MRI.isLiveIn(Reg);
2441     if (!IsLiveIn)
2442        MBB.addLiveIn(Reg);
2443 
2444     if (CRSpilled && IsCRField) {
2445       CRMIB.addReg(Reg, RegState::ImplicitKill);
2446       continue;
2447     }
2448 
2449     // The actual spill will happen in the prologue.
2450     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2451       continue;
2452 
2453     // Insert the spill to the stack frame.
2454     if (IsCRField) {
2455       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2456       if (!Subtarget.is32BitELFABI()) {
2457         // The actual spill will happen at the start of the prologue.
2458         FuncInfo->addMustSaveCR(Reg);
2459       } else {
2460         CRSpilled = true;
2461         FuncInfo->setSpillsCR();
2462 
2463         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2464         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2465         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2466                   .addReg(Reg, RegState::ImplicitKill);
2467 
2468         MBB.insert(MI, CRMIB);
2469         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2470                                          .addReg(PPC::R12,
2471                                                  getKillRegState(true)),
2472                                          I.getFrameIdx()));
2473       }
2474     } else {
2475       if (I.isSpilledToReg()) {
2476         unsigned Dst = I.getDstReg();
2477 
2478         if (Spilled[Dst])
2479           continue;
2480 
2481         if (VSRContainingGPRs[Dst].second != 0) {
2482           assert(Subtarget.hasP9Vector() &&
2483                  "mtvsrdd is unavailable on pre-P9 targets.");
2484 
2485           NumPESpillVSR += 2;
2486           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2487               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2488               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2489         } else if (VSRContainingGPRs[Dst].second == 0) {
2490           assert(Subtarget.hasP8Vector() &&
2491                  "Can't move GPR to VSR on pre-P8 targets.");
2492 
2493           ++NumPESpillVSR;
2494           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2495                   TRI->getSubReg(Dst, PPC::sub_64))
2496               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2497         } else {
2498           llvm_unreachable("More than two GPRs spilled to a VSR!");
2499         }
2500         Spilled.set(Dst);
2501       } else {
2502         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2503         // Use !IsLiveIn for the kill flag.
2504         // We do not want to kill registers that are live in this function
2505         // before their use because they will become undefined registers.
2506         // Functions without NoUnwind need to preserve the order of elements in
2507         // saved vector registers.
2508         if (Subtarget.needsSwapsForVSXMemOps() &&
2509             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2510           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2511                                        I.getFrameIdx(), RC, TRI);
2512         else
2513           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2514                                   TRI, Register());
2515       }
2516     }
2517   }
2518   return true;
2519 }
2520 
2521 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2522                        bool CR4Spilled, MachineBasicBlock &MBB,
2523                        MachineBasicBlock::iterator MI,
2524                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2525 
2526   MachineFunction *MF = MBB.getParent();
2527   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2528   DebugLoc DL;
2529   unsigned MoveReg = PPC::R12;
2530 
2531   // 32-bit:  FP-relative
2532   MBB.insert(MI,
2533              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2534                                CSI[CSIIndex].getFrameIdx()));
2535 
2536   unsigned RestoreOp = PPC::MTOCRF;
2537   if (CR2Spilled)
2538     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2539                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2540 
2541   if (CR3Spilled)
2542     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2543                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2544 
2545   if (CR4Spilled)
2546     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2547                .addReg(MoveReg, getKillRegState(true)));
2548 }
2549 
2550 MachineBasicBlock::iterator PPCFrameLowering::
2551 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2552                               MachineBasicBlock::iterator I) const {
2553   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2554   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2555       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2556     // Add (actually subtract) back the amount the callee popped on return.
2557     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2558       bool is64Bit = Subtarget.isPPC64();
2559       CalleeAmt *= -1;
2560       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2561       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2562       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2563       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2564       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2565       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2566       const DebugLoc &dl = I->getDebugLoc();
2567 
2568       if (isInt<16>(CalleeAmt)) {
2569         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2570           .addReg(StackReg, RegState::Kill)
2571           .addImm(CalleeAmt);
2572       } else {
2573         MachineBasicBlock::iterator MBBI = I;
2574         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2575           .addImm(CalleeAmt >> 16);
2576         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2577           .addReg(TmpReg, RegState::Kill)
2578           .addImm(CalleeAmt & 0xFFFF);
2579         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2580           .addReg(StackReg, RegState::Kill)
2581           .addReg(TmpReg);
2582       }
2583     }
2584   }
2585   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2586   return MBB.erase(I);
2587 }
2588 
2589 static bool isCalleeSavedCR(unsigned Reg) {
2590   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2591 }
2592 
2593 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2594     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2595     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2596   MachineFunction *MF = MBB.getParent();
2597   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2598   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2599   bool MustSaveTOC = FI->mustSaveTOC();
2600   bool CR2Spilled = false;
2601   bool CR3Spilled = false;
2602   bool CR4Spilled = false;
2603   unsigned CSIIndex = 0;
2604   BitVector Restored(TRI->getNumRegs());
2605 
2606   // Initialize insertion-point logic; we will be restoring in reverse
2607   // order of spill.
2608   MachineBasicBlock::iterator I = MI, BeforeI = I;
2609   bool AtStart = I == MBB.begin();
2610 
2611   if (!AtStart)
2612     --BeforeI;
2613 
2614   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2615     Register Reg = CSI[i].getReg();
2616 
2617     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2618       continue;
2619 
2620     // Restore of callee saved condition register field is handled during
2621     // epilogue insertion.
2622     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2623       continue;
2624 
2625     if (Reg == PPC::CR2) {
2626       CR2Spilled = true;
2627       // The spill slot is associated only with CR2, which is the
2628       // first nonvolatile spilled.  Save it here.
2629       CSIIndex = i;
2630       continue;
2631     } else if (Reg == PPC::CR3) {
2632       CR3Spilled = true;
2633       continue;
2634     } else if (Reg == PPC::CR4) {
2635       CR4Spilled = true;
2636       continue;
2637     } else {
2638       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2639       // least one CR register, restore all spilled CRs together.
2640       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2641         bool is31 = needsFP(*MF);
2642         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2643                    CSIIndex);
2644         CR2Spilled = CR3Spilled = CR4Spilled = false;
2645       }
2646 
2647       if (CSI[i].isSpilledToReg()) {
2648         DebugLoc DL;
2649         unsigned Dst = CSI[i].getDstReg();
2650 
2651         if (Restored[Dst])
2652           continue;
2653 
2654         if (VSRContainingGPRs[Dst].second != 0) {
2655           assert(Subtarget.hasP9Vector());
2656           NumPEReloadVSR += 2;
2657           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2658                   VSRContainingGPRs[Dst].second)
2659               .addReg(Dst);
2660           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2661                   VSRContainingGPRs[Dst].first)
2662               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2663         } else if (VSRContainingGPRs[Dst].second == 0) {
2664           assert(Subtarget.hasP8Vector());
2665           ++NumPEReloadVSR;
2666           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2667                   VSRContainingGPRs[Dst].first)
2668               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2669         } else {
2670           llvm_unreachable("More than two GPRs spilled to a VSR!");
2671         }
2672 
2673         Restored.set(Dst);
2674 
2675       } else {
2676         // Default behavior for non-CR saves.
2677         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2678 
2679         // Functions without NoUnwind need to preserve the order of elements in
2680         // saved vector registers.
2681         if (Subtarget.needsSwapsForVSXMemOps() &&
2682             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2683           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2684                                         TRI);
2685         else
2686           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2687                                    Register());
2688 
2689         assert(I != MBB.begin() &&
2690                "loadRegFromStackSlot didn't insert any code!");
2691       }
2692     }
2693 
2694     // Insert in reverse order.
2695     if (AtStart)
2696       I = MBB.begin();
2697     else {
2698       I = BeforeI;
2699       ++I;
2700     }
2701   }
2702 
2703   // If we haven't yet spilled the CRs, do so now.
2704   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2705     assert(Subtarget.is32BitELFABI() &&
2706            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2707     bool is31 = needsFP(*MF);
2708     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2709   }
2710 
2711   return true;
2712 }
2713 
2714 uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2715   return TOCSaveOffset;
2716 }
2717 
2718 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2719   return FramePointerSaveOffset;
2720 }
2721 
2722 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2723   return BasePointerSaveOffset;
2724 }
2725 
2726 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2727   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2728     return false;
2729   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2730 }
2731 
2732 void PPCFrameLowering::updateCalleeSaves(const MachineFunction &MF,
2733                                          BitVector &SavedRegs) const {
2734   // The AIX ABI uses traceback tables for EH which require that if callee-saved
2735   // register N is used, all registers N-31 must be saved/restored.
2736   // NOTE: The check for AIX is not actually what is relevant. Traceback tables
2737   // on Linux have the same requirements. It is just that AIX is the only ABI
2738   // for which we actually use traceback tables. If another ABI needs to be
2739   // supported that also uses them, we can add a check such as
2740   // Subtarget.usesTraceBackTables().
2741   assert(Subtarget.isAIXABI() &&
2742          "Function updateCalleeSaves should only be called for AIX.");
2743 
2744   // If there are no callee saves then there is nothing to do.
2745   if (SavedRegs.none())
2746     return;
2747 
2748   const MCPhysReg *CSRegs =
2749       Subtarget.getRegisterInfo()->getCalleeSavedRegs(&MF);
2750   MCPhysReg LowestGPR = PPC::R31;
2751   MCPhysReg LowestG8R = PPC::X31;
2752   MCPhysReg LowestFPR = PPC::F31;
2753   MCPhysReg LowestVR = PPC::V31;
2754 
2755   // Traverse the CSRs twice so as not to rely on ascending ordering of
2756   // registers in the array. The first pass finds the lowest numbered
2757   // register and the second pass marks all higher numbered registers
2758   // for spilling.
2759   for (int i = 0; CSRegs[i]; i++) {
2760     // Get the lowest numbered register for each class that actually needs
2761     // to be saved.
2762     MCPhysReg Cand = CSRegs[i];
2763     if (!SavedRegs.test(Cand))
2764       continue;
2765     if (PPC::GPRCRegClass.contains(Cand) && Cand < LowestGPR)
2766       LowestGPR = Cand;
2767     else if (PPC::G8RCRegClass.contains(Cand) && Cand < LowestG8R)
2768       LowestG8R = Cand;
2769     else if ((PPC::F4RCRegClass.contains(Cand) ||
2770               PPC::F8RCRegClass.contains(Cand)) &&
2771              Cand < LowestFPR)
2772       LowestFPR = Cand;
2773     else if (PPC::VRRCRegClass.contains(Cand) && Cand < LowestVR)
2774       LowestVR = Cand;
2775   }
2776 
2777   for (int i = 0; CSRegs[i]; i++) {
2778     MCPhysReg Cand = CSRegs[i];
2779     if ((PPC::GPRCRegClass.contains(Cand) && Cand > LowestGPR) ||
2780         (PPC::G8RCRegClass.contains(Cand) && Cand > LowestG8R) ||
2781         ((PPC::F4RCRegClass.contains(Cand) ||
2782           PPC::F8RCRegClass.contains(Cand)) &&
2783          Cand > LowestFPR) ||
2784         (PPC::VRRCRegClass.contains(Cand) && Cand > LowestVR))
2785       SavedRegs.set(Cand);
2786   }
2787 }
2788 
2789 uint64_t PPCFrameLowering::getStackThreshold() const {
2790   // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
2791   // use `add r1, r1, <scratch_reg>` to release the stack frame.
2792   // Scratch register contains a signed 64-bit number, which is negative
2793   // when extending the stack and is positive when releasing the stack frame.
2794   // To make `stux` and `add` paired, the absolute value of the number contained
2795   // in the scratch register should be the same. Thus the maximum stack size
2796   // is (2^63)-1, i.e., LONG_MAX.
2797   if (Subtarget.isPPC64())
2798     return LONG_MAX;
2799 
2800   return TargetFrameLowering::getStackThreshold();
2801 }
2802