xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MCTargetDesc/PPCPredicates.h"
14 #include "PPCFrameLowering.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Target/TargetOptions.h"
29 
30 using namespace llvm;
31 
// Debug/statistics category name used by the STATISTIC counters below.
#define DEBUG_TYPE "framelowering"
// Counters for prologue/epilogue work done by this file; the description
// strings are what gets printed alongside each counter.
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

// Hidden command-line switch "ppc-enable-pe-vector-spills". It defaults to
// false; when set it requests that prologue spills go to vector registers.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
41 
/// VRRegNo - Map from a numbered VR register to its enum value.
///
/// The table has exactly 32 entries, and entry i is PPC::V<i>, so it may only
/// be indexed with values in the range [0, 31].
static const MCPhysReg VRRegNo[] = {
 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
50 
51 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
52   if (STI.isAIXABI())
53     return STI.isPPC64() ? 16 : 8;
54   // SVR4 ABI:
55   return STI.isPPC64() ? 16 : 4;
56 }
57 
58 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
59   if (STI.isAIXABI())
60     return STI.isPPC64() ? 40 : 20;
61   return STI.isELFv2ABI() ? 24 : 40;
62 }
63 
64 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
65   // First slot in the general register save area.
66   return STI.isPPC64() ? -8U : -4U;
67 }
68 
69 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
70   if (STI.isAIXABI() || STI.isPPC64())
71     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
72 
73   // 32-bit SVR4 ABI:
74   return 8;
75 }
76 
77 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
78   // Third slot in the general purpose register save area.
79   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
80     return -12U;
81 
82   // Second slot in the general purpose register save area.
83   return STI.isPPC64() ? -16U : -8U;
84 }
85 
86 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
87   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
88 }
89 
/// Construct the PPC frame lowering for the given subtarget.
///
/// The base class is configured with a downward-growing stack, the
/// subtarget's platform stack alignment, and a third argument of 0. All of
/// the ABI-dependent save offsets and the linkage size are computed once here
/// by the compute* helpers and cached in members.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
99 
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
//
// Returns a pointer to a function-local static table of {register, offset}
// pairs and stores the number of table entries in NumEntries. The table is
// chosen from the subtarget: 64-bit ELF, then 32-bit ELF, and otherwise AIX
// (asserted), split by pointer width. The returned storage is static, so the
// caller must not free it.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// The CALLEE_SAVED_* macros below expand to comma-separated initializer lists
// so that the same register/offset rows can be shared between several tables.

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

// Vector register save area offsets.
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  // 32-bit ELF: FPRs, GPRs, one CR slot, VRSAVE, vector registers and the
  // SPE registers.
  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset.  We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  // 64-bit ELF: FPRs, 64-bit GPRs, VRSAVE and vector registers. Unlike the
  // 32-bit table there is no CR entry here.
  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  // 32-bit AIX: FPRs and GPRs plus r13; no vector entries yet.
  static const SpillSlot AIXOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,
      // Add AIX's extra CSR.
      {PPC::R13, -76},
      // TODO: Update when we add vector support for AIX.
  };

  // 64-bit AIX: FPRs and 64-bit GPRs; no vector entries yet.
  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,
      // TODO: Update when we add vector support for AIX.
  };

  // Pick the table that matches the subtarget's ABI and pointer width.
  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  // Anything that is not ELF must be AIX at this point.
  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}
264 
265 /// RemoveVRSaveCode - We have found that this function does not need any code
266 /// to manipulate the VRSAVE register, even though it uses vector registers.
267 /// This can happen when the only registers used are known to be live in or out
268 /// of the function.  Remove all of the VRSAVE related code from the function.
269 /// FIXME: The removal of the code results in a compile failure at -O0 when the
270 /// function contains a function call, as the GPR containing original VRSAVE
271 /// contents is spilled and reloaded around the call.  Without the prolog code,
272 /// the spill instruction refers to an undefined register.  This code needs
273 /// to account for all uses of that GPR.
274 static void RemoveVRSaveCode(MachineInstr &MI) {
275   MachineBasicBlock *Entry = MI.getParent();
276   MachineFunction *MF = Entry->getParent();
277 
278   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
279   MachineBasicBlock::iterator MBBI = MI;
280   ++MBBI;
281   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
282   MBBI->eraseFromParent();
283 
284   bool RemovedAllMTVRSAVEs = true;
285   // See if we can find and remove the MTVRSAVE instruction from all of the
286   // epilog blocks.
287   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
288     // If last instruction is a return instruction, add an epilogue
289     if (I->isReturnBlock()) {
290       bool FoundIt = false;
291       for (MBBI = I->end(); MBBI != I->begin(); ) {
292         --MBBI;
293         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
294           MBBI->eraseFromParent();  // remove it.
295           FoundIt = true;
296           break;
297         }
298       }
299       RemovedAllMTVRSAVEs &= FoundIt;
300     }
301   }
302 
303   // If we found and removed all MTVRSAVE instructions, remove the read of
304   // VRSAVE as well.
305   if (RemovedAllMTVRSAVEs) {
306     MBBI = MI;
307     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
308     --MBBI;
309     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
310     MBBI->eraseFromParent();
311   }
312 
313   // Finally, nuke the UPDATE_VRSAVE.
314   MI.eraseFromParent();
315 }
316 
317 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
318 // instruction selector.  Based on the vector registers that have been used,
319 // transform this into the appropriate ORI instruction.
320 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
321   MachineFunction *MF = MI.getParent()->getParent();
322   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
323   DebugLoc dl = MI.getDebugLoc();
324 
325   const MachineRegisterInfo &MRI = MF->getRegInfo();
326   unsigned UsedRegMask = 0;
327   for (unsigned i = 0; i != 32; ++i)
328     if (MRI.isPhysRegModified(VRRegNo[i]))
329       UsedRegMask |= 1 << (31-i);
330 
331   // Live in and live out values already must be in the mask, so don't bother
332   // marking them.
333   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
334     unsigned RegNo = TRI->getEncodingValue(LI.first);
335     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
336       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
337   }
338 
339   // Live out registers appear as use operands on return instructions.
340   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
341        UsedRegMask != 0 && BI != BE; ++BI) {
342     const MachineBasicBlock &MBB = *BI;
343     if (!MBB.isReturnBlock())
344       continue;
345     const MachineInstr &Ret = MBB.back();
346     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
347       const MachineOperand &MO = Ret.getOperand(I);
348       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
349         continue;
350       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
351       UsedRegMask &= ~(1 << (31-RegNo));
352     }
353   }
354 
355   // If no registers are used, turn this into a copy.
356   if (UsedRegMask == 0) {
357     // Remove all VRSAVE code.
358     RemoveVRSaveCode(MI);
359     return;
360   }
361 
362   Register SrcReg = MI.getOperand(1).getReg();
363   Register DstReg = MI.getOperand(0).getReg();
364 
365   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
366     if (DstReg != SrcReg)
367       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
368           .addReg(SrcReg)
369           .addImm(UsedRegMask);
370     else
371       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
372           .addReg(SrcReg, RegState::Kill)
373           .addImm(UsedRegMask);
374   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
375     if (DstReg != SrcReg)
376       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
377           .addReg(SrcReg)
378           .addImm(UsedRegMask >> 16);
379     else
380       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
381           .addReg(SrcReg, RegState::Kill)
382           .addImm(UsedRegMask >> 16);
383   } else {
384     if (DstReg != SrcReg)
385       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
386           .addReg(SrcReg)
387           .addImm(UsedRegMask >> 16);
388     else
389       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
390           .addReg(SrcReg, RegState::Kill)
391           .addImm(UsedRegMask >> 16);
392 
393     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
394         .addReg(DstReg, RegState::Kill)
395         .addImm(UsedRegMask & 0xFFFF);
396   }
397 
398   // Remove the old UPDATE_VRSAVE instruction.
399   MI.eraseFromParent();
400 }
401 
402 static bool spillsCR(const MachineFunction &MF) {
403   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
404   return FuncInfo->isCRSpilled();
405 }
406 
407 static bool spillsVRSAVE(const MachineFunction &MF) {
408   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
409   return FuncInfo->isVRSAVESpilled();
410 }
411 
412 static bool hasSpills(const MachineFunction &MF) {
413   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
414   return FuncInfo->hasSpills();
415 }
416 
417 static bool hasNonRISpills(const MachineFunction &MF) {
418   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
419   return FuncInfo->hasNonRISpills();
420 }
421 
422 /// MustSaveLR - Return true if this function requires that we save the LR
423 /// register onto the stack in the prolog and restore it in the epilog of the
424 /// function.
425 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
426   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
427 
428   // We need a save/restore of LR if there is any def of LR (which is
429   // defined by calls, including the PIC setup sequence), or if there is
430   // some use of the LR stack slot (e.g. for builtin_return_address).
431   // (LR comes in 32 and 64 bit versions.)
432   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
433   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
434 }
435 
436 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
437 /// call frame size. Update the MachineFunction object with the stack size.
438 unsigned
439 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
440                                                 bool UseEstimate) const {
441   unsigned NewMaxCallFrameSize = 0;
442   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
443                                             &NewMaxCallFrameSize);
444   MF.getFrameInfo().setStackSize(FrameSize);
445   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
446   return FrameSize;
447 }
448 
449 /// determineFrameLayout - Determine the size of the frame and maximum call
450 /// frame size.
451 unsigned
452 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
453                                        bool UseEstimate,
454                                        unsigned *NewMaxCallFrameSize) const {
455   const MachineFrameInfo &MFI = MF.getFrameInfo();
456   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
457 
458   // Get the number of bytes to allocate from the FrameInfo
459   unsigned FrameSize =
460     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
461 
462   // Get stack alignments. The frame must be aligned to the greatest of these:
463   Align TargetAlign = getStackAlign(); // alignment required per the ABI
464   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
465   Align Alignment = std::max(TargetAlign, MaxAlign);
466 
467   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
468 
469   unsigned LR = RegInfo->getRARegister();
470   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
471   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
472                        !MFI.adjustsStack() &&       // No calls.
473                        !MustSaveLR(MF, LR) &&       // No need to save LR.
474                        !FI->mustSaveTOC() &&        // No need to save TOC.
475                        !RegInfo->hasBasePointer(MF); // No special alignment.
476 
477   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
478   // code if all local vars are reg-allocated.
479   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
480 
481   // Check whether we can skip adjusting the stack pointer (by using red zone)
482   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
483     // No need for frame
484     return 0;
485   }
486 
487   // Get the maximum call frame size of all the calls.
488   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
489 
490   // Maximum call frame needs to be at least big enough for linkage area.
491   unsigned minCallFrameSize = getLinkageSize();
492   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
493 
494   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
495   // that allocations will be aligned.
496   if (MFI.hasVarSizedObjects())
497     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
498 
499   // Update the new max call frame size if the caller passes in a valid pointer.
500   if (NewMaxCallFrameSize)
501     *NewMaxCallFrameSize = maxCallFrameSize;
502 
503   // Include call frame size in total.
504   FrameSize += maxCallFrameSize;
505 
506   // Make sure the frame is aligned.
507   FrameSize = alignTo(FrameSize, Alignment);
508 
509   return FrameSize;
510 }
511 
512 // hasFP - Return true if the specified function actually has a dedicated frame
513 // pointer register.
514 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
515   const MachineFrameInfo &MFI = MF.getFrameInfo();
516   // FIXME: This is pretty much broken by design: hasFP() might be called really
517   // early, before the stack layout was calculated and thus hasFP() might return
518   // true or false here depending on the time of call.
519   return (MFI.getStackSize()) && needsFP(MF);
520 }
521 
522 // needsFP - Return true if the specified function should have a dedicated frame
523 // pointer register.  This is true if the function has variable sized allocas or
524 // if frame pointer elimination is disabled.
525 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
526   const MachineFrameInfo &MFI = MF.getFrameInfo();
527 
528   // Naked functions have no stack frame pushed, so we don't have a frame
529   // pointer.
530   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
531     return false;
532 
533   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
534     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
535     (MF.getTarget().Options.GuaranteedTailCallOpt &&
536      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
537 }
538 
539 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
540   bool is31 = needsFP(MF);
541   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
542   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
543 
544   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
545   bool HasBP = RegInfo->hasBasePointer(MF);
546   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
547   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
548 
549   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
550        BI != BE; ++BI)
551     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
552       --MBBI;
553       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
554         MachineOperand &MO = MBBI->getOperand(I);
555         if (!MO.isReg())
556           continue;
557 
558         switch (MO.getReg()) {
559         case PPC::FP:
560           MO.setReg(FPReg);
561           break;
562         case PPC::FP8:
563           MO.setReg(FP8Reg);
564           break;
565         case PPC::BP:
566           MO.setReg(BPReg);
567           break;
568         case PPC::BP8:
569           MO.setReg(BP8Reg);
570           break;
571 
572         }
573       }
574     }
575 }
576 
/*  Find up to two scratch registers usable in MBB.

    Parameters:
    - MBB: the block in which the scratch registers are going to be used.
    - UseAtEnd: true if the registers are needed at the end of the block
      (before its first terminator), false if they are needed at its start.
    - TwoUniqueRegsRequired: true if two distinct registers are mandatory.
    - SR1, SR2: optional outputs. SR2 may only be non-null if SR1 is too.

    This function will do the following:
    - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
      respectively (defaults recommended by the ABI) and return true
    - Otherwise, initialize the register scavenger and look for available
      registers.
    - If the defaults (R0/R12) are available, return true
    - If TwoUniqueRegsRequired is set to true, it looks for two unique
      registers. Otherwise, look for a single available register.
      - If the required registers are found, set SR1 and SR2 and return true.
      - If the required registers are not found, set SR2 or both SR1 and SR2 to
        PPC::NoRegister and return false.

    Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
    is not set, this function will attempt to find two different registers, but
    still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  // The default scratch registers, in the width matching the target.
  Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  // "Exit" only counts when the registers are used at the end of the block and
  // "entry" only when they are used at its start.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    // Advance the scavenger to that point, unless it is the first instruction
    // of the block.
    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  // The callee-saved list is terminated by a zero entry.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}
678 
679 // We need a scratch register for spilling LR and for spilling CR. By default,
680 // we use two scratch registers to hide latency. However, if only one scratch
681 // register is available, we can adjust for that by not overlapping the spill
682 // code. However, if we need to realign the stack (i.e. have a base pointer)
683 // and the stack frame is large, we need two scratch registers.
684 bool
685 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
686   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
687   MachineFunction &MF = *(MBB->getParent());
688   bool HasBP = RegInfo->hasBasePointer(MF);
689   unsigned FrameSize = determineFrameLayout(MF);
690   int NegFrameSize = -FrameSize;
691   bool IsLargeFrame = !isInt<16>(NegFrameSize);
692   MachineFrameInfo &MFI = MF.getFrameInfo();
693   Align MaxAlign = MFI.getMaxAlign();
694   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
695 
696   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
697 }
698 
699 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
700   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
701 
702   return findScratchRegister(TmpMBB, false,
703                              twoUniqueScratchRegsRequired(TmpMBB));
704 }
705 
706 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
707   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
708 
709   return findScratchRegister(TmpMBB, true);
710 }
711 
712 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
713   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
714   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
715 
716   // Abort if there is no register info or function info.
717   if (!RegInfo || !FI)
718     return false;
719 
720   // Only move the stack update on ELFv2 ABI and PPC64.
721   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
722     return false;
723 
724   // Check the frame size first and return false if it does not fit the
725   // requirements.
726   // We need a non-zero frame size as well as a frame that will fit in the red
727   // zone. This is because by moving the stack pointer update we are now storing
728   // to the red zone until the stack pointer is updated. If we get an interrupt
729   // inside the prologue but before the stack update we now have a number of
730   // stores to the red zone and those stores must all fit.
731   MachineFrameInfo &MFI = MF.getFrameInfo();
732   unsigned FrameSize = MFI.getStackSize();
733   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
734     return false;
735 
736   // Frame pointers and base pointers complicate matters so don't do anything
737   // if we have them. For example having a frame pointer will sometimes require
738   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
739   // difficult.
740   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
741     return false;
742 
743   // Calls to fast_cc functions use different rules for passing parameters on
744   // the stack from the ABI and using PIC base in the function imposes
745   // similar restrictions to using the base pointer. It is not generally safe
746   // to move the stack pointer update in these situations.
747   if (FI->hasFastCall() || FI->usesPICBase())
748     return false;
749 
750   // Finally we can move the stack update if we do not require register
751   // scavenging. Register scavenging can introduce more spills and so
752   // may make the frame size larger than we have computed.
753   return !RegInfo->requiresFrameIndexScavenging(MF);
754 }
755 
/// Emit the prologue of \p MF into \p MBB.
///
/// In order, this routine: handles an UPDATE_VRSAVE pseudo (non-SVR4 only),
/// computes the frame size, picks scratch registers, saves LR / CR / FP /
/// PIC base / BP as required, allocates the stack frame (directly, or through
/// a PROBED_STACKALLOC pseudo that is expanded later), saves the TOC register
/// when requested, and finally emits CFI for everything that was saved.
///
/// \param MF  the function whose prologue is being emitted.
/// \param MBB the block that receives the prologue instructions.
756 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
757                                     MachineBasicBlock &MBB) const {
758   MachineBasicBlock::iterator MBBI = MBB.begin();
759   MachineFrameInfo &MFI = MF.getFrameInfo();
760   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
761   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
762   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
763
764   MachineModuleInfo &MMI = MF.getMMI();
765   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
      // All prologue instructions are built with this default (empty) DebugLoc.
766   DebugLoc dl;
767   // AIX assembler does not support cfi directives.
768   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
769
770   // Get processor type.
771   bool isPPC64 = Subtarget.isPPC64();
772   // Get the ABI.
773   bool isSVR4ABI = Subtarget.isSVR4ABI();
774   bool isAIXABI = Subtarget.isAIXABI();
775   bool isELFv2ABI = Subtarget.isELFv2ABI();
776   assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
777
778   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
779   // process it.
      // NOTE(review): the loop counter `i` is incremented but never read.
780   if (!isSVR4ABI)
781     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
782       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
783         if (isAIXABI)
784           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
785         HandleVRSaveUpdate(*MBBI, TII);
786         break;
787       }
788     }
789
790   // Move MBBI back to the beginning of the prologue block.
791   MBBI = MBB.begin();
792
793   // Work out frame sizes.
      // NOTE(review): NegFrameSize is declared `int`, so the isInt<32> test
      // below looks like it can never fail - confirm whether a wider type was
      // intended for the frame size.
794   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
795   int NegFrameSize = -FrameSize;
796   if (!isInt<32>(NegFrameSize))
797     llvm_unreachable("Unhandled stack size!");
798
799   if (MFI.isFrameAddressTaken())
800     replaceFPWithRealFP(MF);
801
802   // Check if the link register (LR) must be saved.
803   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
804   bool MustSaveLR = FI->mustSaveLR();
805   bool MustSaveTOC = FI->mustSaveTOC();
806   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
807   bool MustSaveCR = !MustSaveCRs.empty();
808   // Do we have a frame pointer and/or base pointer for this function?
809   bool HasFP = hasFP(MF);
810   bool HasBP = RegInfo->hasBasePointer(MF);
      // A red zone is assumed everywhere except on 32-bit SVR4.
811   bool HasRedZone = isPPC64 || !isSVR4ABI;
812
813   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
814   Register BPReg = RegInfo->getBaseRegister(MF);
815   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
816   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
817   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
      // ScratchReg is filled in by findScratchRegister() below; TempReg starts
      // out as R12/X12 and may be replaced by that same call.
818   Register ScratchReg;
819   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
820   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
      // Select the 32- or 64-bit flavour of every opcode used below once, up
      // front.
821   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
822                                                 : PPC::MFLR );
823   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
824                                                  : PPC::STW );
825   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
826                                                      : PPC::STWU );
827   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
828                                                         : PPC::STWUX);
829   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
830                                                           : PPC::LIS );
831   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
832                                                  : PPC::ORI );
833   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
834                                               : PPC::OR );
835   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
836                                                             : PPC::SUBFC);
837   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
838                                                                : PPC::SUBFIC);
839   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
840                                                            : PPC::MFCR);
841   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
842
843   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
844   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
845   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
846   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
847   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
848          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
849
850   // Using the same bool variable as below to suppress compiler warnings.
851   // Stack probe requires two scratch registers, one for old sp, one for large
852   // frame and large probe size.
853   bool SingleScratchReg = findScratchRegister(
854       &MBB, false,
855       twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
856       &ScratchReg, &TempReg);
857   assert(SingleScratchReg &&
858          "Required number of registers not available in this block");
859
      // From here on SingleScratchReg records whether both scratch roles ended
      // up in the same physical register.
860   SingleScratchReg = ScratchReg == TempReg;
861
862   int LROffset = getReturnSaveOffset();
863
      // Save-slot offsets for FP, BP and the PIC base. On SVR4 they are read
      // from the frame objects recorded in PPCFunctionInfo.
      // NOTE(review): each `MachineFrameInfo &MFI` declared in the blocks below
      // shadows the function-level MFI; both are MF.getFrameInfo().
864   int FPOffset = 0;
865   if (HasFP) {
866     if (isSVR4ABI) {
867       MachineFrameInfo &MFI = MF.getFrameInfo();
868       int FPIndex = FI->getFramePointerSaveIndex();
869       assert(FPIndex && "No Frame Pointer Save Slot!");
870       FPOffset = MFI.getObjectOffset(FPIndex);
871     } else {
872       FPOffset = getFramePointerSaveOffset();
873     }
874   }
875
876   int BPOffset = 0;
877   if (HasBP) {
878     if (isSVR4ABI) {
879       MachineFrameInfo &MFI = MF.getFrameInfo();
880       int BPIndex = FI->getBasePointerSaveIndex();
881       assert(BPIndex && "No Base Pointer Save Slot!");
882       BPOffset = MFI.getObjectOffset(BPIndex);
883     } else {
884       BPOffset = getBasePointerSaveOffset();
885     }
886   }
887
888   int PBPOffset = 0;
889   if (FI->usesPICBase()) {
890     MachineFrameInfo &MFI = MF.getFrameInfo();
891     int PBPIndex = FI->getPICBasePointerSaveIndex();
892     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
893     PBPOffset = MFI.getObjectOffset(PBPIndex);
894   }
895
896   // Get stack alignments.
897   Align MaxAlign = MFI.getMaxAlign();
898   if (HasBP && MaxAlign > 1)
899     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
900
901   // Frames of 32KB & larger require special handling because they cannot be
902   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
903   bool isLargeFrame = !isInt<16>(NegFrameSize);
904
905   // Check if we can move the stack update instruction (stdu) down the prologue
906   // past the callee saves. Hopefully this will avoid the situation where the
907   // saves are waiting for the update on the store with update to complete.
908   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
909   bool MovingStackUpdateDown = false;
910
911   // Check if we can move the stack update.
      // NOTE(review): both range-for loops below copy each CalleeSavedInfo by
      // value; only getFrameIdx() is read from the copy.
912   if (stackUpdateCanBeMoved(MF)) {
913     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
914     for (CalleeSavedInfo CSI : Info) {
915       int FrIdx = CSI.getFrameIdx();
916       // If the frame index is not negative the callee saved info belongs to a
917       // stack object that is not a fixed stack object. We ignore non-fixed
918       // stack objects because we won't move the stack update pointer past them.
919       if (FrIdx >= 0)
920         continue;
921
922       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
            // Advance the insertion point by one instruction per qualifying
            // callee save.
923         StackUpdateLoc++;
924         MovingStackUpdateDown = true;
925       } else {
926         // We need all of the Frame Indices to meet these conditions.
927         // If they do not, abort the whole operation.
928         StackUpdateLoc = MBBI;
929         MovingStackUpdateDown = false;
930         break;
931       }
932     }
933
934     // If the operation was not aborted then update the object offset.
935     if (MovingStackUpdateDown) {
936       for (CalleeSavedInfo CSI : Info) {
937         int FrIdx = CSI.getFrameIdx();
938         if (FrIdx < 0)
939           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
940       }
941     }
942   }
943
944   // Where in the prologue we move the CR fields depends on how many scratch
945   // registers we have, and if we need to save the link register or not. This
946   // lambda is to avoid duplicating the logic in 2 places.
947   auto BuildMoveFromCR = [&]() {
948     if (isELFv2ABI && MustSaveCRs.size() == 1) {
949     // In the ELFv2 ABI, we are not required to save all CR fields.
950     // If only one CR field is clobbered, it is more efficient to use
951     // mfocrf to selectively save just that field, because mfocrf has short
952     // latency compared to mfcr.
953       assert(isPPC64 && "V2 ABI is 64-bit only.");
954       MachineInstrBuilder MIB =
955           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
956       MIB.addReg(MustSaveCRs[0], RegState::Kill);
957     } else {
958       MachineInstrBuilder MIB =
959           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
960       for (unsigned CRfield : MustSaveCRs)
961         MIB.addReg(CRfield, RegState::ImplicitKill);
962     }
963   };
964
965   // If we need to spill the CR and the LR but we don't have two separate
966   // registers available, we must spill them one at a time
967   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
968     BuildMoveFromCR();
969     BuildMI(MBB, MBBI, dl, StoreWordInst)
970         .addReg(TempReg, getKillRegState(true))
971         .addImm(CRSaveOffset)
972         .addReg(SPReg);
973   }
974
975   if (MustSaveLR)
976     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
977
978   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
979     BuildMoveFromCR();
980
      // With a red zone, FP / PIC base / BP are stored relative to the
      // not-yet-updated SP. Without one they are stored after the frame has
      // been allocated (see the !HasRedZone block further down).
981   if (HasRedZone) {
982     if (HasFP)
983       BuildMI(MBB, MBBI, dl, StoreInst)
984         .addReg(FPReg)
985         .addImm(FPOffset)
986         .addReg(SPReg);
987     if (FI->usesPICBase())
988       BuildMI(MBB, MBBI, dl, StoreInst)
989         .addReg(PPC::R30)
990         .addImm(PBPOffset)
991         .addReg(SPReg);
992     if (HasBP)
993       BuildMI(MBB, MBBI, dl, StoreInst)
994         .addReg(BPReg)
995         .addImm(BPOffset)
996         .addReg(SPReg);
997   }
998
      // Note: the LR store is inserted at StackUpdateLoc, not at MBBI.
999   if (MustSaveLR)
1000     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1001       .addReg(ScratchReg, getKillRegState(true))
1002       .addImm(LROffset)
1003       .addReg(SPReg);
1004
1005   if (MustSaveCR &&
1006       !(SingleScratchReg && MustSaveLR)) {
1007     assert(HasRedZone && "A red zone is always available on PPC64");
1008     BuildMI(MBB, MBBI, dl, StoreWordInst)
1009       .addReg(TempReg, getKillRegState(true))
1010       .addImm(CRSaveOffset)
1011       .addReg(SPReg);
1012   }
1013
1014   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1015   if (!FrameSize)
1016     return;
1017
1018   // Adjust stack pointer: r1 += NegFrameSize.
1019   // If there is a preferred stack alignment, align R1 now
1020
1021   if (HasBP && HasRedZone) {
1022     // Save a copy of r1 as the base pointer.
1023     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1024       .addReg(SPReg)
1025       .addReg(SPReg);
1026   }
1027
1028   // Have we generated a STUX instruction to claim stack frame? If so,
1029   // the negated frame size will be placed in ScratchReg.
1030   bool HasSTUX = false;
1031
1032   // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
1033   // pointer is always stored at SP, we will get a free probe due to an essential
1034   // STU(X) instruction.
1035   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
1036     // To be consistent with other targets, a pseudo instruction is emitted and
1037     // will be later expanded in `inlineStackProbe`.
1038     BuildMI(MBB, MBBI, dl,
1039             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
1040                             : PPC::PROBED_STACKALLOC_32))
1041         .addDef(ScratchReg)
1042         .addDef(TempReg) // TempReg stores the old sp.
1043         .addImm(NegFrameSize);
1044     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
1045     // update the ScratchReg to meet the assumption that ScratchReg contains
1046     // the NegFrameSize. This solution is rather tricky.
1047     if (!HasRedZone) {
1048       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1049           .addReg(TempReg)
1050           .addReg(SPReg);
1051       HasSTUX = true;
1052     }
1053   } else {
1054     // This condition must be kept in sync with canUseAsPrologue.
1055     if (HasBP && MaxAlign > 1) {
           // Extract the low Log2(MaxAlign) bits of SP into ScratchReg, then
           // subtract them from NegFrameSize so the indexed store-with-update
           // below allocates the frame and aligns SP in one step.
1056       if (isPPC64)
1057         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1058             .addReg(SPReg)
1059             .addImm(0)
1060             .addImm(64 - Log2(MaxAlign));
1061       else // PPC32...
1062         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1063             .addReg(SPReg)
1064             .addImm(0)
1065             .addImm(32 - Log2(MaxAlign))
1066             .addImm(31);
1067       if (!isLargeFrame) {
1068         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1069             .addReg(ScratchReg, RegState::Kill)
1070             .addImm(NegFrameSize);
1071       } else {
1072         assert(!SingleScratchReg && "Only a single scratch reg available");
             // Materialize the 32-bit NegFrameSize in TempReg with lis + ori.
1073         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1074             .addImm(NegFrameSize >> 16);
1075         BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1076             .addReg(TempReg, RegState::Kill)
1077             .addImm(NegFrameSize & 0xFFFF);
1078         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1079             .addReg(ScratchReg, RegState::Kill)
1080             .addReg(TempReg, RegState::Kill);
1081       }
1082
1083       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1084           .addReg(SPReg, RegState::Kill)
1085           .addReg(SPReg)
1086           .addReg(ScratchReg);
1087       HasSTUX = true;
1088
1089     } else if (!isLargeFrame) {
           // Small frame: a single store-with-update, inserted at
           // StackUpdateLoc (possibly after the callee saves).
1090       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1091           .addReg(SPReg)
1092           .addImm(NegFrameSize)
1093           .addReg(SPReg);
1094
1095     } else {
           // Large frame: materialize NegFrameSize in ScratchReg (lis + ori)
           // and use the indexed store-with-update.
1096       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1097           .addImm(NegFrameSize >> 16);
1098       BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1099           .addReg(ScratchReg, RegState::Kill)
1100           .addImm(NegFrameSize & 0xFFFF);
1101       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1102           .addReg(SPReg, RegState::Kill)
1103           .addReg(SPReg)
1104           .addReg(ScratchReg);
1105       HasSTUX = true;
1106     }
1107   }
1108
1109   // Save the TOC register after the stack pointer update if a prologue TOC
1110   // save is required for the function.
1111   if (MustSaveTOC) {
1112     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1113     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1114       .addReg(TOCReg, getKillRegState(true))
1115       .addImm(TOCSaveOffset)
1116       .addReg(SPReg);
1117   }
1118
1119   if (!HasRedZone) {
1120     assert(!isPPC64 && "A red zone is always available on PPC64");
1121     if (HasSTUX) {
1122       // The negated frame size is in ScratchReg, and the SPReg has been
1123       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1124       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1125       // the stack frame (i.e. the old SP), ideally, we would put the old
1126       // SP into a register and use it as the base for the stores. The
1127       // problem is that the only available register may be ScratchReg,
1128       // which could be R0, and R0 cannot be used as a base address.
1129
1130       // First, set ScratchReg to the old SP. This may need to be modified
1131       // later.
1132       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1133         .addReg(ScratchReg, RegState::Kill)
1134         .addReg(SPReg);
1135
1136       if (ScratchReg == PPC::R0) {
1137         // R0 cannot be used as a base register, but it can be used as an
1138         // index in a store-indexed.
             // LastOffset tracks the displacement already added to R0 so each
             // step only adds the difference to the next slot.
1139         int LastOffset = 0;
1140         if (HasFP)  {
1141           // R0 += (FPOffset-LastOffset).
1142           // Need addic, since addi treats R0 as 0.
1143           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1144             .addReg(ScratchReg)
1145             .addImm(FPOffset-LastOffset);
1146           LastOffset = FPOffset;
1147           // Store FP into *R0.
1148           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1149             .addReg(FPReg, RegState::Kill)  // Save FP.
1150             .addReg(PPC::ZERO)
1151             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1152         }
1153         if (FI->usesPICBase()) {
1154           // R0 += (PBPOffset-LastOffset).
1155           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1156             .addReg(ScratchReg)
1157             .addImm(PBPOffset-LastOffset);
1158           LastOffset = PBPOffset;
1159           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1160             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1161             .addReg(PPC::ZERO)
1162             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1163         }
1164         if (HasBP) {
1165           // R0 += (BPOffset-LastOffset).
1166           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1167             .addReg(ScratchReg)
1168             .addImm(BPOffset-LastOffset);
1169           LastOffset = BPOffset;
1170           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1171             .addReg(BPReg, RegState::Kill)  // Save BP.
1172             .addReg(PPC::ZERO)
1173             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1174           // BP = R0-LastOffset
1175           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1176             .addReg(ScratchReg, RegState::Kill)
1177             .addImm(-LastOffset);
1178         }
1179       } else {
1180         // ScratchReg is not R0, so use it as the base register. It is
1181         // already set to the old SP, so we can use the offsets directly.
1182
1183         // Now that the stack frame has been allocated, save all the necessary
1184         // registers using ScratchReg as the base address.
1185         if (HasFP)
1186           BuildMI(MBB, MBBI, dl, StoreInst)
1187             .addReg(FPReg)
1188             .addImm(FPOffset)
1189             .addReg(ScratchReg);
1190         if (FI->usesPICBase())
1191           BuildMI(MBB, MBBI, dl, StoreInst)
1192             .addReg(PPC::R30)
1193             .addImm(PBPOffset)
1194             .addReg(ScratchReg);
1195         if (HasBP) {
1196           BuildMI(MBB, MBBI, dl, StoreInst)
1197             .addReg(BPReg)
1198             .addImm(BPOffset)
1199             .addReg(ScratchReg);
               // Copy the old SP (held in ScratchReg) into BP.
1200           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1201             .addReg(ScratchReg, RegState::Kill)
1202             .addReg(ScratchReg);
1203         }
1204       }
1205     } else {
1206       // The frame size is a known 16-bit constant (fitting in the immediate
1207       // field of STWU). To be here we have to be compiling for PPC32.
1208       // Since the SPReg has been decreased by FrameSize, add it back to each
1209       // offset.
1210       if (HasFP)
1211         BuildMI(MBB, MBBI, dl, StoreInst)
1212           .addReg(FPReg)
1213           .addImm(FrameSize + FPOffset)
1214           .addReg(SPReg);
1215       if (FI->usesPICBase())
1216         BuildMI(MBB, MBBI, dl, StoreInst)
1217           .addReg(PPC::R30)
1218           .addImm(FrameSize + PBPOffset)
1219           .addReg(SPReg);
1220       if (HasBP) {
1221         BuildMI(MBB, MBBI, dl, StoreInst)
1222           .addReg(BPReg)
1223           .addImm(FrameSize + BPOffset)
1224           .addReg(SPReg);
             // BP = SP + FrameSize, i.e. the SP value before the frame was
             // allocated.
1225         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1226           .addReg(SPReg)
1227           .addImm(FrameSize);
1228       }
1229     }
1230   }
1231
1232   // Add Call Frame Information for the instructions we generated above.
1233   if (needsCFI) {
1234     unsigned CFIIndex;
1235
1236     if (HasBP) {
1237       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1238       // because if the stack needed aligning then CFA won't be at a fixed
1239       // offset from FP/SP.
1240       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1241       CFIIndex = MF.addFrameInst(
1242           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1243     } else {
1244       // Adjust the definition of CFA to account for the change in SP.
1245       assert(NegFrameSize);
1246       CFIIndex = MF.addFrameInst(
1247           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1248     }
1249     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1250         .addCFIIndex(CFIIndex);
1251
1252     if (HasFP) {
1253       // Describe where FP was saved, at a fixed offset from CFA.
1254       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1255       CFIIndex = MF.addFrameInst(
1256           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1257       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1258           .addCFIIndex(CFIIndex);
1259     }
1260
1261     if (FI->usesPICBase()) {
1262       // Describe where the PIC base (R30) was saved, at a fixed offset from CFA.
1263       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1264       CFIIndex = MF.addFrameInst(
1265           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1266       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1267           .addCFIIndex(CFIIndex);
1268     }
1269
1270     if (HasBP) {
1271       // Describe where BP was saved, at a fixed offset from CFA.
1272       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1273       CFIIndex = MF.addFrameInst(
1274           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1275       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1276           .addCFIIndex(CFIIndex);
1277     }
1278
1279     if (MustSaveLR) {
1280       // Describe where LR was saved, at a fixed offset from CFA.
1281       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1282       CFIIndex = MF.addFrameInst(
1283           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1284       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1285           .addCFIIndex(CFIIndex);
1286     }
1287   }
1288
1289   // If there is a frame pointer, copy R1 into R31
1290   if (HasFP) {
1291     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1292       .addReg(SPReg)
1293       .addReg(SPReg);
1294
1295     if (!HasBP && needsCFI) {
1296       // Change the definition of CFA from SP+offset to FP+offset, because SP
1297       // will change at every alloca.
1298       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1299       unsigned CFIIndex = MF.addFrameInst(
1300           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1301
1302       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1303           .addCFIIndex(CFIIndex);
1304     }
1305   }
1306
1307   if (needsCFI) {
1308     // Describe where callee saved registers were saved, at fixed offsets from
1309     // CFA.
1310     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1311     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1312       unsigned Reg = CSI[I].getReg();
           // LR is described separately above; no record is emitted for RM.
1313       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1314
1315       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1316       // subregisters of CR2. We just need to emit a move of CR2.
1317       if (PPC::CRBITRCRegClass.contains(Reg))
1318         continue;
1319
           // Skip the TOC register when it was saved by the prologue TOC store.
1320       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1321         continue;
1322
1323       // For SVR4, don't emit a move for the CR spill slot if we haven't
1324       // spilled CRs.
1325       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1326           && !MustSaveCR)
1327         continue;
1328
1329       // For 64-bit SVR4 when we have spilled CRs, the spill location
1330       // is SP+8, not a frame-relative slot.
1331       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1332         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1333         // the whole CR word.  In the ELFv2 ABI, every CR that was
1334         // actually saved gets its own CFI record.
1335         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1336         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1337             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1338         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1339             .addCFIIndex(CFIIndex);
1340         continue;
1341       }
1342
1343       if (CSI[I].isSpilledToReg()) {
             // Register-to-register spill: record which register now holds
             // the saved value.
1344         unsigned SpilledReg = CSI[I].getDstReg();
1345         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1346             nullptr, MRI->getDwarfRegNum(Reg, true),
1347             MRI->getDwarfRegNum(SpilledReg, true)));
1348         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1349           .addCFIIndex(CFIRegister);
1350       } else {
1351         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1352         // We have changed the object offset above but we do not want to change
1353         // the actual offsets in the CFI instruction so we have to undo the
1354         // offset change here.
1355         if (MovingStackUpdateDown)
1356           Offset -= NegFrameSize;
1357
1358         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1359             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1360         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1361             .addCFIIndex(CFIIndex);
1362       }
1363     }
1364   }
1365 }
1366 
1367 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1368                                         MachineBasicBlock &PrologMBB) const {
1369   // TODO: Generate CFI instructions.
1370   bool isPPC64 = Subtarget.isPPC64();
1371   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1372   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1373   MachineFrameInfo &MFI = MF.getFrameInfo();
1374   MachineModuleInfo &MMI = MF.getMMI();
1375   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1376   // AIX assembler does not support cfi directives.
1377   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1378   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1379     int Opc = MI.getOpcode();
1380     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1381   });
1382   if (StackAllocMIPos == PrologMBB.end())
1383     return;
1384   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1385   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1386   MachineInstr &MI = *StackAllocMIPos;
1387   int64_t NegFrameSize = MI.getOperand(2).getImm();
1388   int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
1389   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1390   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1391   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1392   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1393   Register ScratchReg = MI.getOperand(0).getReg();
1394   Register FPReg = MI.getOperand(1).getReg();
1395   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1396   bool HasBP = RegInfo->hasBasePointer(MF);
1397   Align MaxAlign = MFI.getMaxAlign();
1398   // Initialize current frame pointer.
1399   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1400   BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1401   // Subroutines to generate .cfi_* directives.
1402   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1403                             MachineBasicBlock::iterator MBBI, Register Reg) {
1404     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1405     unsigned CFIIndex = MF.addFrameInst(
1406         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1407     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1408         .addCFIIndex(CFIIndex);
1409   };
1410   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1411                          MachineBasicBlock::iterator MBBI, Register Reg,
1412                          int Offset) {
1413     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1414     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1415         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1416     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1417         .addCFIIndex(CFIIndex);
1418   };
1419   // Subroutine to determine if we can use the Imm as part of d-form.
1420   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1421   // Subroutine to materialize the Imm into TempReg.
1422   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1423                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1424                             Register &TempReg) {
1425     assert(isInt<32>(Imm) && "Unhandled imm");
1426     if (isInt<16>(Imm))
1427       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1428           .addImm(Imm);
1429     else {
1430       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1431           .addImm(Imm >> 16);
1432       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1433           .addReg(TempReg)
1434           .addImm(Imm & 0xFFFF);
1435     }
1436   };
1437   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1438   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1439                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1440                               Register NegSizeReg, bool UseDForm) {
1441     if (UseDForm)
1442       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1443           .addReg(FPReg)
1444           .addImm(NegSize)
1445           .addReg(SPReg);
1446     else
1447       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1448           .addReg(FPReg)
1449           .addReg(SPReg)
1450           .addReg(NegSizeReg);
1451   };
1452   // Use FPReg to calculate CFA.
1453   if (needsCFI)
1454     buildDefCFA(PrologMBB, {MI}, FPReg, 0);
1455   // For case HasBP && MaxAlign > 1, we have to align the SP by performing
1456   // SP = SP - SP % MaxAlign.
1457   if (HasBP && MaxAlign > 1) {
1458     if (isPPC64)
1459       BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1460           .addReg(FPReg)
1461           .addImm(0)
1462           .addImm(64 - Log2(MaxAlign));
1463     else
1464       BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1465           .addReg(FPReg)
1466           .addImm(0)
1467           .addImm(32 - Log2(MaxAlign))
1468           .addImm(31);
1469     BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX),
1470             SPReg)
1471         .addReg(FPReg)
1472         .addReg(SPReg)
1473         .addReg(ScratchReg);
1474   }
1475   // Probe residual part.
1476   if (NegResidualSize) {
1477     bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1478     if (!ResidualUseDForm)
1479       MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
1480     allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
1481                      ResidualUseDForm);
1482   }
1483   bool UseDForm = CanUseDForm(NegProbeSize);
1484   // If number of blocks is small, just probe them directly.
1485   if (NumBlocks < 3) {
1486     if (!UseDForm)
1487       MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
1488     for (int i = 0; i < NumBlocks; ++i)
1489       allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
1490     if (needsCFI) {
1491       // Restore using SPReg to calculate CFA.
1492       buildDefCFAReg(PrologMBB, {MI}, SPReg);
1493     }
1494   } else {
1495     // Since CTR is a volatile register and current shrinkwrap implementation
1496     // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1497     // CTR loop to probe.
1498     // Calculate trip count and stores it in CTRReg.
1499     MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
1500     BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1501         .addReg(ScratchReg, RegState::Kill);
1502     if (!UseDForm)
1503       MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
1504     // Create MBBs of the loop.
1505     MachineFunction::iterator MBBInsertPoint =
1506         std::next(PrologMBB.getIterator());
1507     MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1508     MF.insert(MBBInsertPoint, LoopMBB);
1509     MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1510     MF.insert(MBBInsertPoint, ExitMBB);
1511     // Synthesize the loop body.
1512     allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1513                      UseDForm);
1514     BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1515         .addMBB(LoopMBB);
1516     LoopMBB->addSuccessor(ExitMBB);
1517     LoopMBB->addSuccessor(LoopMBB);
1518     // Synthesize the exit MBB.
1519     ExitMBB->splice(ExitMBB->end(), &PrologMBB,
1520                     std::next(MachineBasicBlock::iterator(MI)),
1521                     PrologMBB.end());
1522     ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
1523     PrologMBB.addSuccessor(LoopMBB);
1524     if (needsCFI) {
1525       // Restore using SPReg to calculate CFA.
1526       buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1527     }
1528     // Update liveins.
1529     recomputeLiveIns(*LoopMBB);
1530     recomputeLiveIns(*ExitMBB);
1531   }
1532   ++NumPrologProbed;
1533   MI.eraseFromParent();
1534 }
1535 
1536 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1537                                     MachineBasicBlock &MBB) const {
1538   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1539   DebugLoc dl;
1540 
1541   if (MBBI != MBB.end())
1542     dl = MBBI->getDebugLoc();
1543 
1544   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1545   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1546 
1547   // Get alignment info so we know how to restore the SP.
1548   const MachineFrameInfo &MFI = MF.getFrameInfo();
1549 
1550   // Get the number of bytes allocated from the FrameInfo.
1551   int FrameSize = MFI.getStackSize();
1552 
1553   // Get processor type.
1554   bool isPPC64 = Subtarget.isPPC64();
1555   // Get the ABI.
1556   bool isSVR4ABI = Subtarget.isSVR4ABI();
1557 
1558   // Check if the link register (LR) has been saved.
1559   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1560   bool MustSaveLR = FI->mustSaveLR();
1561   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1562   bool MustSaveCR = !MustSaveCRs.empty();
1563   // Do we have a frame pointer and/or base pointer for this function?
1564   bool HasFP = hasFP(MF);
1565   bool HasBP = RegInfo->hasBasePointer(MF);
1566   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1567 
1568   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1569   Register BPReg = RegInfo->getBaseRegister(MF);
1570   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1571   Register ScratchReg;
1572   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1573   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1574                                                  : PPC::MTLR );
1575   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1576                                                  : PPC::LWZ );
1577   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1578                                                            : PPC::LIS );
1579   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1580                                               : PPC::OR );
1581   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1582                                                   : PPC::ORI );
1583   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1584                                                    : PPC::ADDI );
1585   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1586                                                 : PPC::ADD4 );
1587   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1588                                                      : PPC::LWZ);
1589   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1590                                                      : PPC::MTOCRF);
1591   int LROffset = getReturnSaveOffset();
1592 
1593   int FPOffset = 0;
1594 
1595   // Using the same bool variable as below to suppress compiler warnings.
1596   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1597                                               &TempReg);
1598   assert(SingleScratchReg &&
1599          "Could not find an available scratch register");
1600 
1601   SingleScratchReg = ScratchReg == TempReg;
1602 
1603   if (HasFP) {
1604     if (isSVR4ABI) {
1605       int FPIndex = FI->getFramePointerSaveIndex();
1606       assert(FPIndex && "No Frame Pointer Save Slot!");
1607       FPOffset = MFI.getObjectOffset(FPIndex);
1608     } else {
1609       FPOffset = getFramePointerSaveOffset();
1610     }
1611   }
1612 
1613   int BPOffset = 0;
1614   if (HasBP) {
1615     if (isSVR4ABI) {
1616       int BPIndex = FI->getBasePointerSaveIndex();
1617       assert(BPIndex && "No Base Pointer Save Slot!");
1618       BPOffset = MFI.getObjectOffset(BPIndex);
1619     } else {
1620       BPOffset = getBasePointerSaveOffset();
1621     }
1622   }
1623 
1624   int PBPOffset = 0;
1625   if (FI->usesPICBase()) {
1626     int PBPIndex = FI->getPICBasePointerSaveIndex();
1627     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1628     PBPOffset = MFI.getObjectOffset(PBPIndex);
1629   }
1630 
1631   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1632 
1633   if (IsReturnBlock) {
1634     unsigned RetOpcode = MBBI->getOpcode();
1635     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1636                       RetOpcode == PPC::TCRETURNdi ||
1637                       RetOpcode == PPC::TCRETURNai ||
1638                       RetOpcode == PPC::TCRETURNri8 ||
1639                       RetOpcode == PPC::TCRETURNdi8 ||
1640                       RetOpcode == PPC::TCRETURNai8;
1641 
1642     if (UsesTCRet) {
1643       int MaxTCRetDelta = FI->getTailCallSPDelta();
1644       MachineOperand &StackAdjust = MBBI->getOperand(1);
1645       assert(StackAdjust.isImm() && "Expecting immediate value.");
1646       // Adjust stack pointer.
1647       int StackAdj = StackAdjust.getImm();
1648       int Delta = StackAdj - MaxTCRetDelta;
1649       assert((Delta >= 0) && "Delta must be positive");
1650       if (MaxTCRetDelta>0)
1651         FrameSize += (StackAdj +Delta);
1652       else
1653         FrameSize += StackAdj;
1654     }
1655   }
1656 
1657   // Frames of 32KB & larger require special handling because they cannot be
1658   // indexed into with a simple LD/LWZ immediate offset operand.
1659   bool isLargeFrame = !isInt<16>(FrameSize);
1660 
1661   // On targets without red zone, the SP needs to be restored last, so that
1662   // all live contents of the stack frame are upwards of the SP. This means
1663   // that we cannot restore SP just now, since there may be more registers
1664   // to restore from the stack frame (e.g. R31). If the frame size is not
1665   // a simple immediate value, we will need a spare register to hold the
1666   // restored SP. If the frame size is known and small, we can simply adjust
1667   // the offsets of the registers to be restored, and still use SP to restore
1668   // them. In such case, the final update of SP will be to add the frame
1669   // size to it.
1670   // To simplify the code, set RBReg to the base register used to restore
1671   // values from the stack, and set SPAdd to the value that needs to be added
1672   // to the SP at the end. The default values are as if red zone was present.
1673   unsigned RBReg = SPReg;
1674   unsigned SPAdd = 0;
1675 
1676   // Check if we can move the stack update instruction up the epilogue
1677   // past the callee saves. This will allow the move to LR instruction
1678   // to be executed before the restores of the callee saves which means
1679   // that the callee saves can hide the latency from the MTLR instrcution.
1680   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1681   if (stackUpdateCanBeMoved(MF)) {
1682     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1683     for (CalleeSavedInfo CSI : Info) {
1684       int FrIdx = CSI.getFrameIdx();
1685       // If the frame index is not negative the callee saved info belongs to a
1686       // stack object that is not a fixed stack object. We ignore non-fixed
1687       // stack objects because we won't move the update of the stack pointer
1688       // past them.
1689       if (FrIdx >= 0)
1690         continue;
1691 
1692       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1693         StackUpdateLoc--;
1694       else {
1695         // Abort the operation as we can't update all CSR restores.
1696         StackUpdateLoc = MBBI;
1697         break;
1698       }
1699     }
1700   }
1701 
1702   if (FrameSize) {
1703     // In the prologue, the loaded (or persistent) stack pointer value is
1704     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1705     // zone add this offset back now.
1706 
1707     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1708     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1709     // call which invalidates the stack pointer value in SP(0). So we use the
1710     // value of R31 in this case.
1711     if (FI->hasFastCall()) {
1712       assert(HasFP && "Expecting a valid frame pointer.");
1713       if (!HasRedZone)
1714         RBReg = FPReg;
1715       if (!isLargeFrame) {
1716         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1717           .addReg(FPReg).addImm(FrameSize);
1718       } else {
1719         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1720           .addImm(FrameSize >> 16);
1721         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1722           .addReg(ScratchReg, RegState::Kill)
1723           .addImm(FrameSize & 0xFFFF);
1724         BuildMI(MBB, MBBI, dl, AddInst)
1725           .addReg(RBReg)
1726           .addReg(FPReg)
1727           .addReg(ScratchReg);
1728       }
1729     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1730       if (HasRedZone) {
1731         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1732           .addReg(SPReg)
1733           .addImm(FrameSize);
1734       } else {
1735         // Make sure that adding FrameSize will not overflow the max offset
1736         // size.
1737         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1738                "Local offsets should be negative");
1739         SPAdd = FrameSize;
1740         FPOffset += FrameSize;
1741         BPOffset += FrameSize;
1742         PBPOffset += FrameSize;
1743       }
1744     } else {
1745       // We don't want to use ScratchReg as a base register, because it
1746       // could happen to be R0. Use FP instead, but make sure to preserve it.
1747       if (!HasRedZone) {
1748         // If FP is not saved, copy it to ScratchReg.
1749         if (!HasFP)
1750           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1751             .addReg(FPReg)
1752             .addReg(FPReg);
1753         RBReg = FPReg;
1754       }
1755       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1756         .addImm(0)
1757         .addReg(SPReg);
1758     }
1759   }
1760   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1761   // If there is no red zone, ScratchReg may be needed for holding a useful
1762   // value (although not the base register). Make sure it is not overwritten
1763   // too early.
1764 
1765   // If we need to restore both the LR and the CR and we only have one
1766   // available scratch register, we must do them one at a time.
1767   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1768     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1769     // is live here.
1770     assert(HasRedZone && "Expecting red zone");
1771     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1772       .addImm(CRSaveOffset)
1773       .addReg(SPReg);
1774     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1775       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1776         .addReg(TempReg, getKillRegState(i == e-1));
1777   }
1778 
1779   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1780   // LR is stored in the caller's stack frame. ScratchReg will be needed
1781   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1782   // a base register anyway, because it may happen to be R0.
1783   bool LoadedLR = false;
1784   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1785     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1786       .addImm(LROffset+SPAdd)
1787       .addReg(RBReg);
1788     LoadedLR = true;
1789   }
1790 
1791   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1792     assert(RBReg == SPReg && "Should be using SP as a base register");
1793     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1794       .addImm(CRSaveOffset)
1795       .addReg(RBReg);
1796   }
1797 
1798   if (HasFP) {
1799     // If there is red zone, restore FP directly, since SP has already been
1800     // restored. Otherwise, restore the value of FP into ScratchReg.
1801     if (HasRedZone || RBReg == SPReg)
1802       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1803         .addImm(FPOffset)
1804         .addReg(SPReg);
1805     else
1806       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1807         .addImm(FPOffset)
1808         .addReg(RBReg);
1809   }
1810 
1811   if (FI->usesPICBase())
1812     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1813       .addImm(PBPOffset)
1814       .addReg(RBReg);
1815 
1816   if (HasBP)
1817     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1818       .addImm(BPOffset)
1819       .addReg(RBReg);
1820 
1821   // There is nothing more to be loaded from the stack, so now we can
1822   // restore SP: SP = RBReg + SPAdd.
1823   if (RBReg != SPReg || SPAdd != 0) {
1824     assert(!HasRedZone && "This should not happen with red zone");
1825     // If SPAdd is 0, generate a copy.
1826     if (SPAdd == 0)
1827       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1828         .addReg(RBReg)
1829         .addReg(RBReg);
1830     else
1831       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1832         .addReg(RBReg)
1833         .addImm(SPAdd);
1834 
1835     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1836     if (RBReg == FPReg)
1837       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1838         .addReg(ScratchReg)
1839         .addReg(ScratchReg);
1840 
1841     // Now load the LR from the caller's stack frame.
1842     if (MustSaveLR && !LoadedLR)
1843       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1844         .addImm(LROffset)
1845         .addReg(SPReg);
1846   }
1847 
1848   if (MustSaveCR &&
1849       !(SingleScratchReg && MustSaveLR))
1850     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1851       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1852         .addReg(TempReg, getKillRegState(i == e-1));
1853 
1854   if (MustSaveLR)
1855     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1856 
1857   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1858   // call optimization
1859   if (IsReturnBlock) {
1860     unsigned RetOpcode = MBBI->getOpcode();
1861     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1862         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1863         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1864       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1865       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1866 
1867       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1868         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1869           .addReg(SPReg).addImm(CallerAllocatedAmt);
1870       } else {
1871         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1872           .addImm(CallerAllocatedAmt >> 16);
1873         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1874           .addReg(ScratchReg, RegState::Kill)
1875           .addImm(CallerAllocatedAmt & 0xFFFF);
1876         BuildMI(MBB, MBBI, dl, AddInst)
1877           .addReg(SPReg)
1878           .addReg(FPReg)
1879           .addReg(ScratchReg);
1880       }
1881     } else {
1882       createTailCallBranchInstr(MBB);
1883     }
1884   }
1885 }
1886 
1887 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1888   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1889 
1890   // If we got this far a first terminator should exist.
1891   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1892 
1893   DebugLoc dl = MBBI->getDebugLoc();
1894   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1895 
1896   // Create branch instruction for pseudo tail call return instruction.
1897   // The TCRETURNdi variants are direct calls. Valid targets for those are
1898   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1899   // since we can tail call external functions with PC-Rel (i.e. we don't need
1900   // to worry about different TOC pointers). Some of the external functions will
1901   // be MO_GlobalAddress while others like memcpy for example, are going to
1902   // be MO_ExternalSymbol.
1903   unsigned RetOpcode = MBBI->getOpcode();
1904   if (RetOpcode == PPC::TCRETURNdi) {
1905     MBBI = MBB.getLastNonDebugInstr();
1906     MachineOperand &JumpTarget = MBBI->getOperand(0);
1907     if (JumpTarget.isGlobal())
1908       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1909         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1910     else if (JumpTarget.isSymbol())
1911       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1912         addExternalSymbol(JumpTarget.getSymbolName());
1913     else
1914       llvm_unreachable("Expecting Global or External Symbol");
1915   } else if (RetOpcode == PPC::TCRETURNri) {
1916     MBBI = MBB.getLastNonDebugInstr();
1917     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1918     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1919   } else if (RetOpcode == PPC::TCRETURNai) {
1920     MBBI = MBB.getLastNonDebugInstr();
1921     MachineOperand &JumpTarget = MBBI->getOperand(0);
1922     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1923   } else if (RetOpcode == PPC::TCRETURNdi8) {
1924     MBBI = MBB.getLastNonDebugInstr();
1925     MachineOperand &JumpTarget = MBBI->getOperand(0);
1926     if (JumpTarget.isGlobal())
1927       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1928         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1929     else if (JumpTarget.isSymbol())
1930       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1931         addExternalSymbol(JumpTarget.getSymbolName());
1932     else
1933       llvm_unreachable("Expecting Global or External Symbol");
1934   } else if (RetOpcode == PPC::TCRETURNri8) {
1935     MBBI = MBB.getLastNonDebugInstr();
1936     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1937     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1938   } else if (RetOpcode == PPC::TCRETURNai8) {
1939     MBBI = MBB.getLastNonDebugInstr();
1940     MachineOperand &JumpTarget = MBBI->getOperand(0);
1941     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1942   }
1943 }
1944 
1945 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1946                                             BitVector &SavedRegs,
1947                                             RegScavenger *RS) const {
1948   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1949 
1950   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1951 
1952   //  Save and clear the LR state.
1953   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1954   unsigned LR = RegInfo->getRARegister();
1955   FI->setMustSaveLR(MustSaveLR(MF, LR));
1956   SavedRegs.reset(LR);
1957 
1958   //  Save R31 if necessary
1959   int FPSI = FI->getFramePointerSaveIndex();
1960   const bool isPPC64 = Subtarget.isPPC64();
1961   MachineFrameInfo &MFI = MF.getFrameInfo();
1962 
1963   // If the frame pointer save index hasn't been defined yet.
1964   if (!FPSI && needsFP(MF)) {
1965     // Find out what the fix offset of the frame pointer save area.
1966     int FPOffset = getFramePointerSaveOffset();
1967     // Allocate the frame index for frame pointer save area.
1968     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1969     // Save the result.
1970     FI->setFramePointerSaveIndex(FPSI);
1971   }
1972 
1973   int BPSI = FI->getBasePointerSaveIndex();
1974   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1975     int BPOffset = getBasePointerSaveOffset();
1976     // Allocate the frame index for the base pointer save area.
1977     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1978     // Save the result.
1979     FI->setBasePointerSaveIndex(BPSI);
1980   }
1981 
1982   // Reserve stack space for the PIC Base register (R30).
1983   // Only used in SVR4 32-bit.
1984   if (FI->usesPICBase()) {
1985     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1986     FI->setPICBasePointerSaveIndex(PBPSI);
1987   }
1988 
1989   // Make sure we don't explicitly spill r31, because, for example, we have
1990   // some inline asm which explicitly clobbers it, when we otherwise have a
1991   // frame pointer and are using r31's spill slot for the prologue/epilogue
1992   // code. Same goes for the base pointer and the PIC base register.
1993   if (needsFP(MF))
1994     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1995   if (RegInfo->hasBasePointer(MF))
1996     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1997   if (FI->usesPICBase())
1998     SavedRegs.reset(PPC::R30);
1999 
2000   // Reserve stack space to move the linkage area to in case of a tail call.
2001   int TCSPDelta = 0;
2002   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2003       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2004     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2005   }
2006 
2007   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2008   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2009   // object at the offset of the CR-save slot in the linkage area. The actual
2010   // save and restore of the condition register will be created as part of the
2011   // prologue and epilogue insertion, but the FixedStack object is needed to
2012   // keep the CalleSavedInfo valid.
2013   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2014        SavedRegs.test(PPC::CR4))) {
2015     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2016     const int64_t SpillOffset =
2017         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2018     int FrameIdx =
2019         MFI.CreateFixedObject(SpillSize, SpillOffset,
2020                               /* IsImmutable */ true, /* IsAliased */ false);
2021     FI->setCRSpillFrameIndex(FrameIdx);
2022   }
2023 }
2024 
2025 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2026                                                        RegScavenger *RS) const {
2027   // Get callee saved register information.
2028   MachineFrameInfo &MFI = MF.getFrameInfo();
2029   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2030 
2031   // If the function is shrink-wrapped, and if the function has a tail call, the
2032   // tail call might not be in the new RestoreBlock, so real branch instruction
2033   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2034   // RestoreBlock. So we handle this case here.
2035   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2036     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2037     for (MachineBasicBlock &MBB : MF) {
2038       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2039         createTailCallBranchInstr(MBB);
2040     }
2041   }
2042 
2043   // Early exit if no callee saved registers are modified!
2044   if (CSI.empty() && !needsFP(MF)) {
2045     addScavengingSpillSlot(MF, RS);
2046     return;
2047   }
2048 
2049   unsigned MinGPR = PPC::R31;
2050   unsigned MinG8R = PPC::X31;
2051   unsigned MinFPR = PPC::F31;
2052   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2053 
2054   bool HasGPSaveArea = false;
2055   bool HasG8SaveArea = false;
2056   bool HasFPSaveArea = false;
2057   bool HasVRSAVESaveArea = false;
2058   bool HasVRSaveArea = false;
2059 
2060   SmallVector<CalleeSavedInfo, 18> GPRegs;
2061   SmallVector<CalleeSavedInfo, 18> G8Regs;
2062   SmallVector<CalleeSavedInfo, 18> FPRegs;
2063   SmallVector<CalleeSavedInfo, 18> VRegs;
2064 
2065   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2066     unsigned Reg = CSI[i].getReg();
2067     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2068             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2069            "Not expecting to try to spill R2 in a function that must save TOC");
2070     if (PPC::GPRCRegClass.contains(Reg)) {
2071       HasGPSaveArea = true;
2072 
2073       GPRegs.push_back(CSI[i]);
2074 
2075       if (Reg < MinGPR) {
2076         MinGPR = Reg;
2077       }
2078     } else if (PPC::G8RCRegClass.contains(Reg)) {
2079       HasG8SaveArea = true;
2080 
2081       G8Regs.push_back(CSI[i]);
2082 
2083       if (Reg < MinG8R) {
2084         MinG8R = Reg;
2085       }
2086     } else if (PPC::F8RCRegClass.contains(Reg)) {
2087       HasFPSaveArea = true;
2088 
2089       FPRegs.push_back(CSI[i]);
2090 
2091       if (Reg < MinFPR) {
2092         MinFPR = Reg;
2093       }
2094     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2095                PPC::CRRCRegClass.contains(Reg)) {
2096       ; // do nothing, as we already know whether CRs are spilled
2097     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
2098       HasVRSAVESaveArea = true;
2099     } else if (PPC::VRRCRegClass.contains(Reg) ||
2100                PPC::SPERCRegClass.contains(Reg)) {
2101       // Altivec and SPE are mutually exclusive, but have the same stack
2102       // alignment requirements, so overload the save area for both cases.
2103       HasVRSaveArea = true;
2104 
2105       VRegs.push_back(CSI[i]);
2106 
2107       if (Reg < MinVR) {
2108         MinVR = Reg;
2109       }
2110     } else {
2111       llvm_unreachable("Unknown RegisterClass!");
2112     }
2113   }
2114 
2115   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2116   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2117 
2118   int64_t LowerBound = 0;
2119 
2120   // Take into account stack space reserved for tail calls.
2121   int TCSPDelta = 0;
2122   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2123       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2124     LowerBound = TCSPDelta;
2125   }
2126 
2127   // The Floating-point register save area is right below the back chain word
2128   // of the previous stack frame.
2129   if (HasFPSaveArea) {
2130     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2131       int FI = FPRegs[i].getFrameIdx();
2132 
2133       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2134     }
2135 
2136     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2137   }
2138 
2139   // Check whether the frame pointer register is allocated. If so, make sure it
2140   // is spilled to the correct offset.
2141   if (needsFP(MF)) {
2142     int FI = PFI->getFramePointerSaveIndex();
2143     assert(FI && "No Frame Pointer Save Slot!");
2144     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2145     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2146     HasGPSaveArea = true;
2147   }
2148 
2149   if (PFI->usesPICBase()) {
2150     int FI = PFI->getPICBasePointerSaveIndex();
2151     assert(FI && "No PIC Base Pointer Save Slot!");
2152     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2153 
2154     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2155     HasGPSaveArea = true;
2156   }
2157 
2158   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2159   if (RegInfo->hasBasePointer(MF)) {
2160     int FI = PFI->getBasePointerSaveIndex();
2161     assert(FI && "No Base Pointer Save Slot!");
2162     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2163 
2164     Register BP = RegInfo->getBaseRegister(MF);
2165     if (PPC::G8RCRegClass.contains(BP)) {
2166       MinG8R = std::min<unsigned>(MinG8R, BP);
2167       HasG8SaveArea = true;
2168     } else if (PPC::GPRCRegClass.contains(BP)) {
2169       MinGPR = std::min<unsigned>(MinGPR, BP);
2170       HasGPSaveArea = true;
2171     }
2172   }
2173 
2174   // General register save area starts right below the Floating-point
2175   // register save area.
2176   if (HasGPSaveArea || HasG8SaveArea) {
2177     // Move general register save area spill slots down, taking into account
2178     // the size of the Floating-point register save area.
2179     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2180       if (!GPRegs[i].isSpilledToReg()) {
2181         int FI = GPRegs[i].getFrameIdx();
2182         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2183       }
2184     }
2185 
2186     // Move general register save area spill slots down, taking into account
2187     // the size of the Floating-point register save area.
2188     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2189       if (!G8Regs[i].isSpilledToReg()) {
2190         int FI = G8Regs[i].getFrameIdx();
2191         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2192       }
2193     }
2194 
2195     unsigned MinReg =
2196       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2197                          TRI->getEncodingValue(MinG8R));
2198 
2199     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2200     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2201   }
2202 
2203   // For 32-bit only, the CR save area is below the general register
2204   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2205   // to the stack pointer and hence does not need an adjustment here.
2206   // Only CR2 (the first nonvolatile spilled) has an associated frame
2207   // index so that we have a single uniform save area.
2208   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2209     // Adjust the frame index of the CR spill slot.
2210     for (const auto &CSInfo : CSI) {
2211       if (CSInfo.getReg() == PPC::CR2) {
2212         int FI = CSInfo.getFrameIdx();
2213         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2214         break;
2215       }
2216     }
2217 
2218     LowerBound -= 4; // The CR save area is always 4 bytes long.
2219   }
2220 
2221   if (HasVRSAVESaveArea) {
2222     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2223     //             which have the VRSAVE register class?
2224     // Adjust the frame index of the VRSAVE spill slot.
2225     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2226       unsigned Reg = CSI[i].getReg();
2227 
2228       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2229         int FI = CSI[i].getFrameIdx();
2230 
2231         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2232       }
2233     }
2234 
2235     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2236   }
2237 
2238   // Both Altivec and SPE have the same alignment and padding requirements
2239   // within the stack frame.
2240   if (HasVRSaveArea) {
2241     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2242     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2243     // we are using negative number here (the stack grows downward). We should
2244     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2245     // is the alignment size ( n = 16 here) and y is the size after aligning.
2246     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2247     LowerBound &= ~(15);
2248 
2249     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2250       int FI = VRegs[i].getFrameIdx();
2251 
2252       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2253     }
2254   }
2255 
2256   addScavengingSpillSlot(MF, RS);
2257 }
2258 
// Registers emergency spill slot(s) with the register scavenger.
//
// One stack object, sized and aligned for a G8RC register on PPC64 or a GPRC
// register otherwise, is created and handed to RS when any of the following
// holds: the frame has variable-sized objects, CR or VRSAVE is spilled,
// hasNonRISpills(MF) is true, or the function has spills and the frame size
// returned by determineFrameLayout does not fit a signed 16-bit immediate.
// A second slot of the same size is added when CR or VRSAVE is spilled, or
// when variable-sized objects coexist with a maximum alignment larger than
// the stack alignment. In every other case nothing is created.
//
// MF: function whose frame info receives the new stack objects.
// RS: scavenger that records the new frame indices. It is dereferenced
//     without a null check whenever a slot is required.
2259 void
2260 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2261                                          RegScavenger *RS) const {
2262   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2263   // a large stack, which will require scavenging a register to materialize a
2264   // large offset.
2265 
2266   // We need to have a scavenger spill slot for spills if the frame size is
2267   // large. In case there is no free register for large-offset addressing,
2268   // this slot is used for the necessary emergency spill. Also, we need the
2269   // slot for dynamic stack allocations.
2270 
2271   // The scavenger might be invoked if the frame offset does not fit into
2272   // the 16-bit immediate. We don't know the complete frame size here
2273   // because we've not yet computed callee-saved register spills or the
2274   // needed alignment padding.
       // NOTE(review): the `true` argument presumably requests an estimate that
       // leaves the frame info untouched -- confirm against
       // determineFrameLayout.
2275   unsigned StackSize = determineFrameLayout(MF, true);
2276   MachineFrameInfo &MFI = MF.getFrameInfo();
2277   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2278       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
         // The slot must hold one general-purpose register of the native width.
2279     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2280     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2281     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2282     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2283     unsigned Size = TRI.getSpillSize(RC);
2284     Align Alignment = TRI.getSpillAlign(RC);
2285     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2286 
2287     // Might we have over-aligned allocas?
2288     bool HasAlVars =
2289         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2290 
2291     // These kinds of spills might need two registers.
2292     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2293       RS->addScavengingFrameIndex(
2294           MFI.CreateStackObject(Size, Alignment, false));
2295   }
2296 }
2297 
2298 // This function checks if a callee saved gpr can be spilled to a volatile
2299 // vector register. This occurs for leaf functions when the option
2300 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2301 // which were not spilled to vectors, return false so the target independent
2302 // code can handle them by assigning a FrameIdx to a stack slot.
//
// Return value, as implemented below:
//   - true  when CSI is empty, or when every entry of CSI received a
//           destination register via setDstReg.
//   - false when the option is off, the function has calls, the subtarget
//           lacks P9 vector support, the candidate set runs out, or some
//           entry is not a GPRC/G8RC register.
// Entries already given a destination register keep it even when false is
// returned.
2303 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2304     MachineFunction &MF, const TargetRegisterInfo *TRI,
2305     std::vector<CalleeSavedInfo> &CSI) const {
2306 
2307   if (CSI.empty())
2308     return true; // Early exit if no callee saved registers are modified!
2309 
2310   // Early exit if cannot spill gprs to volatile vector registers.
2311   MachineFrameInfo &MFI = MF.getFrameInfo();
2312   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2313     return false;
2314 
2315   // Build a BitVector of VSRs that can be used for spilling GPRs.
2316   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2317   BitVector BVCalleeSaved(TRI->getNumRegs());
2318   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2319   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
       // The callee-saved register list is terminated by a zero entry.
2320   for (unsigned i = 0; CSRegs[i]; ++i)
2321     BVCalleeSaved.set(CSRegs[i]);
2322 
       // Narrow the allocatable set down to the usable spill destinations.
2323   for (unsigned Reg : BVAllocatable.set_bits()) {
2324     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2325     // used in the function.
2326     if (BVCalleeSaved[Reg] ||
2327         (!PPC::F8RCRegClass.contains(Reg) &&
2328          !PPC::VFRCRegClass.contains(Reg)) ||
2329         (MF.getRegInfo().isPhysRegUsed(Reg)))
2330       BVAllocatable.reset(Reg);
2331   }
2332 
2333   bool AllSpilledToReg = true;
2334   for (auto &CS : CSI) {
         // No candidate left: give up and let the caller assign stack slots.
2335     if (BVAllocatable.none())
2336       return false;
2337 
         // Only general-purpose registers are moved into vector registers.
2338     unsigned Reg = CS.getReg();
2339     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2340       AllSpilledToReg = false;
2341       continue;
2342     }
2343 
         // Take the lowest-numbered remaining candidate and retire it.
         // NOTE(review): given the none() check above, the else branch below
         // looks unreachable -- confirm against BitVector::find_first.
2344     unsigned VolatileVFReg = BVAllocatable.find_first();
2345     if (VolatileVFReg < BVAllocatable.size()) {
2346       CS.setDstReg(VolatileVFReg);
2347       BVAllocatable.reset(VolatileVFReg);
2348     } else {
2349       AllSpilledToReg = false;
2350     }
2351   }
2352   return AllSpilledToReg;
2353 }
2354 
// Emits the callee-saved register spills described by CSI in front of MI in
// MBB. Each entry is handled as follows, in this order:
//   - VRSAVE is skipped entirely.
//   - The register is added to MBB's live-ins unless the function's register
//     info already reports it as live-in.
//   - Once a CR field has been spilled on 32-bit ELF, every further CR2-CR4
//     entry is only appended to that MFCR as an implicit-kill operand.
//   - X2/R2 is skipped when the function info says the TOC must be saved.
//   - A CR field is recorded with addMustSaveCR on anything but the 32-bit
//     ELF ABI; on 32-bit ELF it is copied to R12 with MFCR and stored with
//     STW to the entry's frame index.
//   - Any other register is moved with MTVSRD when it is spilled to a
//     register, and stored to its frame index otherwise.
// The function always returns true.
2355 bool PPCFrameLowering::spillCalleeSavedRegisters(
2356     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2357     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2358 
2359   MachineFunction *MF = MBB.getParent();
2360   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2361   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2362   bool MustSaveTOC = FI->mustSaveTOC();
2363   DebugLoc DL;
       // CRSpilled/CRMIB remember the single MFCR emitted for 32-bit ELF so
       // that later CR fields can be attached to it.
2364   bool CRSpilled = false;
2365   MachineInstrBuilder CRMIB;
2366 
2367   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2368     unsigned Reg = CSI[i].getReg();
2369     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2370     if (Reg == PPC::VRSAVE)
2371       continue;
2372 
2373     // CR2 through CR4 are the nonvolatile CR fields.
2374     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2375 
2376     // Add the callee-saved register as live-in; it's killed at the spill.
2377     // Do not do this for callee-saved registers that are live-in to the
2378     // function because they will already be marked live-in and this will be
2379     // adding it for a second time. It is an error to add the same register
2380     // to the set more than once.
2381     const MachineRegisterInfo &MRI = MF->getRegInfo();
2382     bool IsLiveIn = MRI.isLiveIn(Reg);
2383     if (!IsLiveIn)
2384        MBB.addLiveIn(Reg);
2385 
         // A previous iteration already emitted the MFCR; just record that
         // this field is consumed by it as well.
2386     if (CRSpilled && IsCRField) {
2387       CRMIB.addReg(Reg, RegState::ImplicitKill);
2388       continue;
2389     }
2390 
2391     // The actual spill will happen in the prologue.
2392     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2393       continue;
2394 
2395     // Insert the spill to the stack frame.
2396     if (IsCRField) {
2397       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2398       if (!Subtarget.is32BitELFABI()) {
2399         // The actual spill will happen at the start of the prologue.
2400         FuncInfo->addMustSaveCR(Reg);
2401       } else {
2402         CRSpilled = true;
2403         FuncInfo->setSpillsCR();
2404 
2405         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2406         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2407         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2408                   .addReg(Reg, RegState::ImplicitKill);
2409 
             // Insert the MFCR first, then the store of R12 to the CR slot.
2410         MBB.insert(MI, CRMIB);
2411         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2412                                          .addReg(PPC::R12,
2413                                                  getKillRegState(true)),
2414                                          CSI[i].getFrameIdx()));
2415       }
2416     } else {
2417       if (CSI[i].isSpilledToReg()) {
             // Register-to-register spill: count it and move the GPR into the
             // destination register chosen for this entry.
2418         NumPESpillVSR++;
2419         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2420           .addReg(Reg, getKillRegState(true));
2421       } else {
2422         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2423         // Use !IsLiveIn for the kill flag.
2424         // We do not want to kill registers that are live in this function
2425         // before their use because they will become undefined registers.
2426         // Functions without NoUnwind need to preserve the order of elements in
2427         // saved vector registers.
2428         if (Subtarget.needsSwapsForVSXMemOps() &&
2429             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2430           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2431                                        CSI[i].getFrameIdx(), RC, TRI);
2432         else
2433           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2434                                   RC, TRI);
2435       }
2436     }
2437   }
2438   return true;
2439 }
2440 
// Reloads the saved CR word and restores the requested nonvolatile CR fields.
//
// Inserts, before MI in MBB, an LWZ of the frame index held by CSI[CSIIndex]
// into R12, followed by one MTOCRF from R12 for each of CR2, CR3 and CR4
// whose *Spilled flag is set. R12 carries the kill flag only on the last
// MTOCRF emitted.
//
// is31 is accepted for the callers' benefit but is not referenced in this
// body.
2441 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2442                        bool CR4Spilled, MachineBasicBlock &MBB,
2443                        MachineBasicBlock::iterator MI,
2444                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2445 
2446   MachineFunction *MF = MBB.getParent();
2447   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2448   DebugLoc DL;
2449   unsigned MoveReg = PPC::R12;
2450 
2451   // 32-bit:  FP-relative
2452   MBB.insert(MI,
2453              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2454                                CSI[CSIIndex].getFrameIdx()));
2455 
2456   unsigned RestoreOp = PPC::MTOCRF;
       // MoveReg is killed here only if neither CR3 nor CR4 follows.
2457   if (CR2Spilled)
2458     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2459                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2460 
       // MoveReg is killed here only if CR4 does not follow.
2461   if (CR3Spilled)
2462     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2463                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2464 
       // CR4 is always the last consumer of MoveReg.
2465   if (CR4Spilled)
2466     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2467                .addReg(MoveReg, getKillRegState(true)));
2468 }
2469 
// Removes the call-frame pseudo instruction at I from MBB and returns the
// iterator produced by the erase.
//
// Before erasing, if GuaranteedTailCallOpt is enabled and I is an
// ADJCALLSTACKUP whose operand 1 immediate is non-zero, the negated amount is
// added to the stack pointer (X1 on PPC64, R1 otherwise): a single ADDI when
// the value fits a signed 16-bit immediate, otherwise a LIS/ORI pair that
// builds the value in X0/R0 followed by an ADD.
2470 MachineBasicBlock::iterator PPCFrameLowering::
2471 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2472                               MachineBasicBlock::iterator I) const {
2473   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2474   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2475       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2476     // Add (actually subtract) back the amount the callee popped on return.
2477     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2478       bool is64Bit = Subtarget.isPPC64();
2479       CalleeAmt *= -1;
           // Pick the 64-bit or 32-bit flavour of every register and opcode.
2480       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2481       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2482       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2483       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2484       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2485       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2486       const DebugLoc &dl = I->getDebugLoc();
2487 
2488       if (isInt<16>(CalleeAmt)) {
2489         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2490           .addReg(StackReg, RegState::Kill)
2491           .addImm(CalleeAmt);
2492       } else {
             // Build the value in TmpReg from its upper bits (CalleeAmt >> 16)
             // and its low 16 bits, then add it to the stack pointer.
2493         MachineBasicBlock::iterator MBBI = I;
2494         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2495           .addImm(CalleeAmt >> 16);
2496         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2497           .addReg(TmpReg, RegState::Kill)
2498           .addImm(CalleeAmt & 0xFFFF);
2499         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2500           .addReg(StackReg, RegState::Kill)
2501           .addReg(TmpReg);
2502       }
2503     }
2504   }
2505   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2506   return MBB.erase(I);
2507 }
2508 
// Returns true when Reg is one of the condition register fields CR2, CR3 or
// CR4, and false for every other register number.
2509 static bool isCalleeSavedCR(unsigned Reg) {
2510   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2511 }
2512 
// Emits the callee-saved register restores described by CSI in front of MI in
// MBB, so that the restores end up in the reverse order of the CSI entries.
//
// Per entry:
//   - VRSAVE is skipped, as is X2/R2 when the function info says the TOC
//     must be saved.
//   - CR2-CR4 are skipped on anything but the 32-bit ELF ABI. On 32-bit ELF
//     they are only noted (CR2 also records the index that owns the spill
//     slot) and restored together through restoreCRs, either when the next
//     non-CR register is reached or after the loop.
//   - Any other register is reloaded with MFVSRD when it was spilled to a
//     register, and loaded from its frame index otherwise.
// The function always returns true.
2513 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2514     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2515     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2516   MachineFunction *MF = MBB.getParent();
2517   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2518   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2519   bool MustSaveTOC = FI->mustSaveTOC();
2520   bool CR2Spilled = false;
2521   bool CR3Spilled = false;
2522   bool CR4Spilled = false;
2523   unsigned CSIIndex = 0;
2524 
2525   // Initialize insertion-point logic; we will be restoring in reverse
2526   // order of spill.
       // BeforeI names the instruction just ahead of MI (when there is one), so
       // the insertion point can be re-derived after each round of inserts.
2527   MachineBasicBlock::iterator I = MI, BeforeI = I;
2528   bool AtStart = I == MBB.begin();
2529 
2530   if (!AtStart)
2531     --BeforeI;
2532 
2533   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2534     unsigned Reg = CSI[i].getReg();
2535 
2536     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2537     if (Reg == PPC::VRSAVE)
2538       continue;
2539 
         // Mirrors the spill side: the TOC register is not handled here when
         // the function info says the TOC must be saved.
2540     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2541       continue;
2542 
2543     // Restore of callee saved condition register field is handled during
2544     // epilogue insertion.
2545     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2546       continue;
2547 
2548     if (Reg == PPC::CR2) {
2549       CR2Spilled = true;
2550       // The spill slot is associated only with CR2, which is the
2551       // first nonvolatile spilled.  Save it here.
2552       CSIIndex = i;
2553       continue;
2554     } else if (Reg == PPC::CR3) {
2555       CR3Spilled = true;
2556       continue;
2557     } else if (Reg == PPC::CR4) {
2558       CR4Spilled = true;
2559       continue;
2560     } else {
2561       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2562       // least one CR register, restore all spilled CRs together.
2563       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2564         bool is31 = needsFP(*MF);
2565         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2566                    CSIIndex);
2567         CR2Spilled = CR3Spilled = CR4Spilled = false;
2568       }
2569 
2570       if (CSI[i].isSpilledToReg()) {
             // Register-to-register reload: count it and move the value back
             // from the register it was spilled to.
2571         DebugLoc DL;
2572         NumPEReloadVSR++;
2573         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2574             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2575       } else {
2576        // Default behavior for non-CR saves.
2577         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2578 
2579         // Functions without NoUnwind need to preserve the order of elements in
2580         // saved vector registers.
2581         if (Subtarget.needsSwapsForVSXMemOps() &&
2582             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2583           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2584                                         TRI);
2585         else
2586           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2587 
2588         assert(I != MBB.begin() &&
2589                "loadRegFromStackSlot didn't insert any code!");
2590       }
2591     }
2592 
2593     // Insert in reverse order.
         // Point I at the first instruction after BeforeI (or at the block
         // start), i.e. in front of everything inserted so far.
2594     if (AtStart)
2595       I = MBB.begin();
2596     else {
2597       I = BeforeI;
2598       ++I;
2599     }
2600   }
2601 
2602   // If we haven't yet restored the CRs, do so now.
2603   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2604     assert(Subtarget.is32BitELFABI() &&
2605            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2606     bool is31 = needsFP(*MF);
2607     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2608   }
2609 
2610   return true;
2611 }
2612 
// Returns the stored TOCSaveOffset value (set outside this section).
2613 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2614   return TOCSaveOffset;
2615 }
2616 
// Returns the stored FramePointerSaveOffset value (set outside this section).
2617 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2618   return FramePointerSaveOffset;
2619 }
2620 
// Returns the stored BasePointerSaveOffset value (set outside this section).
2621 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2622   return BasePointerSaveOffset;
2623 }
2624 
// Reports whether shrink wrapping may be applied to MF: false when the
// function info has shrink wrapping disabled, otherwise true only for a
// subtarget that is both SVR4 ABI and PPC64.
2625 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2626   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2627     return false;
2628   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2629           MF.getSubtarget<PPCSubtarget>().isPPC64());
2630 }
2631