1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCReturnProtectorLowering.h"
19 #include "PPCSubtarget.h"
20 #include "PPCTargetMachine.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/LivePhysRegs.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineModuleInfo.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/Target/TargetOptions.h"
31
32 using namespace llvm;
33
34 #define DEBUG_TYPE "framelowering"
35 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
36 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
37 STATISTIC(NumPrologProbed, "Number of prologues probed");
38
39 static cl::opt<bool>
40 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
41 cl::desc("Enable spills in prologue to vector registers."),
42 cl::init(false), cl::Hidden);
43
computeReturnSaveOffset(const PPCSubtarget & STI)44 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
45 if (STI.isAIXABI())
46 return STI.isPPC64() ? 16 : 8;
47 // SVR4 ABI:
48 return STI.isPPC64() ? 16 : 4;
49 }
50
computeTOCSaveOffset(const PPCSubtarget & STI)51 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
52 if (STI.isAIXABI())
53 return STI.isPPC64() ? 40 : 20;
54 return STI.isELFv2ABI() ? 24 : 40;
55 }
56
computeFramePointerSaveOffset(const PPCSubtarget & STI)57 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
58 // First slot in the general register save area.
59 return STI.isPPC64() ? -8U : -4U;
60 }
61
computeLinkageSize(const PPCSubtarget & STI)62 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
63 if (STI.isAIXABI() || STI.isPPC64())
64 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
65
66 // 32-bit SVR4 ABI:
67 return 8;
68 }
69
computeBasePointerSaveOffset(const PPCSubtarget & STI)70 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
71 // Third slot in the general purpose register save area.
72 if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
73 return -12U;
74
75 // Second slot in the general purpose register save area.
76 return STI.isPPC64() ? -16U : -8U;
77 }
78
computeCRSaveOffset(const PPCSubtarget & STI)79 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
80 return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
81 }
82
// All frame-layout offsets are fixed functions of the subtarget/ABI, so they
// are computed once at construction and cached in the members below.
// Stack grows down; local-area offset is 0 (objects start at the SP).
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
92
93 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
// Returns the ABI-specific table of fixed (register, SP-relative offset)
// spill slots and reports its length through NumEntries.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

  // Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

  // 32-bit general purpose register save area offsets shared by ELF and
  // AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

  // 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

  // Vector register save area offsets (16 bytes per VR).
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset.  We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  // Dispatch on the ABI: 64-bit ELF, 32-bit ELF, then AIX (64/32).
  if (Subtarget.is64BitELFABI()) {
    NumEntries = std::size(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = std::size(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = std::size(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = std::size(AIXOffsets32);
  return AIXOffsets32;
}
252
spillsCR(const MachineFunction & MF)253 static bool spillsCR(const MachineFunction &MF) {
254 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
255 return FuncInfo->isCRSpilled();
256 }
257
hasSpills(const MachineFunction & MF)258 static bool hasSpills(const MachineFunction &MF) {
259 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
260 return FuncInfo->hasSpills();
261 }
262
hasNonRISpills(const MachineFunction & MF)263 static bool hasNonRISpills(const MachineFunction &MF) {
264 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
265 return FuncInfo->hasNonRISpills();
266 }
267
268 /// MustSaveLR - Return true if this function requires that we save the LR
269 /// register onto the stack in the prolog and restore it in the epilog of the
270 /// function.
MustSaveLR(const MachineFunction & MF,unsigned LR)271 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
272 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
273
274 // We need a save/restore of LR if there is any def of LR (which is
275 // defined by calls, including the PIC setup sequence), or if there is
276 // some use of the LR stack slot (e.g. for builtin_return_address).
277 // (LR comes in 32 and 64 bit versions.)
278 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
279 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
280 }
281
/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size, then record both on the MachineFunction's frame info.
uint64_t
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
                                                bool UseEstimate) const {
  unsigned MaxCallFrame = 0;
  const uint64_t FrameSize =
      determineFrameLayout(MF, UseEstimate, &MaxCallFrame);

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setStackSize(FrameSize);
  MFI.setMaxCallFrameSize(MaxCallFrame);
  return FrameSize;
}
294
/// determineFrameLayout - Compute the total frame size for \p MF and,
/// optionally, the maximum call frame size via \p NewMaxCallFrameSize.
/// Returns 0 when the function can run stackless using the red zone.
uint64_t
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
                                       bool UseEstimate,
                                       unsigned *NewMaxCallFrameSize) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Bytes needed for locals — either the finalized size or an estimate.
  uint64_t FrameSize =
      UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();

  // The frame must satisfy both the ABI's required alignment and the largest
  // alignment demanded by any frame object.
  const Align Alignment = std::max(getStackAlign(), MFI.getMaxAlign());

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // The frame can be elided (red zone) only if nothing forces a stack
  // adjustment: no dynamic alloca, no calls, no LR/TOC save, no base
  // pointer, and the function does not forbid red-zone use.
  const bool RedZoneAllowed =
      !MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
  const bool NothingForcesFrame =
      !MFI.hasVarSizedObjects() &&                 // No dynamic alloca.
      !MFI.adjustsStack() &&                       // No calls.
      !MustSaveLR(MF, RegInfo->getRARegister()) && // No need to save LR.
      !FI->mustSaveTOC() &&                        // No need to save TOC.
      !RegInfo->hasBasePointer(MF);                // No special alignment.

  // Note: for PPC32 SVR4ABI, we can still generate stackless
  // code if all local vars are reg-allocated.
  if (RedZoneAllowed && NothingForcesFrame &&
      FrameSize <= Subtarget.getRedZoneSize())
    return 0;

  // The outgoing-argument area must be at least big enough for the linkage
  // area required by the ABI.
  unsigned maxCallFrameSize =
      std::max(MFI.getMaxCallFrameSize(), getLinkageSize());

  // Dynamic allocas carve their space out below the call frame, so the call
  // frame itself must be aligned for them.
  if (MFI.hasVarSizedObjects())
    maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);

  // Report the new max call frame size if the caller wants it.
  if (NewMaxCallFrameSize)
    *NewMaxCallFrameSize = maxCallFrameSize;

  // Total frame = locals + outgoing-argument area, rounded up to alignment.
  FrameSize += maxCallFrameSize;
  return alignTo(FrameSize, Alignment);
}
357
358 // hasFP - Return true if the specified function actually has a dedicated frame
359 // pointer register.
hasFP(const MachineFunction & MF) const360 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
361 const MachineFrameInfo &MFI = MF.getFrameInfo();
362 // FIXME: This is pretty much broken by design: hasFP() might be called really
363 // early, before the stack layout was calculated and thus hasFP() might return
364 // true or false here depending on the time of call.
365 return (MFI.getStackSize()) && needsFP(MF);
366 }
367
368 // needsFP - Return true if the specified function should have a dedicated frame
369 // pointer register. This is true if the function has variable sized allocas or
370 // if frame pointer elimination is disabled.
needsFP(const MachineFunction & MF) const371 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
372 const MachineFrameInfo &MFI = MF.getFrameInfo();
373
374 // Naked functions have no stack frame pushed, so we don't have a frame
375 // pointer.
376 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
377 return false;
378
379 return MF.getTarget().Options.DisableFramePointerElim(MF) ||
380 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
381 MF.exposesReturnsTwice() ||
382 (MF.getTarget().Options.GuaranteedTailCallOpt &&
383 MF.getInfo<PPCFunctionInfo>()->hasFastCall());
384 }
385
replaceFPWithRealFP(MachineFunction & MF) const386 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
387 bool is31 = needsFP(MF);
388 unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
389 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
390
391 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
392 bool HasBP = RegInfo->hasBasePointer(MF);
393 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
394 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
395
396 for (MachineBasicBlock &MBB : MF)
397 for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
398 --MBBI;
399 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
400 MachineOperand &MO = MBBI->getOperand(I);
401 if (!MO.isReg())
402 continue;
403
404 switch (MO.getReg()) {
405 case PPC::FP:
406 MO.setReg(FPReg);
407 break;
408 case PPC::FP8:
409 MO.setReg(FP8Reg);
410 break;
411 case PPC::BP:
412 MO.setReg(BPReg);
413 break;
414 case PPC::BP8:
415 MO.setReg(BP8Reg);
416 break;
417
418 }
419 }
420 }
421 }
422
423 /* This function will do the following:
424 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
425 respectively (defaults recommended by the ABI) and return true
426 - If MBB is not an entry block, initialize the register scavenger and look
427 for available registers.
428 - If the defaults (R0/R12) are available, return true
429 - If TwoUniqueRegsRequired is set to true, it looks for two unique
430 registers. Otherwise, look for a single available register.
431 - If the required registers are found, set SR1 and SR2 and return true.
432 - If the required registers are not found, set SR2 or both SR1 and SR2 to
433 PPC::NoRegister and return false.
434
435 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
436 is not set, this function will attempt to find two different registers, but
437 still return true if only one register is available (and set SR1 == SR2).
438 */
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  // R0 and R12 (X0/X12 in 64-bit mode) are the ABI-recommended defaults.
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  // No liveness analysis is needed there: the ABI guarantees they are free.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    // Advance the scavenger's live state to just before MBBI so it reflects
    // everything used earlier in the block.
    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                                           &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}
524
525 // We need a scratch register for spilling LR and for spilling CR. By default,
526 // we use two scratch registers to hide latency. However, if only one scratch
527 // register is available, we can adjust for that by not overlapping the spill
528 // code. However, if we need to realign the stack (i.e. have a base pointer)
529 // and the stack frame is large, we need two scratch registers.
530 // Also, stack probe requires two scratch registers, one for old sp, one for
531 // large frame and large probe size.
532 bool
twoUniqueScratchRegsRequired(MachineBasicBlock * MBB) const533 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
534 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
535 MachineFunction &MF = *(MBB->getParent());
536 bool HasBP = RegInfo->hasBasePointer(MF);
537 unsigned FrameSize = determineFrameLayout(MF);
538 int NegFrameSize = -FrameSize;
539 bool IsLargeFrame = !isInt<16>(NegFrameSize);
540 MachineFrameInfo &MFI = MF.getFrameInfo();
541 Align MaxAlign = MFI.getMaxAlign();
542 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
543 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
544
545 return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
546 TLI.hasInlineStackProbe(MF);
547 }
548
canUseAsPrologue(const MachineBasicBlock & MBB) const549 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
550 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
551
552 return findScratchRegister(TmpMBB, false,
553 twoUniqueScratchRegsRequired(TmpMBB));
554 }
555
canUseAsEpilogue(const MachineBasicBlock & MBB) const556 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
557 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
558
559 return findScratchRegister(TmpMBB, true);
560 }
561
stackUpdateCanBeMoved(MachineFunction & MF) const562 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
563 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
564 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
565
566 // Abort if there is no register info or function info.
567 if (!RegInfo || !FI)
568 return false;
569
570 // Only move the stack update on ELFv2 ABI and PPC64.
571 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
572 return false;
573
574 // Check the frame size first and return false if it does not fit the
575 // requirements.
576 // We need a non-zero frame size as well as a frame that will fit in the red
577 // zone. This is because by moving the stack pointer update we are now storing
578 // to the red zone until the stack pointer is updated. If we get an interrupt
579 // inside the prologue but before the stack update we now have a number of
580 // stores to the red zone and those stores must all fit.
581 MachineFrameInfo &MFI = MF.getFrameInfo();
582 unsigned FrameSize = MFI.getStackSize();
583 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
584 return false;
585
586 // Frame pointers and base pointers complicate matters so don't do anything
587 // if we have them. For example having a frame pointer will sometimes require
588 // a copy of r1 into r31 and that makes keeping track of updates to r1 more
589 // difficult. Similar situation exists with setjmp.
590 if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
591 return false;
592
593 // Calls to fast_cc functions use different rules for passing parameters on
594 // the stack from the ABI and using PIC base in the function imposes
595 // similar restrictions to using the base pointer. It is not generally safe
596 // to move the stack pointer update in these situations.
597 if (FI->hasFastCall() || FI->usesPICBase())
598 return false;
599
600 // Finally we can move the stack update if we do not require register
601 // scavenging. Register scavenging can introduce more spills and so
602 // may make the frame size larger than we have computed.
603 return !RegInfo->requiresFrameIndexScavenging(MF);
604 }
605
emitPrologue(MachineFunction & MF,MachineBasicBlock & MBB) const606 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
607 MachineBasicBlock &MBB) const {
608 MachineBasicBlock::iterator MBBI = MBB.begin();
609 MachineFrameInfo &MFI = MF.getFrameInfo();
610 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
611 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
612 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
613
614 MachineModuleInfo &MMI = MF.getMMI();
615 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
616 DebugLoc dl;
617 // AIX assembler does not support cfi directives.
618 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
619
620 const bool HasFastMFLR = Subtarget.hasFastMFLR();
621
622 // Get processor type.
623 bool isPPC64 = Subtarget.isPPC64();
624 // Get the ABI.
625 bool isSVR4ABI = Subtarget.isSVR4ABI();
626 bool isELFv2ABI = Subtarget.isELFv2ABI();
627 assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
628
629 // Work out frame sizes.
630 uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
631 int64_t NegFrameSize = -FrameSize;
632 if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
633 llvm_unreachable("Unhandled stack size!");
634
635 if (MFI.isFrameAddressTaken())
636 replaceFPWithRealFP(MF);
637
638 // Check if the link register (LR) must be saved.
639 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
640 bool MustSaveLR = FI->mustSaveLR();
641 bool MustSaveTOC = FI->mustSaveTOC();
642 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
643 bool MustSaveCR = !MustSaveCRs.empty();
644 // Do we have a frame pointer and/or base pointer for this function?
645 bool HasFP = hasFP(MF);
646 bool HasBP = RegInfo->hasBasePointer(MF);
647 bool HasRedZone = isPPC64 || !isSVR4ABI;
648 bool HasROPProtect = Subtarget.hasROPProtect();
649 bool HasPrivileged = Subtarget.hasPrivileged();
650
651 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
652 Register BPReg = RegInfo->getBaseRegister(MF);
653 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
654 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
655 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
656 Register ScratchReg;
657 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
658 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
659 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
660 : PPC::MFLR );
661 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
662 : PPC::STW );
663 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
664 : PPC::STWU );
665 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
666 : PPC::STWUX);
667 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
668 : PPC::OR );
669 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
670 : PPC::SUBFC);
671 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
672 : PPC::SUBFIC);
673 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
674 : PPC::MFCR);
675 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
676 const MCInstrDesc &HashST =
677 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
678 : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
679
680 // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
681 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
682 // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
683 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
684 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
685 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
686
687 // Using the same bool variable as below to suppress compiler warnings.
688 bool SingleScratchReg = findScratchRegister(
689 &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
690 assert(SingleScratchReg &&
691 "Required number of registers not available in this block");
692
693 SingleScratchReg = ScratchReg == TempReg;
694
695 int64_t LROffset = getReturnSaveOffset();
696
697 int64_t FPOffset = 0;
698 if (HasFP) {
699 MachineFrameInfo &MFI = MF.getFrameInfo();
700 int FPIndex = FI->getFramePointerSaveIndex();
701 assert(FPIndex && "No Frame Pointer Save Slot!");
702 FPOffset = MFI.getObjectOffset(FPIndex);
703 }
704
705 int64_t BPOffset = 0;
706 if (HasBP) {
707 MachineFrameInfo &MFI = MF.getFrameInfo();
708 int BPIndex = FI->getBasePointerSaveIndex();
709 assert(BPIndex && "No Base Pointer Save Slot!");
710 BPOffset = MFI.getObjectOffset(BPIndex);
711 }
712
713 int64_t PBPOffset = 0;
714 if (FI->usesPICBase()) {
715 MachineFrameInfo &MFI = MF.getFrameInfo();
716 int PBPIndex = FI->getPICBasePointerSaveIndex();
717 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
718 PBPOffset = MFI.getObjectOffset(PBPIndex);
719 }
720
721 // Get stack alignments.
722 Align MaxAlign = MFI.getMaxAlign();
723 if (HasBP && MaxAlign > 1)
724 assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
725
726 // Frames of 32KB & larger require special handling because they cannot be
727 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
728 bool isLargeFrame = !isInt<16>(NegFrameSize);
729
730 // Check if we can move the stack update instruction (stdu) down the prologue
731 // past the callee saves. Hopefully this will avoid the situation where the
732 // saves are waiting for the update on the store with update to complete.
733 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
734 bool MovingStackUpdateDown = false;
735
736 // Check if we can move the stack update.
737 if (stackUpdateCanBeMoved(MF)) {
738 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
739 for (CalleeSavedInfo CSI : Info) {
740 // If the callee saved register is spilled to a register instead of the
741 // stack then the spill no longer uses the stack pointer.
742 // This can lead to two consequences:
743 // 1) We no longer need to update the stack because the function does not
744 // spill any callee saved registers to stack.
745 // 2) We have a situation where we still have to update the stack pointer
746 // even though some registers are spilled to other registers. In
747 // this case the current code moves the stack update to an incorrect
748 // position.
749 // In either case we should abort moving the stack update operation.
750 if (CSI.isSpilledToReg()) {
751 StackUpdateLoc = MBBI;
752 MovingStackUpdateDown = false;
753 break;
754 }
755
756 int FrIdx = CSI.getFrameIdx();
757 // If the frame index is not negative the callee saved info belongs to a
758 // stack object that is not a fixed stack object. We ignore non-fixed
759 // stack objects because we won't move the stack update pointer past them.
760 if (FrIdx >= 0)
761 continue;
762
763 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
764 StackUpdateLoc++;
765 MovingStackUpdateDown = true;
766 } else {
767 // We need all of the Frame Indices to meet these conditions.
768 // If they do not, abort the whole operation.
769 StackUpdateLoc = MBBI;
770 MovingStackUpdateDown = false;
771 break;
772 }
773 }
774
775 // If the operation was not aborted then update the object offset.
776 if (MovingStackUpdateDown) {
777 for (CalleeSavedInfo CSI : Info) {
778 int FrIdx = CSI.getFrameIdx();
779 if (FrIdx < 0)
780 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
781 }
782 }
783 }
784
785 // Where in the prologue we move the CR fields depends on how many scratch
786 // registers we have, and if we need to save the link register or not. This
787 // lambda is to avoid duplicating the logic in 2 places.
788 auto BuildMoveFromCR = [&]() {
789 if (isELFv2ABI && MustSaveCRs.size() == 1) {
790 // In the ELFv2 ABI, we are not required to save all CR fields.
791 // If only one CR field is clobbered, it is more efficient to use
792 // mfocrf to selectively save just that field, because mfocrf has short
793 // latency compares to mfcr.
794 assert(isPPC64 && "V2 ABI is 64-bit only.");
795 MachineInstrBuilder MIB =
796 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
797 MIB.addReg(MustSaveCRs[0], RegState::Kill);
798 } else {
799 MachineInstrBuilder MIB =
800 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
801 for (unsigned CRfield : MustSaveCRs)
802 MIB.addReg(CRfield, RegState::ImplicitKill);
803 }
804 };
805
806 // If we need to spill the CR and the LR but we don't have two separate
807 // registers available, we must spill them one at a time
808 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
809 BuildMoveFromCR();
810 BuildMI(MBB, MBBI, dl, StoreWordInst)
811 .addReg(TempReg, getKillRegState(true))
812 .addImm(CRSaveOffset)
813 .addReg(SPReg);
814 }
815
816 if (MustSaveLR)
817 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
818
819 if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
820 BuildMoveFromCR();
821
822 if (HasRedZone) {
823 if (HasFP)
824 BuildMI(MBB, MBBI, dl, StoreInst)
825 .addReg(FPReg)
826 .addImm(FPOffset)
827 .addReg(SPReg);
828 if (FI->usesPICBase())
829 BuildMI(MBB, MBBI, dl, StoreInst)
830 .addReg(PPC::R30)
831 .addImm(PBPOffset)
832 .addReg(SPReg);
833 if (HasBP)
834 BuildMI(MBB, MBBI, dl, StoreInst)
835 .addReg(BPReg)
836 .addImm(BPOffset)
837 .addReg(SPReg);
838 }
839
840 // Generate the instruction to store the LR. In the case where ROP protection
841 // is required the register holding the LR should not be killed as it will be
842 // used by the hash store instruction.
843 auto SaveLR = [&](int64_t Offset) {
844 assert(MustSaveLR && "LR is not required to be saved!");
845 BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
846 .addReg(ScratchReg, getKillRegState(!HasROPProtect))
847 .addImm(Offset)
848 .addReg(SPReg);
849
850 // Add the ROP protection Hash Store instruction.
851 // NOTE: This is technically a violation of the ABI. The hash can be saved
852 // up to 512 bytes into the Protected Zone. This can be outside of the
853 // initial 288 byte volatile program storage region in the Protected Zone.
854 // However, this restriction will be removed in an upcoming revision of the
855 // ABI.
856 if (HasROPProtect) {
857 const int SaveIndex = FI->getROPProtectionHashSaveIndex();
858 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
859 assert((ImmOffset <= -8 && ImmOffset >= -512) &&
860 "ROP hash save offset out of range.");
861 assert(((ImmOffset & 0x7) == 0) &&
862 "ROP hash save offset must be 8 byte aligned.");
863 BuildMI(MBB, StackUpdateLoc, dl, HashST)
864 .addReg(ScratchReg, getKillRegState(true))
865 .addImm(ImmOffset)
866 .addReg(SPReg);
867 }
868 };
869
870 if (MustSaveLR && HasFastMFLR)
871 SaveLR(LROffset);
872
873 if (MustSaveCR &&
874 !(SingleScratchReg && MustSaveLR)) {
875 assert(HasRedZone && "A red zone is always available on PPC64");
876 BuildMI(MBB, MBBI, dl, StoreWordInst)
877 .addReg(TempReg, getKillRegState(true))
878 .addImm(CRSaveOffset)
879 .addReg(SPReg);
880 }
881
882 // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
883 if (!FrameSize) {
884 if (MustSaveLR && !HasFastMFLR)
885 SaveLR(LROffset);
886 return;
887 }
888
889 // Adjust stack pointer: r1 += NegFrameSize.
890 // If there is a preferred stack alignment, align R1 now
891
892 if (HasBP && HasRedZone) {
893 // Save a copy of r1 as the base pointer.
894 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
895 .addReg(SPReg)
896 .addReg(SPReg);
897 }
898
899 // Have we generated a STUX instruction to claim stack frame? If so,
900 // the negated frame size will be placed in ScratchReg.
901 bool HasSTUX =
902 (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
903 (HasBP && MaxAlign > 1) || isLargeFrame;
904
905 // If we use STUX to update the stack pointer, we need the two scratch
906 // registers TempReg and ScratchReg, we have to save LR here which is stored
907 // in ScratchReg.
908 // If the offset can not be encoded into the store instruction, we also have
909 // to save LR here.
910 if (MustSaveLR && !HasFastMFLR &&
911 (HasSTUX || !isInt<16>(FrameSize + LROffset)))
912 SaveLR(LROffset);
913
914 // If FrameSize <= TLI.getStackProbeSize(MF), since the POWER ABI requires
915 // that the backchain pointer always be stored at SP, we will get a free probe
916 // due to an essential STU(X) instruction.
917 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
918 // To be consistent with other targets, a pseudo instruction is emitted and
919 // will be later expanded in `inlineStackProbe`.
920 BuildMI(MBB, MBBI, dl,
921 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
922 : PPC::PROBED_STACKALLOC_32))
923 .addDef(TempReg)
924 .addDef(ScratchReg) // ScratchReg stores the old sp.
925 .addImm(NegFrameSize);
926 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
927 // update the ScratchReg to meet the assumption that ScratchReg contains
928 // the NegFrameSize. This solution is rather tricky.
929 if (!HasRedZone) {
930 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
931 .addReg(ScratchReg)
932 .addReg(SPReg);
933 }
934 } else {
935 // This condition must be kept in sync with canUseAsPrologue.
936 if (HasBP && MaxAlign > 1) {
937 if (isPPC64)
938 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
939 .addReg(SPReg)
940 .addImm(0)
941 .addImm(64 - Log2(MaxAlign));
942 else // PPC32...
943 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
944 .addReg(SPReg)
945 .addImm(0)
946 .addImm(32 - Log2(MaxAlign))
947 .addImm(31);
948 if (!isLargeFrame) {
949 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
950 .addReg(ScratchReg, RegState::Kill)
951 .addImm(NegFrameSize);
952 } else {
953 assert(!SingleScratchReg && "Only a single scratch reg available");
954 TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
955 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
956 .addReg(ScratchReg, RegState::Kill)
957 .addReg(TempReg, RegState::Kill);
958 }
959
960 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
961 .addReg(SPReg, RegState::Kill)
962 .addReg(SPReg)
963 .addReg(ScratchReg);
964 } else if (!isLargeFrame) {
965 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
966 .addReg(SPReg)
967 .addImm(NegFrameSize)
968 .addReg(SPReg);
969 } else {
970 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
971 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
972 .addReg(SPReg, RegState::Kill)
973 .addReg(SPReg)
974 .addReg(ScratchReg);
975 }
976 }
977
978 // Save the TOC register after the stack pointer update if a prologue TOC
979 // save is required for the function.
980 if (MustSaveTOC) {
981 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
982 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
983 .addReg(TOCReg, getKillRegState(true))
984 .addImm(TOCSaveOffset)
985 .addReg(SPReg);
986 }
987
988 if (!HasRedZone) {
989 assert(!isPPC64 && "A red zone is always available on PPC64");
990 if (HasSTUX) {
991 // The negated frame size is in ScratchReg, and the SPReg has been
992 // decremented by the frame size: SPReg = old SPReg + ScratchReg.
993 // Since FPOffset, PBPOffset, etc. are relative to the beginning of
994 // the stack frame (i.e. the old SP), ideally, we would put the old
995 // SP into a register and use it as the base for the stores. The
996 // problem is that the only available register may be ScratchReg,
997 // which could be R0, and R0 cannot be used as a base address.
998
999 // First, set ScratchReg to the old SP. This may need to be modified
1000 // later.
1001 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1002 .addReg(ScratchReg, RegState::Kill)
1003 .addReg(SPReg);
1004
1005 if (ScratchReg == PPC::R0) {
1006 // R0 cannot be used as a base register, but it can be used as an
1007 // index in a store-indexed.
1008 int LastOffset = 0;
1009 if (HasFP) {
1010 // R0 += (FPOffset-LastOffset).
1011 // Need addic, since addi treats R0 as 0.
1012 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1013 .addReg(ScratchReg)
1014 .addImm(FPOffset-LastOffset);
1015 LastOffset = FPOffset;
1016 // Store FP into *R0.
1017 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1018 .addReg(FPReg, RegState::Kill) // Save FP.
1019 .addReg(PPC::ZERO)
1020 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1021 }
1022 if (FI->usesPICBase()) {
1023 // R0 += (PBPOffset-LastOffset).
1024 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1025 .addReg(ScratchReg)
1026 .addImm(PBPOffset-LastOffset);
1027 LastOffset = PBPOffset;
1028 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1029 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
1030 .addReg(PPC::ZERO)
1031 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1032 }
1033 if (HasBP) {
1034 // R0 += (BPOffset-LastOffset).
1035 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1036 .addReg(ScratchReg)
1037 .addImm(BPOffset-LastOffset);
1038 LastOffset = BPOffset;
1039 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1040 .addReg(BPReg, RegState::Kill) // Save BP.
1041 .addReg(PPC::ZERO)
1042 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1043 // BP = R0-LastOffset
1044 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1045 .addReg(ScratchReg, RegState::Kill)
1046 .addImm(-LastOffset);
1047 }
1048 } else {
1049 // ScratchReg is not R0, so use it as the base register. It is
1050 // already set to the old SP, so we can use the offsets directly.
1051
1052 // Now that the stack frame has been allocated, save all the necessary
1053 // registers using ScratchReg as the base address.
1054 if (HasFP)
1055 BuildMI(MBB, MBBI, dl, StoreInst)
1056 .addReg(FPReg)
1057 .addImm(FPOffset)
1058 .addReg(ScratchReg);
1059 if (FI->usesPICBase())
1060 BuildMI(MBB, MBBI, dl, StoreInst)
1061 .addReg(PPC::R30)
1062 .addImm(PBPOffset)
1063 .addReg(ScratchReg);
1064 if (HasBP) {
1065 BuildMI(MBB, MBBI, dl, StoreInst)
1066 .addReg(BPReg)
1067 .addImm(BPOffset)
1068 .addReg(ScratchReg);
1069 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1070 .addReg(ScratchReg, RegState::Kill)
1071 .addReg(ScratchReg);
1072 }
1073 }
1074 } else {
1075 // The frame size is a known 16-bit constant (fitting in the immediate
1076 // field of STWU). To be here we have to be compiling for PPC32.
1077 // Since the SPReg has been decreased by FrameSize, add it back to each
1078 // offset.
1079 if (HasFP)
1080 BuildMI(MBB, MBBI, dl, StoreInst)
1081 .addReg(FPReg)
1082 .addImm(FrameSize + FPOffset)
1083 .addReg(SPReg);
1084 if (FI->usesPICBase())
1085 BuildMI(MBB, MBBI, dl, StoreInst)
1086 .addReg(PPC::R30)
1087 .addImm(FrameSize + PBPOffset)
1088 .addReg(SPReg);
1089 if (HasBP) {
1090 BuildMI(MBB, MBBI, dl, StoreInst)
1091 .addReg(BPReg)
1092 .addImm(FrameSize + BPOffset)
1093 .addReg(SPReg);
1094 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1095 .addReg(SPReg)
1096 .addImm(FrameSize);
1097 }
1098 }
1099 }
1100
1101 // Save the LR now.
1102 if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1103 SaveLR(LROffset + FrameSize);
1104
1105 // Add Call Frame Information for the instructions we generated above.
1106 if (needsCFI) {
1107 unsigned CFIIndex;
1108
1109 if (HasBP) {
1110 // Define CFA in terms of BP. Do this in preference to using FP/SP,
1111 // because if the stack needed aligning then CFA won't be at a fixed
1112 // offset from FP/SP.
1113 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1114 CFIIndex = MF.addFrameInst(
1115 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1116 } else {
1117 // Adjust the definition of CFA to account for the change in SP.
1118 assert(NegFrameSize);
1119 CFIIndex = MF.addFrameInst(
1120 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1121 }
1122 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1123 .addCFIIndex(CFIIndex);
1124
1125 if (HasFP) {
1126 // Describe where FP was saved, at a fixed offset from CFA.
1127 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1128 CFIIndex = MF.addFrameInst(
1129 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1130 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1131 .addCFIIndex(CFIIndex);
1132 }
1133
1134 if (FI->usesPICBase()) {
1135 // Describe where FP was saved, at a fixed offset from CFA.
1136 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1137 CFIIndex = MF.addFrameInst(
1138 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1139 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1140 .addCFIIndex(CFIIndex);
1141 }
1142
1143 if (HasBP) {
1144 // Describe where BP was saved, at a fixed offset from CFA.
1145 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1146 CFIIndex = MF.addFrameInst(
1147 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1148 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1149 .addCFIIndex(CFIIndex);
1150 }
1151
1152 if (MustSaveLR) {
1153 // Describe where LR was saved, at a fixed offset from CFA.
1154 unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1155 CFIIndex = MF.addFrameInst(
1156 MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1157 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1158 .addCFIIndex(CFIIndex);
1159 }
1160 }
1161
1162 // If there is a frame pointer, copy R1 into R31
1163 if (HasFP) {
1164 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1165 .addReg(SPReg)
1166 .addReg(SPReg);
1167
1168 if (!HasBP && needsCFI) {
1169 // Change the definition of CFA from SP+offset to FP+offset, because SP
1170 // will change at every alloca.
1171 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1172 unsigned CFIIndex = MF.addFrameInst(
1173 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1174
1175 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1176 .addCFIIndex(CFIIndex);
1177 }
1178 }
1179
1180 if (needsCFI) {
1181 // Describe where callee saved registers were saved, at fixed offsets from
1182 // CFA.
1183 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1184 for (const CalleeSavedInfo &I : CSI) {
1185 Register Reg = I.getReg();
1186 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1187
1188 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1189 // subregisters of CR2. We just need to emit a move of CR2.
1190 if (PPC::CRBITRCRegClass.contains(Reg))
1191 continue;
1192
1193 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1194 continue;
1195
1196 // For 64-bit SVR4 when we have spilled CRs, the spill location
1197 // is SP+8, not a frame-relative slot.
1198 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1199 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1200 // the whole CR word. In the ELFv2 ABI, every CR that was
1201 // actually saved gets its own CFI record.
1202 Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1203 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1204 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1205 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1206 .addCFIIndex(CFIIndex);
1207 continue;
1208 }
1209
1210 if (I.isSpilledToReg()) {
1211 unsigned SpilledReg = I.getDstReg();
1212 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1213 nullptr, MRI->getDwarfRegNum(Reg, true),
1214 MRI->getDwarfRegNum(SpilledReg, true)));
1215 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1216 .addCFIIndex(CFIRegister);
1217 } else {
1218 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1219 // We have changed the object offset above but we do not want to change
1220 // the actual offsets in the CFI instruction so we have to undo the
1221 // offset change here.
1222 if (MovingStackUpdateDown)
1223 Offset -= NegFrameSize;
1224
1225 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1226 nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1227 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1228 .addCFIIndex(CFIIndex);
1229 }
1230 }
1231 }
1232 }
1233
// Expand the PROBED_STACKALLOC_32/64 pseudo (emitted by emitPrologue when an
// inline stack probe is requested and FrameSize > the probe size) into real
// probing code. Because the ABI requires the back-chain pointer to live at
// *SP, every st(w|d)u(x) that moves SP doubles as a probe, so the expansion
// allocates the frame in probe-size steps using store-with-update
// instructions. The pseudo itself is erased at the end.
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  bool isPPC64 = Subtarget.isPPC64();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  // Locate the PROBED_STACKALLOC pseudo in the prologue block; if none was
  // emitted there is nothing to expand.
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  MachineBasicBlock *CurrentMBB = &PrologMBB;
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  // Pseudo operands (see emitPrologue): (0) scratch register, (1) register
  // that must end up holding the old SP, (2) negated frame size immediate.
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  unsigned ProbeSize = TLI.getStackProbeSize(MF);
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  // Split the allocation into NumBlocks whole probe-size steps plus one
  // residual step (both negative, like the frame size).
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Register BPReg = RegInfo->getBaseRegister(MF);
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  // Subroutines to generate .cfi_* directives.
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form.
  // (D-form st(w|d)u needs a 16-bit immediate; STDU additionally requires the
  // displacement to be a multiple of 4.)
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      // LIS sets the high halfword (the arithmetic shift preserves the sign);
      // ORI then fills in the low 16 bits, so any 32-bit value is covered.
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe size.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm,
                              Register StoreReg) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(StoreReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(StoreReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Used to probe stack when realignment is required.
  // Note that, according to ABI's requirement, *sp must always equal the
  // value of back-chain pointer, only st(w|d)u(x) can be used to update sp.
  // Following is pseudo code:
  // final_sp = (sp & align) + negframesize;
  // neg_gap = final_sp - sp;
  // while (neg_gap < negprobesize) {
  //     stdu fp, negprobesize(sp);
  //     neg_gap -= negprobesize;
  // }
  // stdux fp, sp, neg_gap
  //
  // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
  // before probe code, we don't need to save it, so we get one additional reg
  // that can be used to materialize the probesize if needed to use xform.
  // Otherwise, we can NOT materialize probesize, so we can only use Dform for
  // now.
  //
  // The allocations are:
  // if (HasBP && HasRedzone) {
  //     r0: materialize the probesize if needed so that we can use xform.
  //     r12: `neg_gap`
  // } else {
  //     r0: back-chain pointer
  //     r12: `neg_gap`.
  // }
  auto probeRealignedStack = [&](MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register ScratchReg, Register TempReg) {
    assert(HasBP && "The function is supposed to have base pointer when its "
                    "stack is realigned.");
    assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");

    // FIXME: We can eliminate this limitation if we get more information about
    // which part of redzone are already used. Used redzone can be treated
    // probed. But there might be `holes' in redzone probed, this could
    // complicate the implementation.
    assert(ProbeSize >= Subtarget.getRedZoneSize() &&
           "Probe size should be larger or equal to the size of red-zone so "
           "that red-zone is not clobbered by probing.");

    // On entry, TempReg already holds the final (realigned, fully allocated)
    // stack pointer computed by the caller.
    Register &FinalStackPtr = TempReg;
    // FIXME: We only support NegProbeSize materializable by DForm currently.
    // When HasBP && HasRedzone, we can use xform if we have an additional idle
    // register.
    NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
    assert(isInt<16>(NegProbeSize) &&
           "NegProbeSize should be materializable by DForm");
    Register CRReg = PPC::CR0;
    // Layout of output assembly kinda like:
    // bb.0:
    //   ...
    //   sub $scratchreg, $finalsp, r1
    //   cmpdi $scratchreg, <negprobesize>
    //   bge bb.2
    // bb.1:
    //   stdu <backchain>, <negprobesize>(r1)
    //   sub $scratchreg, $scratchreg, negprobesize
    //   cmpdi $scratchreg, <negprobesize>
    //   blt bb.1
    // bb.2:
    //   stdux <backchain>, r1, $scratchreg
    MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
    MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
    MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeExitMBB);
    // bb.2
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      // Final stdux: moves SP by the remaining (sub-probe-size) gap held in
      // ScratchReg while storing the back chain.
      allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
                       BackChainPointer);
      if (HasRedZone)
        // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
        // to TempReg to satisfy it.
        BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
            .addReg(BPReg)
            .addReg(BPReg);
      // Move the rest of the original block after the probe into the exit
      // block and take over its successors.
      ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
      ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
    }
    // bb.0
    {
      // ScratchReg = FinalStackPtr - SP, i.e. the (negative) gap still to be
      // allocated.
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
          .addReg(SPReg)
          .addReg(FinalStackPtr);
      if (!HasRedZone)
        // Without a red zone TempReg must carry the back-chain (old SP).
        BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      // Skip the loop when the remaining gap is smaller than one probe step.
      BuildMI(&MBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_GE)
          .addReg(CRReg)
          .addMBB(ProbeExitMBB);
      MBB.addSuccessor(ProbeLoopBodyMBB);
      MBB.addSuccessor(ProbeExitMBB);
    }
    // bb.1
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
                       0, true /*UseDForm*/, BackChainPointer);
      // neg_gap -= negprobesize (adding the positive probe size).
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
              ScratchReg)
          .addReg(ScratchReg)
          .addImm(-NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
              CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_LT)
          .addReg(CRReg)
          .addMBB(ProbeLoopBodyMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
    }
    // Update liveins.
    recomputeLiveIns(*ProbeLoopBodyMBB);
    recomputeLiveIns(*ProbeExitMBB);
    return ProbeExitMBB;
  };
  // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
  // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
  // the offset subtracted from SP is determined by SP's runtime value.
  if (HasBP && MaxAlign > 1) {
    // Calculate final stack pointer.
    // The rotate-and-mask keeps only the low Log2(MaxAlign) bits, i.e.
    // ScratchReg = SP % MaxAlign.
    if (isPPC64)
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(64 - Log2(MaxAlign));
    else
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
    // FPReg = SP - (SP % MaxAlign): the aligned stack pointer.
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
            FPReg)
        .addReg(ScratchReg)
        .addReg(SPReg);
    MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
    // FPReg = aligned SP + NegFrameSize: the final stack pointer, as expected
    // by probeRealignedStack's TempReg argument.
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
            FPReg)
        .addReg(ScratchReg)
        .addReg(FPReg);
    CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
    if (needsCFI)
      buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
  } else {
    // Initialize current frame pointer.
    BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
    // Use FPReg to calculate CFA.
    if (needsCFI)
      buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
    // Probe residual part.
    if (NegResidualSize) {
      bool ResidualUseDForm = CanUseDForm(NegResidualSize);
      if (!ResidualUseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
      allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
                       ResidualUseDForm, FPReg);
    }
    bool UseDForm = CanUseDForm(NegProbeSize);
    // If number of blocks is small, just probe them directly.
    if (NumBlocks < 3) {
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      for (int i = 0; i < NumBlocks; ++i)
        allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
                         FPReg);
      if (needsCFI) {
        // Restore using SPReg to calculate CFA.
        buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
      }
    } else {
      // Since CTR is a volatile register and current shrinkwrap implementation
      // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize
      // a CTR loop to probe.
      // Calculate trip count and store it in CTR.
      MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
          .addReg(ScratchReg, RegState::Kill);
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      // Create MBBs of the loop.
      MachineFunction::iterator MBBInsertPoint =
          std::next(CurrentMBB->getIterator());
      MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, LoopMBB);
      MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, ExitMBB);
      // Synthesize the loop body.
      allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
                       UseDForm, FPReg);
      // bdnz: decrement CTR and branch back while it is non-zero.
      BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
          .addMBB(LoopMBB);
      LoopMBB->addSuccessor(ExitMBB);
      LoopMBB->addSuccessor(LoopMBB);
      // Synthesize the exit MBB: everything after the pseudo moves there.
      ExitMBB->splice(ExitMBB->end(), CurrentMBB,
                      std::next(MachineBasicBlock::iterator(MI)),
                      CurrentMBB->end());
      ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
      CurrentMBB->addSuccessor(LoopMBB);
      if (needsCFI) {
        // Restore using SPReg to calculate CFA.
        buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
      }
      // Update liveins.
      recomputeLiveIns(*LoopMBB);
      recomputeLiveIns(*ExitMBB);
    }
  }
  ++NumPrologProbed;
  MI.eraseFromParent();
}
1540
emitEpilogue(MachineFunction & MF,MachineBasicBlock & MBB) const1541 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1542 MachineBasicBlock &MBB) const {
1543 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1544 DebugLoc dl;
1545
1546 if (MBBI != MBB.end())
1547 dl = MBBI->getDebugLoc();
1548
1549 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1550 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1551
1552 // Get alignment info so we know how to restore the SP.
1553 const MachineFrameInfo &MFI = MF.getFrameInfo();
1554
1555 // Get the number of bytes allocated from the FrameInfo.
1556 int64_t FrameSize = MFI.getStackSize();
1557
1558 // Get processor type.
1559 bool isPPC64 = Subtarget.isPPC64();
1560
1561 // Check if the link register (LR) has been saved.
1562 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1563 bool MustSaveLR = FI->mustSaveLR();
1564 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1565 bool MustSaveCR = !MustSaveCRs.empty();
1566 // Do we have a frame pointer and/or base pointer for this function?
1567 bool HasFP = hasFP(MF);
1568 bool HasBP = RegInfo->hasBasePointer(MF);
1569 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1570 bool HasROPProtect = Subtarget.hasROPProtect();
1571 bool HasPrivileged = Subtarget.hasPrivileged();
1572
1573 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1574 Register BPReg = RegInfo->getBaseRegister(MF);
1575 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
1576 Register ScratchReg;
1577 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1578 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1579 : PPC::MTLR );
1580 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1581 : PPC::LWZ );
1582 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1583 : PPC::LIS );
1584 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1585 : PPC::OR );
1586 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1587 : PPC::ORI );
1588 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1589 : PPC::ADDI );
1590 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1591 : PPC::ADD4 );
1592 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1593 : PPC::LWZ);
1594 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1595 : PPC::MTOCRF);
1596 const MCInstrDesc &HashChk =
1597 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1598 : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1599 int64_t LROffset = getReturnSaveOffset();
1600
1601 int64_t FPOffset = 0;
1602
1603 // Using the same bool variable as below to suppress compiler warnings.
1604 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1605 &TempReg);
1606 assert(SingleScratchReg &&
1607 "Could not find an available scratch register");
1608
1609 SingleScratchReg = ScratchReg == TempReg;
1610
1611 if (HasFP) {
1612 int FPIndex = FI->getFramePointerSaveIndex();
1613 assert(FPIndex && "No Frame Pointer Save Slot!");
1614 FPOffset = MFI.getObjectOffset(FPIndex);
1615 }
1616
1617 int64_t BPOffset = 0;
1618 if (HasBP) {
1619 int BPIndex = FI->getBasePointerSaveIndex();
1620 assert(BPIndex && "No Base Pointer Save Slot!");
1621 BPOffset = MFI.getObjectOffset(BPIndex);
1622 }
1623
1624 int64_t PBPOffset = 0;
1625 if (FI->usesPICBase()) {
1626 int PBPIndex = FI->getPICBasePointerSaveIndex();
1627 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1628 PBPOffset = MFI.getObjectOffset(PBPIndex);
1629 }
1630
1631 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1632
1633 if (IsReturnBlock) {
1634 unsigned RetOpcode = MBBI->getOpcode();
1635 bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
1636 RetOpcode == PPC::TCRETURNdi ||
1637 RetOpcode == PPC::TCRETURNai ||
1638 RetOpcode == PPC::TCRETURNri8 ||
1639 RetOpcode == PPC::TCRETURNdi8 ||
1640 RetOpcode == PPC::TCRETURNai8;
1641
1642 if (UsesTCRet) {
1643 int MaxTCRetDelta = FI->getTailCallSPDelta();
1644 MachineOperand &StackAdjust = MBBI->getOperand(1);
1645 assert(StackAdjust.isImm() && "Expecting immediate value.");
1646 // Adjust stack pointer.
1647 int StackAdj = StackAdjust.getImm();
1648 int Delta = StackAdj - MaxTCRetDelta;
1649 assert((Delta >= 0) && "Delta must be positive");
1650 if (MaxTCRetDelta>0)
1651 FrameSize += (StackAdj +Delta);
1652 else
1653 FrameSize += StackAdj;
1654 }
1655 }
1656
1657 // Frames of 32KB & larger require special handling because they cannot be
1658 // indexed into with a simple LD/LWZ immediate offset operand.
1659 bool isLargeFrame = !isInt<16>(FrameSize);
1660
1661 // On targets without red zone, the SP needs to be restored last, so that
1662 // all live contents of the stack frame are upwards of the SP. This means
1663 // that we cannot restore SP just now, since there may be more registers
1664 // to restore from the stack frame (e.g. R31). If the frame size is not
1665 // a simple immediate value, we will need a spare register to hold the
1666 // restored SP. If the frame size is known and small, we can simply adjust
1667 // the offsets of the registers to be restored, and still use SP to restore
1668 // them. In such case, the final update of SP will be to add the frame
1669 // size to it.
1670 // To simplify the code, set RBReg to the base register used to restore
1671 // values from the stack, and set SPAdd to the value that needs to be added
1672 // to the SP at the end. The default values are as if red zone was present.
1673 unsigned RBReg = SPReg;
1674 uint64_t SPAdd = 0;
1675
1676 // Check if we can move the stack update instruction up the epilogue
1677 // past the callee saves. This will allow the move to LR instruction
1678 // to be executed before the restores of the callee saves which means
1679 // that the callee saves can hide the latency from the MTLR instrcution.
1680 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1681 if (stackUpdateCanBeMoved(MF)) {
1682 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1683 for (CalleeSavedInfo CSI : Info) {
1684 // If the callee saved register is spilled to another register abort the
1685 // stack update movement.
1686 if (CSI.isSpilledToReg()) {
1687 StackUpdateLoc = MBBI;
1688 break;
1689 }
1690 int FrIdx = CSI.getFrameIdx();
1691 // If the frame index is not negative the callee saved info belongs to a
1692 // stack object that is not a fixed stack object. We ignore non-fixed
1693 // stack objects because we won't move the update of the stack pointer
1694 // past them.
1695 if (FrIdx >= 0)
1696 continue;
1697
1698 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1699 StackUpdateLoc--;
1700 else {
1701 // Abort the operation as we can't update all CSR restores.
1702 StackUpdateLoc = MBBI;
1703 break;
1704 }
1705 }
1706 }
1707
1708 if (FrameSize) {
1709 // In the prologue, the loaded (or persistent) stack pointer value is
1710 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1711 // zone add this offset back now.
1712
1713 // If the function has a base pointer, the stack pointer has been copied
1714 // to it so we can restore it by copying in the other direction.
1715 if (HasRedZone && HasBP) {
1716 BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1717 addReg(BPReg).
1718 addReg(BPReg);
1719 }
1720 // If this function contained a fastcc call and GuaranteedTailCallOpt is
1721 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1722 // call which invalidates the stack pointer value in SP(0). So we use the
1723 // value of R31 in this case. Similar situation exists with setjmp.
1724 else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1725 assert(HasFP && "Expecting a valid frame pointer.");
1726 if (!HasRedZone)
1727 RBReg = FPReg;
1728 if (!isLargeFrame) {
1729 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1730 .addReg(FPReg).addImm(FrameSize);
1731 } else {
1732 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1733 BuildMI(MBB, MBBI, dl, AddInst)
1734 .addReg(RBReg)
1735 .addReg(FPReg)
1736 .addReg(ScratchReg);
1737 }
1738 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1739 if (HasRedZone) {
1740 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1741 .addReg(SPReg)
1742 .addImm(FrameSize);
1743 } else {
1744 // Make sure that adding FrameSize will not overflow the max offset
1745 // size.
1746 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1747 "Local offsets should be negative");
1748 SPAdd = FrameSize;
1749 FPOffset += FrameSize;
1750 BPOffset += FrameSize;
1751 PBPOffset += FrameSize;
1752 }
1753 } else {
1754 // We don't want to use ScratchReg as a base register, because it
1755 // could happen to be R0. Use FP instead, but make sure to preserve it.
1756 if (!HasRedZone) {
1757 // If FP is not saved, copy it to ScratchReg.
1758 if (!HasFP)
1759 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1760 .addReg(FPReg)
1761 .addReg(FPReg);
1762 RBReg = FPReg;
1763 }
1764 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1765 .addImm(0)
1766 .addReg(SPReg);
1767 }
1768 }
1769 assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1770 // If there is no red zone, ScratchReg may be needed for holding a useful
1771 // value (although not the base register). Make sure it is not overwritten
1772 // too early.
1773
1774 // If we need to restore both the LR and the CR and we only have one
1775 // available scratch register, we must do them one at a time.
1776 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1777 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1778 // is live here.
1779 assert(HasRedZone && "Expecting red zone");
1780 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1781 .addImm(CRSaveOffset)
1782 .addReg(SPReg);
1783 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1784 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1785 .addReg(TempReg, getKillRegState(i == e-1));
1786 }
1787
1788 // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1789 // LR is stored in the caller's stack frame. ScratchReg will be needed
1790 // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1791 // a base register anyway, because it may happen to be R0.
1792 bool LoadedLR = false;
1793 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1794 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1795 .addImm(LROffset+SPAdd)
1796 .addReg(RBReg);
1797 LoadedLR = true;
1798 }
1799
1800 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1801 assert(RBReg == SPReg && "Should be using SP as a base register");
1802 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1803 .addImm(CRSaveOffset)
1804 .addReg(RBReg);
1805 }
1806
1807 if (HasFP) {
1808 // If there is red zone, restore FP directly, since SP has already been
1809 // restored. Otherwise, restore the value of FP into ScratchReg.
1810 if (HasRedZone || RBReg == SPReg)
1811 BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1812 .addImm(FPOffset)
1813 .addReg(SPReg);
1814 else
1815 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1816 .addImm(FPOffset)
1817 .addReg(RBReg);
1818 }
1819
1820 if (FI->usesPICBase())
1821 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1822 .addImm(PBPOffset)
1823 .addReg(RBReg);
1824
1825 if (HasBP)
1826 BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1827 .addImm(BPOffset)
1828 .addReg(RBReg);
1829
1830 // There is nothing more to be loaded from the stack, so now we can
1831 // restore SP: SP = RBReg + SPAdd.
1832 if (RBReg != SPReg || SPAdd != 0) {
1833 assert(!HasRedZone && "This should not happen with red zone");
1834 // If SPAdd is 0, generate a copy.
1835 if (SPAdd == 0)
1836 BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1837 .addReg(RBReg)
1838 .addReg(RBReg);
1839 else
1840 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1841 .addReg(RBReg)
1842 .addImm(SPAdd);
1843
1844 assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1845 if (RBReg == FPReg)
1846 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1847 .addReg(ScratchReg)
1848 .addReg(ScratchReg);
1849
1850 // Now load the LR from the caller's stack frame.
1851 if (MustSaveLR && !LoadedLR)
1852 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1853 .addImm(LROffset)
1854 .addReg(SPReg);
1855 }
1856
1857 if (MustSaveCR &&
1858 !(SingleScratchReg && MustSaveLR))
1859 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1860 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1861 .addReg(TempReg, getKillRegState(i == e-1));
1862
1863 if (MustSaveLR) {
1864 // If ROP protection is required, an extra instruction is added to compute a
1865 // hash and then compare it to the hash stored in the prologue.
1866 if (HasROPProtect) {
1867 const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1868 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1869 assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1870 "ROP hash check location offset out of range.");
1871 assert(((ImmOffset & 0x7) == 0) &&
1872 "ROP hash check location offset must be 8 byte aligned.");
1873 BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1874 .addReg(ScratchReg)
1875 .addImm(ImmOffset)
1876 .addReg(SPReg);
1877 }
1878 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1879 }
1880
1881 // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1882 // call optimization
1883 if (IsReturnBlock) {
1884 unsigned RetOpcode = MBBI->getOpcode();
1885 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1886 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1887 MF.getFunction().getCallingConv() == CallingConv::Fast) {
1888 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1889 unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1890
1891 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1892 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1893 .addReg(SPReg).addImm(CallerAllocatedAmt);
1894 } else {
1895 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1896 .addImm(CallerAllocatedAmt >> 16);
1897 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1898 .addReg(ScratchReg, RegState::Kill)
1899 .addImm(CallerAllocatedAmt & 0xFFFF);
1900 BuildMI(MBB, MBBI, dl, AddInst)
1901 .addReg(SPReg)
1902 .addReg(FPReg)
1903 .addReg(ScratchReg);
1904 }
1905 } else {
1906 createTailCallBranchInstr(MBB);
1907 }
1908 }
1909 }
1910
createTailCallBranchInstr(MachineBasicBlock & MBB) const1911 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1912 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1913
1914 // If we got this far a first terminator should exist.
1915 assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1916
1917 DebugLoc dl = MBBI->getDebugLoc();
1918 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1919
1920 // Create branch instruction for pseudo tail call return instruction.
1921 // The TCRETURNdi variants are direct calls. Valid targets for those are
1922 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1923 // since we can tail call external functions with PC-Rel (i.e. we don't need
1924 // to worry about different TOC pointers). Some of the external functions will
1925 // be MO_GlobalAddress while others like memcpy for example, are going to
1926 // be MO_ExternalSymbol.
1927 unsigned RetOpcode = MBBI->getOpcode();
1928 if (RetOpcode == PPC::TCRETURNdi) {
1929 MBBI = MBB.getLastNonDebugInstr();
1930 MachineOperand &JumpTarget = MBBI->getOperand(0);
1931 if (JumpTarget.isGlobal())
1932 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1933 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1934 else if (JumpTarget.isSymbol())
1935 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1936 addExternalSymbol(JumpTarget.getSymbolName());
1937 else
1938 llvm_unreachable("Expecting Global or External Symbol");
1939 } else if (RetOpcode == PPC::TCRETURNri) {
1940 MBBI = MBB.getLastNonDebugInstr();
1941 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1942 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1943 } else if (RetOpcode == PPC::TCRETURNai) {
1944 MBBI = MBB.getLastNonDebugInstr();
1945 MachineOperand &JumpTarget = MBBI->getOperand(0);
1946 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1947 } else if (RetOpcode == PPC::TCRETURNdi8) {
1948 MBBI = MBB.getLastNonDebugInstr();
1949 MachineOperand &JumpTarget = MBBI->getOperand(0);
1950 if (JumpTarget.isGlobal())
1951 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1952 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1953 else if (JumpTarget.isSymbol())
1954 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1955 addExternalSymbol(JumpTarget.getSymbolName());
1956 else
1957 llvm_unreachable("Expecting Global or External Symbol");
1958 } else if (RetOpcode == PPC::TCRETURNri8) {
1959 MBBI = MBB.getLastNonDebugInstr();
1960 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1961 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1962 } else if (RetOpcode == PPC::TCRETURNai8) {
1963 MBBI = MBB.getLastNonDebugInstr();
1964 MachineOperand &JumpTarget = MBBI->getOperand(0);
1965 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1966 }
1967 }
1968
determineCalleeSaves(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const1969 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1970 BitVector &SavedRegs,
1971 RegScavenger *RS) const {
1972 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1973
1974 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1975
1976 // Do not explicitly save the callee saved VSRp registers.
1977 // The individual VSR subregisters will be saved instead.
1978 SavedRegs.reset(PPC::VSRp26);
1979 SavedRegs.reset(PPC::VSRp27);
1980 SavedRegs.reset(PPC::VSRp28);
1981 SavedRegs.reset(PPC::VSRp29);
1982 SavedRegs.reset(PPC::VSRp30);
1983 SavedRegs.reset(PPC::VSRp31);
1984
1985 // Save and clear the LR state.
1986 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1987 unsigned LR = RegInfo->getRARegister();
1988 FI->setMustSaveLR(MustSaveLR(MF, LR));
1989 SavedRegs.reset(LR);
1990
1991 // Save R31 if necessary
1992 int FPSI = FI->getFramePointerSaveIndex();
1993 const bool isPPC64 = Subtarget.isPPC64();
1994 MachineFrameInfo &MFI = MF.getFrameInfo();
1995
1996 // If the frame pointer save index hasn't been defined yet.
1997 if (!FPSI && needsFP(MF)) {
1998 // Find out what the fix offset of the frame pointer save area.
1999 int FPOffset = getFramePointerSaveOffset();
2000 // Allocate the frame index for frame pointer save area.
2001 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2002 // Save the result.
2003 FI->setFramePointerSaveIndex(FPSI);
2004 }
2005
2006 int BPSI = FI->getBasePointerSaveIndex();
2007 if (!BPSI && RegInfo->hasBasePointer(MF)) {
2008 int BPOffset = getBasePointerSaveOffset();
2009 // Allocate the frame index for the base pointer save area.
2010 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2011 // Save the result.
2012 FI->setBasePointerSaveIndex(BPSI);
2013 }
2014
2015 // Reserve stack space for the PIC Base register (R30).
2016 // Only used in SVR4 32-bit.
2017 if (FI->usesPICBase()) {
2018 int PBPSI = MFI.CreateFixedObject(4, -8, true);
2019 FI->setPICBasePointerSaveIndex(PBPSI);
2020 }
2021
2022 // Make sure we don't explicitly spill r31, because, for example, we have
2023 // some inline asm which explicitly clobbers it, when we otherwise have a
2024 // frame pointer and are using r31's spill slot for the prologue/epilogue
2025 // code. Same goes for the base pointer and the PIC base register.
2026 if (needsFP(MF))
2027 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2028 if (RegInfo->hasBasePointer(MF))
2029 SavedRegs.reset(RegInfo->getBaseRegister(MF));
2030 if (FI->usesPICBase())
2031 SavedRegs.reset(PPC::R30);
2032
2033 // Reserve stack space to move the linkage area to in case of a tail call.
2034 int TCSPDelta = 0;
2035 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2036 (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2037 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2038 }
2039
2040 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2041 // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2042 // object at the offset of the CR-save slot in the linkage area. The actual
2043 // save and restore of the condition register will be created as part of the
2044 // prologue and epilogue insertion, but the FixedStack object is needed to
2045 // keep the CalleSavedInfo valid.
2046 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2047 SavedRegs.test(PPC::CR4))) {
2048 const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2049 const int64_t SpillOffset =
2050 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2051 int FrameIdx =
2052 MFI.CreateFixedObject(SpillSize, SpillOffset,
2053 /* IsImmutable */ true, /* IsAliased */ false);
2054 FI->setCRSpillFrameIndex(FrameIdx);
2055 }
2056 }
2057
processFunctionBeforeFrameFinalized(MachineFunction & MF,RegScavenger * RS) const2058 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2059 RegScavenger *RS) const {
2060 // Get callee saved register information.
2061 MachineFrameInfo &MFI = MF.getFrameInfo();
2062 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2063
2064 // If the function is shrink-wrapped, and if the function has a tail call, the
2065 // tail call might not be in the new RestoreBlock, so real branch instruction
2066 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2067 // RestoreBlock. So we handle this case here.
2068 if (MFI.getSavePoint() && MFI.hasTailCall()) {
2069 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2070 for (MachineBasicBlock &MBB : MF) {
2071 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2072 createTailCallBranchInstr(MBB);
2073 }
2074 }
2075
2076 // Early exit if no callee saved registers are modified!
2077 if (CSI.empty() && !needsFP(MF)) {
2078 addScavengingSpillSlot(MF, RS);
2079 return;
2080 }
2081
2082 unsigned MinGPR = PPC::R31;
2083 unsigned MinG8R = PPC::X31;
2084 unsigned MinFPR = PPC::F31;
2085 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2086
2087 bool HasGPSaveArea = false;
2088 bool HasG8SaveArea = false;
2089 bool HasFPSaveArea = false;
2090 bool HasVRSaveArea = false;
2091
2092 SmallVector<CalleeSavedInfo, 18> GPRegs;
2093 SmallVector<CalleeSavedInfo, 18> G8Regs;
2094 SmallVector<CalleeSavedInfo, 18> FPRegs;
2095 SmallVector<CalleeSavedInfo, 18> VRegs;
2096
2097 for (const CalleeSavedInfo &I : CSI) {
2098 Register Reg = I.getReg();
2099 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2100 (Reg != PPC::X2 && Reg != PPC::R2)) &&
2101 "Not expecting to try to spill R2 in a function that must save TOC");
2102 if (PPC::GPRCRegClass.contains(Reg)) {
2103 HasGPSaveArea = true;
2104
2105 GPRegs.push_back(I);
2106
2107 if (Reg < MinGPR) {
2108 MinGPR = Reg;
2109 }
2110 } else if (PPC::G8RCRegClass.contains(Reg)) {
2111 HasG8SaveArea = true;
2112
2113 G8Regs.push_back(I);
2114
2115 if (Reg < MinG8R) {
2116 MinG8R = Reg;
2117 }
2118 } else if (PPC::F8RCRegClass.contains(Reg)) {
2119 HasFPSaveArea = true;
2120
2121 FPRegs.push_back(I);
2122
2123 if (Reg < MinFPR) {
2124 MinFPR = Reg;
2125 }
2126 } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2127 PPC::CRRCRegClass.contains(Reg)) {
2128 ; // do nothing, as we already know whether CRs are spilled
2129 } else if (PPC::VRRCRegClass.contains(Reg) ||
2130 PPC::SPERCRegClass.contains(Reg)) {
2131 // Altivec and SPE are mutually exclusive, but have the same stack
2132 // alignment requirements, so overload the save area for both cases.
2133 HasVRSaveArea = true;
2134
2135 VRegs.push_back(I);
2136
2137 if (Reg < MinVR) {
2138 MinVR = Reg;
2139 }
2140 } else {
2141 llvm_unreachable("Unknown RegisterClass!");
2142 }
2143 }
2144
2145 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2146 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2147
2148 int64_t LowerBound = 0;
2149
2150 // Take into account stack space reserved for tail calls.
2151 int TCSPDelta = 0;
2152 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2153 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2154 LowerBound = TCSPDelta;
2155 }
2156
2157 // The Floating-point register save area is right below the back chain word
2158 // of the previous stack frame.
2159 if (HasFPSaveArea) {
2160 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2161 int FI = FPRegs[i].getFrameIdx();
2162
2163 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2164 }
2165
2166 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2167 }
2168
2169 // Check whether the frame pointer register is allocated. If so, make sure it
2170 // is spilled to the correct offset.
2171 if (needsFP(MF)) {
2172 int FI = PFI->getFramePointerSaveIndex();
2173 assert(FI && "No Frame Pointer Save Slot!");
2174 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2175 // FP is R31/X31, so no need to update MinGPR/MinG8R.
2176 HasGPSaveArea = true;
2177 }
2178
2179 if (PFI->usesPICBase()) {
2180 int FI = PFI->getPICBasePointerSaveIndex();
2181 assert(FI && "No PIC Base Pointer Save Slot!");
2182 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2183
2184 MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2185 HasGPSaveArea = true;
2186 }
2187
2188 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2189 if (RegInfo->hasBasePointer(MF)) {
2190 int FI = PFI->getBasePointerSaveIndex();
2191 assert(FI && "No Base Pointer Save Slot!");
2192 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2193
2194 Register BP = RegInfo->getBaseRegister(MF);
2195 if (PPC::G8RCRegClass.contains(BP)) {
2196 MinG8R = std::min<unsigned>(MinG8R, BP);
2197 HasG8SaveArea = true;
2198 } else if (PPC::GPRCRegClass.contains(BP)) {
2199 MinGPR = std::min<unsigned>(MinGPR, BP);
2200 HasGPSaveArea = true;
2201 }
2202 }
2203
2204 // General register save area starts right below the Floating-point
2205 // register save area.
2206 if (HasGPSaveArea || HasG8SaveArea) {
2207 // Move general register save area spill slots down, taking into account
2208 // the size of the Floating-point register save area.
2209 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2210 if (!GPRegs[i].isSpilledToReg()) {
2211 int FI = GPRegs[i].getFrameIdx();
2212 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2213 }
2214 }
2215
2216 // Move general register save area spill slots down, taking into account
2217 // the size of the Floating-point register save area.
2218 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2219 if (!G8Regs[i].isSpilledToReg()) {
2220 int FI = G8Regs[i].getFrameIdx();
2221 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2222 }
2223 }
2224
2225 unsigned MinReg =
2226 std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2227 TRI->getEncodingValue(MinG8R));
2228
2229 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2230 LowerBound -= (31 - MinReg + 1) * GPRegSize;
2231 }
2232
2233 // For 32-bit only, the CR save area is below the general register
2234 // save area. For 64-bit SVR4, the CR save area is addressed relative
2235 // to the stack pointer and hence does not need an adjustment here.
2236 // Only CR2 (the first nonvolatile spilled) has an associated frame
2237 // index so that we have a single uniform save area.
2238 if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2239 // Adjust the frame index of the CR spill slot.
2240 for (const auto &CSInfo : CSI) {
2241 if (CSInfo.getReg() == PPC::CR2) {
2242 int FI = CSInfo.getFrameIdx();
2243 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2244 break;
2245 }
2246 }
2247
2248 LowerBound -= 4; // The CR save area is always 4 bytes long.
2249 }
2250
2251 // Both Altivec and SPE have the same alignment and padding requirements
2252 // within the stack frame.
2253 if (HasVRSaveArea) {
2254 // Insert alignment padding, we need 16-byte alignment. Note: for positive
2255 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2256 // we are using negative number here (the stack grows downward). We should
2257 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2258 // is the alignment size ( n = 16 here) and y is the size after aligning.
2259 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2260 LowerBound &= ~(15);
2261
2262 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2263 int FI = VRegs[i].getFrameIdx();
2264
2265 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2266 }
2267 }
2268
2269 addScavengingSpillSlot(MF, RS);
2270 }
2271
2272 void
addScavengingSpillSlot(MachineFunction & MF,RegScavenger * RS) const2273 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2274 RegScavenger *RS) const {
2275 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2276 // a large stack, which will require scavenging a register to materialize a
2277 // large offset.
2278
2279 // We need to have a scavenger spill slot for spills if the frame size is
2280 // large. In case there is no free register for large-offset addressing,
2281 // this slot is used for the necessary emergency spill. Also, we need the
2282 // slot for dynamic stack allocations.
2283
2284 // The scavenger might be invoked if the frame offset does not fit into
2285 // the 16-bit immediate. We don't know the complete frame size here
2286 // because we've not yet computed callee-saved register spills or the
2287 // needed alignment padding.
2288 unsigned StackSize = determineFrameLayout(MF, true);
2289 MachineFrameInfo &MFI = MF.getFrameInfo();
2290 if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2291 (hasSpills(MF) && !isInt<16>(StackSize))) {
2292 const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2293 const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2294 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2295 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2296 unsigned Size = TRI.getSpillSize(RC);
2297 Align Alignment = TRI.getSpillAlign(RC);
2298 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2299
2300 // Might we have over-aligned allocas?
2301 bool HasAlVars =
2302 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2303
2304 // These kinds of spills might need two registers.
2305 if (spillsCR(MF) || HasAlVars)
2306 RS->addScavengingFrameIndex(
2307 MFI.CreateStackObject(Size, Alignment, false));
2308 }
2309 }
2310
2311 // This function checks if a callee saved gpr can be spilled to a volatile
2312 // vector register. This occurs for leaf functions when the option
2313 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2314 // which were not spilled to vectors, return false so the target independent
2315 // code can handle them by assigning a FrameIdx to a stack slot.
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const2316 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2317 MachineFunction &MF, const TargetRegisterInfo *TRI,
2318 std::vector<CalleeSavedInfo> &CSI) const {
2319
2320 if (CSI.empty())
2321 return true; // Early exit if no callee saved registers are modified!
2322
2323 // Early exit if cannot spill gprs to volatile vector registers.
2324 MachineFrameInfo &MFI = MF.getFrameInfo();
2325 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2326 return false;
2327
2328 // Build a BitVector of VSRs that can be used for spilling GPRs.
2329 BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2330 BitVector BVCalleeSaved(TRI->getNumRegs());
2331 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2332 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2333 for (unsigned i = 0; CSRegs[i]; ++i)
2334 BVCalleeSaved.set(CSRegs[i]);
2335
2336 for (unsigned Reg : BVAllocatable.set_bits()) {
2337 // Set to 0 if the register is not a volatile VSX register, or if it is
2338 // used in the function.
2339 if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2340 MF.getRegInfo().isPhysRegUsed(Reg))
2341 BVAllocatable.reset(Reg);
2342 }
2343
2344 bool AllSpilledToReg = true;
2345 unsigned LastVSRUsedForSpill = 0;
2346 for (auto &CS : CSI) {
2347 if (BVAllocatable.none())
2348 return false;
2349
2350 Register Reg = CS.getReg();
2351
2352 if (!PPC::G8RCRegClass.contains(Reg)) {
2353 AllSpilledToReg = false;
2354 continue;
2355 }
2356
2357 // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2358 // into one VSR using the mtvsrdd instruction.
2359 if (LastVSRUsedForSpill != 0) {
2360 CS.setDstReg(LastVSRUsedForSpill);
2361 BVAllocatable.reset(LastVSRUsedForSpill);
2362 LastVSRUsedForSpill = 0;
2363 continue;
2364 }
2365
2366 unsigned VolatileVFReg = BVAllocatable.find_first();
2367 if (VolatileVFReg < BVAllocatable.size()) {
2368 CS.setDstReg(VolatileVFReg);
2369 LastVSRUsedForSpill = VolatileVFReg;
2370 } else {
2371 AllSpilledToReg = false;
2372 }
2373 }
2374 return AllSpilledToReg;
2375 }
2376
// Spill the callee-saved registers listed in CSI before instruction MI in
// MBB. Three classes of saves are handled:
//  * CR2-CR4: on 32-bit ELF a single MFCR + STW pair saves every spilled CR
//    field at once; on other ABIs the save is deferred to the prologue.
//  * GPRs redirected into VSRs (isSpilledToReg): one or two GPRs are packed
//    into a vector register with MTVSRD/MTVSRDD instead of going to memory.
//  * Everything else: a normal store to its assigned frame index.
// Returns true to tell the generic code that spilling was handled here.
bool PPCFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;
  // Tracks destination VSRs already written; a single MTVSRDD can cover two
  // CSI entries, so the second entry for the same VSR must be skipped.
  BitVector Spilled(TRI->getNumRegs());

  VSRContainingGPRs.clear();

  // Map each VSR to the GPRs to be spilled into it. A single VSR can contain
  // one or two GPRs, so we need a table to record the pairing for the
  // save here and the matching restore in the epilogue.
  for (const CalleeSavedInfo &Info : CSI) {
    if (Info.isSpilledToReg()) {
      auto &SpilledVSR =
          VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
      assert(SpilledVSR.second == 0 &&
             "Can't spill more than two GPRs into VSR!");
      if (SpilledVSR.first == 0)
        SpilledVSR.first = Info.getReg();
      else
        SpilledVSR.second = Info.getReg();
    }
  }

  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
      MBB.addLiveIn(Reg);

    // A previously built MFCR already captures all CR fields; just record
    // this field as implicitly killed by that instruction.
    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (!Subtarget.is32BitELFABI()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                    .addReg(Reg, RegState::ImplicitKill);

        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                             .addReg(PPC::R12,
                                                     getKillRegState(true)),
                                         I.getFrameIdx()));
      }
    } else {
      if (I.isSpilledToReg()) {
        unsigned Dst = I.getDstReg();

        // Both GPRs paired into this VSR were already handled when its
        // first CSI entry was seen.
        if (Spilled[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          // Two GPRs: pack both halves with a single mtvsrdd.
          assert(Subtarget.hasP9Vector() &&
                 "mtvsrdd is unavailable on pre-P9 targets.");

          NumPESpillVSR += 2;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
              .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          // One GPR: move it into the 64-bit subregister of the VSR.
          assert(Subtarget.hasP8Vector() &&
                 "Can't move GPR to VSR on pre-P8 targets.");

          ++NumPESpillVSR;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
                  TRI->getSubReg(Dst, PPC::sub_64))
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!");
        }
        Spilled.set(Dst);
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       I.getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
                                  TRI, Register());
      }
    }
  }
  return true;
}
2499
restoreCRs(bool is31,bool CR2Spilled,bool CR3Spilled,bool CR4Spilled,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,ArrayRef<CalleeSavedInfo> CSI,unsigned CSIIndex)2500 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2501 bool CR4Spilled, MachineBasicBlock &MBB,
2502 MachineBasicBlock::iterator MI,
2503 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2504
2505 MachineFunction *MF = MBB.getParent();
2506 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2507 DebugLoc DL;
2508 unsigned MoveReg = PPC::R12;
2509
2510 // 32-bit: FP-relative
2511 MBB.insert(MI,
2512 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2513 CSI[CSIIndex].getFrameIdx()));
2514
2515 unsigned RestoreOp = PPC::MTOCRF;
2516 if (CR2Spilled)
2517 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2518 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2519
2520 if (CR3Spilled)
2521 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2522 .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2523
2524 if (CR4Spilled)
2525 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2526 .addReg(MoveReg, getKillRegState(true)));
2527 }
2528
2529 MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const2530 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2531 MachineBasicBlock::iterator I) const {
2532 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2533 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2534 I->getOpcode() == PPC::ADJCALLSTACKUP) {
2535 // Add (actually subtract) back the amount the callee popped on return.
2536 if (int CalleeAmt = I->getOperand(1).getImm()) {
2537 bool is64Bit = Subtarget.isPPC64();
2538 CalleeAmt *= -1;
2539 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2540 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2541 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2542 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2543 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2544 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2545 const DebugLoc &dl = I->getDebugLoc();
2546
2547 if (isInt<16>(CalleeAmt)) {
2548 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2549 .addReg(StackReg, RegState::Kill)
2550 .addImm(CalleeAmt);
2551 } else {
2552 MachineBasicBlock::iterator MBBI = I;
2553 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2554 .addImm(CalleeAmt >> 16);
2555 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2556 .addReg(TmpReg, RegState::Kill)
2557 .addImm(CalleeAmt & 0xFFFF);
2558 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2559 .addReg(StackReg, RegState::Kill)
2560 .addReg(TmpReg);
2561 }
2562 }
2563 }
2564 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2565 return MBB.erase(I);
2566 }
2567
isCalleeSavedCR(unsigned Reg)2568 static bool isCalleeSavedCR(unsigned Reg) {
2569 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2570 }
2571
// Restore the callee-saved registers in CSI before instruction MI in MBB,
// mirroring spillCalleeSavedRegisters. Reloads are emitted in reverse order
// of the spills by resetting the insertion point after each entry. Spilled
// CR fields (32-bit ELF only) are batched: flags record which of CR2-CR4
// were spilled, and a single LWZ + MTOCRF sequence restores them once the
// first non-CR entry (or the end of the list) is reached. GPR pairs parked
// in VSRs are moved back with MFVSRD/MFVSRLD. Returns true to signal that
// restoring was fully handled here.
bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;
  // Tracks VSRs already unpacked; one VSR may cover two CSI entries.
  BitVector Restored(TRI->getNumRegs());

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    Register Reg = CSI[i].getReg();

    // The TOC register's save/restore is handled elsewhere when the TOC
    // must be saved (see mustSaveTOC).
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled. Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // On 32-bit ELF when we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(*MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
                   CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        DebugLoc DL;
        unsigned Dst = CSI[i].getDstReg();

        // Both GPRs paired in this VSR were handled on its first entry.
        if (Restored[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          // Two GPRs: high doubleword via mfvsrd, low via mfvsrld.
          assert(Subtarget.hasP9Vector());
          NumPEReloadVSR += 2;
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
                  VSRContainingGPRs[Dst].second)
              .addReg(Dst);
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
                  VSRContainingGPRs[Dst].first)
              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          // One GPR: move it back from the VSR's 64-bit subregister.
          assert(Subtarget.hasP8Vector());
          ++NumPEReloadVSR;
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
                  VSRContainingGPRs[Dst].first)
              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!");
        }

        Restored.set(Dst);

      } else {
        // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
                                   Register());

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we haven't yet restored the CRs, do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    assert(Subtarget.is32BitELFABI() &&
           "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
    bool is31 = needsFP(*MF);
    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
  }

  return true;
}
2692
// Offset (from the stack pointer) of the TOC save slot.
uint64_t PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}
2696
// Offset (from the stack pointer) of the frame pointer save slot.
uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}
2700
// Offset (from the stack pointer) of the base pointer save slot.
uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
  return BasePointerSaveOffset;
}
2704
enableShrinkWrapping(const MachineFunction & MF) const2705 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2706 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2707 return false;
2708 return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2709 }
2710
// Accessor for the target's return-protector lowering implementation.
const ReturnProtectorLowering *PPCFrameLowering::getReturnProtector() const {
  return &RPL;
}
2714