xref: /minix3/external/bsd/llvm/dist/llvm/lib/Target/X86/X86FrameLowering.cpp (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1 //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the X86 implementation of TargetFrameLowering class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86FrameLowering.h"
15 #include "X86InstrBuilder.h"
16 #include "X86InstrInfo.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "X86Subtarget.h"
19 #include "X86TargetMachine.h"
20 #include "llvm/ADT/SmallSet.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Target/TargetOptions.h"
32 #include "llvm/Support/Debug.h"
33 #include <cstdlib>
34 
35 using namespace llvm;
36 
37 // FIXME: completely move here.
38 extern cl::opt<bool> ForceStackAlign;
39 
40 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
41   return !MF.getFrameInfo()->hasVarSizedObjects();
42 }
43 
44 /// hasFP - Return true if the specified function should have a dedicated frame
45 /// pointer register.  This is true if the function has variable sized allocas
46 /// or if frame pointer elimination is disabled.
47 bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
48   const MachineFrameInfo *MFI = MF.getFrameInfo();
49   const MachineModuleInfo &MMI = MF.getMMI();
50   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
51 
52   return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
53           RegInfo->needsStackRealignment(MF) ||
54           MFI->hasVarSizedObjects() ||
55           MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
56           MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
57           MMI.callsUnwindInit() || MMI.callsEHReturn() ||
58           MFI->hasStackMap() || MFI->hasPatchPoint());
59 }
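// For illustration (hypothetical source, not from this file): a function that
// contains a variable-sized alloca, e.g.
//
//   void f(unsigned n) {
//     char buf[n];     // variable-sized object
//     use(buf);        // 'use' is just a placeholder
//   }
//
// has MFI->hasVarSizedObjects() set, so hasFP() returns true and a dedicated
// frame pointer is kept for it.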
60 
61 static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
62   if (IsLP64) {
63     if (isInt<8>(Imm))
64       return X86::SUB64ri8;
65     return X86::SUB64ri32;
66   } else {
67     if (isInt<8>(Imm))
68       return X86::SUB32ri8;
69     return X86::SUB32ri;
70   }
71 }
72 
73 static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
74   if (IsLP64) {
75     if (isInt<8>(Imm))
76       return X86::ADD64ri8;
77     return X86::ADD64ri32;
78   } else {
79     if (isInt<8>(Imm))
80       return X86::ADD32ri8;
81     return X86::ADD32ri;
82   }
83 }
84 
85 static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
86   if (IsLP64) {
87     if (isInt<8>(Imm))
88       return X86::AND64ri8;
89     return X86::AND64ri32;
90   }
91   if (isInt<8>(Imm))
92     return X86::AND32ri8;
93   return X86::AND32ri;
94 }
95 
96 static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) {
97   // We don't support LP64 for now.
98   assert(!IsLP64);
99 
100   if (MO.isImm() && isInt<8>(MO.getImm()))
101     return X86::PUSH32i8;
102 
103   return X86::PUSHi32;
104 }
105 
106 static unsigned getLEArOpcode(unsigned IsLP64) {
107   return IsLP64 ? X86::LEA64r : X86::LEA32r;
108 }
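// For illustration, the immediate width picks the opcode form (values are
// examples only):
//
//   getSUBriOpcode(/*IsLP64=*/true,  16)   -> X86::SUB64ri8   (fits in imm8)
//   getSUBriOpcode(/*IsLP64=*/true,  4096) -> X86::SUB64ri32
//   getADDriOpcode(/*IsLP64=*/false, 8)    -> X86::ADD32ri8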
109 
110 /// findDeadCallerSavedReg - Return a caller-saved register that isn't live
111 /// when it reaches the "return" instruction. We can then pop a stack object
112 /// to this register without worrying about clobbering it.
113 static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
114                                        MachineBasicBlock::iterator &MBBI,
115                                        const TargetRegisterInfo &TRI,
116                                        bool Is64Bit) {
117   const MachineFunction *MF = MBB.getParent();
118   const Function *F = MF->getFunction();
119   if (!F || MF->getMMI().callsEHReturn())
120     return 0;
121 
122   static const uint16_t CallerSavedRegs32Bit[] = {
123     X86::EAX, X86::EDX, X86::ECX, 0
124   };
125 
126   static const uint16_t CallerSavedRegs64Bit[] = {
127     X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
128     X86::R8,  X86::R9,  X86::R10, X86::R11, 0
129   };
130 
131   unsigned Opc = MBBI->getOpcode();
132   switch (Opc) {
133   default: return 0;
134   case X86::RETL:
135   case X86::RETQ:
136   case X86::RETIL:
137   case X86::RETIQ:
138   case X86::TCRETURNdi:
139   case X86::TCRETURNri:
140   case X86::TCRETURNmi:
141   case X86::TCRETURNdi64:
142   case X86::TCRETURNri64:
143   case X86::TCRETURNmi64:
144   case X86::EH_RETURN:
145   case X86::EH_RETURN64: {
146     SmallSet<uint16_t, 8> Uses;
147     for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
148       MachineOperand &MO = MBBI->getOperand(i);
149       if (!MO.isReg() || MO.isDef())
150         continue;
151       unsigned Reg = MO.getReg();
152       if (!Reg)
153         continue;
154       for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
155         Uses.insert(*AI);
156     }
157 
158     const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
159     for (; *CS; ++CS)
160       if (!Uses.count(*CS))
161         return *CS;
162   }
163   }
164 
165   return 0;
166 }
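// Illustrative use: when the epilogue has to bump ESP by exactly one slot
// right before a 32-bit return, emitSPUpdate below can replace
//
//   addl $4, %esp
//
// with
//
//   popl %ecx          ; assuming this helper reported ECX as dead
//
// which is shorter and avoids touching EFLAGS.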
167 
168 
169 /// emitSPUpdate - Emit a series of instructions to increment / decrement the
170 /// stack pointer by a constant value.
171 static
172 void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
173                   unsigned StackPtr, int64_t NumBytes,
174                   bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
175                   const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
176   bool isSub = NumBytes < 0;
177   uint64_t Offset = isSub ? -NumBytes : NumBytes;
178   unsigned Opc;
179   if (UseLEA)
180     Opc = getLEArOpcode(Is64BitStackPtr);
181   else
182     Opc = isSub
183       ? getSUBriOpcode(Is64BitStackPtr, Offset)
184       : getADDriOpcode(Is64BitStackPtr, Offset);
185 
186   uint64_t Chunk = (1LL << 31) - 1;
187   DebugLoc DL = MBB.findDebugLoc(MBBI);
188 
189   while (Offset) {
190     uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
191     if (ThisVal == (Is64BitTarget ? 8 : 4)) {
192       // Use push / pop instead.
193       unsigned Reg = isSub
194         ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
195         : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
196       if (Reg) {
197         Opc = isSub
198           ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
199           : (Is64BitTarget ? X86::POP64r  : X86::POP32r);
200         MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
201           .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
202         if (isSub)
203           MI->setFlag(MachineInstr::FrameSetup);
204         Offset -= ThisVal;
205         continue;
206       }
207     }
208 
209     MachineInstr *MI = nullptr;
210 
211     if (UseLEA) {
212       MI =  addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
213                           StackPtr, false, isSub ? -ThisVal : ThisVal);
214     } else {
215       MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
216             .addReg(StackPtr)
217             .addImm(ThisVal);
218       MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
219     }
220 
221     if (isSub)
222       MI->setFlag(MachineInstr::FrameSetup);
223 
224     Offset -= ThisVal;
225   }
226 }
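// Illustrative use (example values): in a 64-bit prologue,
//
//   emitSPUpdate(MBB, MBBI, StackPtr, -40, /*Is64BitTarget=*/true,
//                /*Is64BitStackPtr=*/true, /*UseLEA=*/false, TII, TRI);
//
// emits "subq $40, %rsp", and the matching epilogue call with +40 emits
// "addq $40, %rsp".  Adjustments larger than 2^31-1 are split into chunks,
// and a single-slot adjustment becomes a push or pop when a register is
// available (see above).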
227 
228 /// mergeSPUpdatesUp - Fold a stack adjustment immediately above MBBI into *NumBytes.
229 static
230 void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
231                       unsigned StackPtr, uint64_t *NumBytes = nullptr) {
232   if (MBBI == MBB.begin()) return;
233 
234   MachineBasicBlock::iterator PI = std::prev(MBBI);
235   unsigned Opc = PI->getOpcode();
236   if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
237        Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
238        Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
239       PI->getOperand(0).getReg() == StackPtr) {
240     if (NumBytes)
241       *NumBytes += PI->getOperand(2).getImm();
242     MBB.erase(PI);
243   } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
244               Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
245              PI->getOperand(0).getReg() == StackPtr) {
246     if (NumBytes)
247       *NumBytes -= PI->getOperand(2).getImm();
248     MBB.erase(PI);
249   }
250 }
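// For example (illustrative): if the instruction just above MBBI is
//
//   addq $16, %rsp
//
// and StackPtr is RSP, that instruction is erased and *NumBytes is increased
// by 16, so the caller can emit one combined adjustment instead of two.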
251 
252 /// mergeSPUpdatesDown - Fold a stack adjustment immediately below MBBI
253 /// into *NumBytes.
254 static
255 void mergeSPUpdatesDown(MachineBasicBlock &MBB,
256                         MachineBasicBlock::iterator &MBBI,
257                         unsigned StackPtr, uint64_t *NumBytes = nullptr) {
258   // FIXME:  THIS ISN'T RUN!!!
259   return;
260 
261   if (MBBI == MBB.end()) return;
262 
263   MachineBasicBlock::iterator NI = std::next(MBBI);
264   if (NI == MBB.end()) return;
265 
266   unsigned Opc = NI->getOpcode();
267   if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
268        Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
269       NI->getOperand(0).getReg() == StackPtr) {
270     if (NumBytes)
271       *NumBytes -= NI->getOperand(2).getImm();
272     MBB.erase(NI);
273     MBBI = NI;
274   } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
275               Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
276              NI->getOperand(0).getReg() == StackPtr) {
277     if (NumBytes)
278       *NumBytes += NI->getOperand(2).getImm();
279     MBB.erase(NI);
280     MBBI = NI;
281   }
282 }
283 
284 /// mergeSPUpdates - Check the instruction before/after the passed
285 /// instruction. If it is an ADD/SUB/LEA instruction on the stack pointer, it
286 /// is deleted and the stack adjustment is returned as a positive value for
287 /// ADD/LEA and a negative one for SUB.
288 static int mergeSPUpdates(MachineBasicBlock &MBB,
289                           MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
290                           bool doMergeWithPrevious) {
291   if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
292       (!doMergeWithPrevious && MBBI == MBB.end()))
293     return 0;
294 
295   MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
296   MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
297                                                        : std::next(MBBI);
298   unsigned Opc = PI->getOpcode();
299   int Offset = 0;
300 
301   if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
302        Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
303        Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
304       PI->getOperand(0).getReg() == StackPtr){
305     Offset += PI->getOperand(2).getImm();
306     MBB.erase(PI);
307     if (!doMergeWithPrevious) MBBI = NI;
308   } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
309               Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
310              PI->getOperand(0).getReg() == StackPtr) {
311     Offset -= PI->getOperand(2).getImm();
312     MBB.erase(PI);
313     if (!doMergeWithPrevious) MBBI = NI;
314   }
315 
316   return Offset;
317 }
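// For example (illustrative): emitPrologue calls
//
//   NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
//
// If the instruction before MBBI is "subl $12, %esp", the call returns -12
// and erases it, so the prologue folds those 12 bytes into its own stack
// allocation.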
318 
319 static bool isEAXLiveIn(MachineFunction &MF) {
320   for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
321        EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
322     unsigned Reg = II->first;
323 
324     if (Reg == X86::EAX || Reg == X86::AX ||
325         Reg == X86::AH || Reg == X86::AL)
326       return true;
327   }
328 
329   return false;
330 }
331 
332 void
333 X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
334                                             MachineBasicBlock::iterator MBBI,
335                                             DebugLoc DL) const {
336   MachineFunction &MF = *MBB.getParent();
337   MachineFrameInfo *MFI = MF.getFrameInfo();
338   MachineModuleInfo &MMI = MF.getMMI();
339   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
340   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
341 
342   // Add callee saved registers to move list.
343   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
344   if (CSI.empty()) return;
345 
346   // Calculate offsets.
347   for (std::vector<CalleeSavedInfo>::const_iterator
348          I = CSI.begin(), E = CSI.end(); I != E; ++I) {
349     int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
350     unsigned Reg = I->getReg();
351 
352     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
353     unsigned CFIIndex =
354         MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
355                                                         Offset));
356     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
357         .addCFIIndex(CFIIndex);
358   }
359 }
360 
361 /// usesTheStack - This function checks whether any user of EFLAGS copies
362 /// it. We know that the code that lowers a COPY of EFLAGS has to use the
363 /// stack, and if we don't adjust the stack we clobber the first frame
364 /// index.
365 /// See X86InstrInfo::copyPhysReg.
366 static bool usesTheStack(const MachineFunction &MF) {
367   const MachineRegisterInfo &MRI = MF.getRegInfo();
368 
369   for (MachineRegisterInfo::reg_instr_iterator
370        ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
371        ri != re; ++ri)
372     if (ri->isCopy())
373       return true;
374 
375   return false;
376 }
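// The lowering referred to above goes through the stack, roughly:
//
//   pushfq             ; copy %rflags ...
//   popq  %rax         ; ... into a GPR
//
// so a function whose only stack traffic comes from such a COPY still needs
// its stack pointer adjusted, even though it otherwise looks leaf-like.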
377 
378 void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
379                                           MachineBasicBlock &MBB,
380                                           MachineBasicBlock::iterator MBBI,
381                                           DebugLoc DL) {
382   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
383   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
384   bool Is64Bit = STI.is64Bit();
385   bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
386   const X86RegisterInfo *RegInfo =
387       static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
388 
389   unsigned CallOp;
390   if (Is64Bit)
391     CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
392   else
393     CallOp = X86::CALLpcrel32;
394 
395   const char *Symbol;
396   if (Is64Bit) {
397     if (STI.isTargetCygMing()) {
398       Symbol = "___chkstk_ms";
399     } else {
400       Symbol = "__chkstk";
401     }
402   } else if (STI.isTargetCygMing())
403     Symbol = "_alloca";
404   else
405     Symbol = "_chkstk";
406 
407   MachineInstrBuilder CI;
408 
409   // All current stack probes take AX and SP as input, clobber flags, and
410   // preserve all registers. x86_64 probes leave RSP unmodified.
411   if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
412     // For the large code model, we have to call through a register. Use R11,
413     // as it is scratch in all supported calling conventions.
414     BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
415         .addExternalSymbol(Symbol);
416     CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
417   } else {
418     CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
419   }
420 
421   unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
422   unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
423   CI.addReg(AX, RegState::Implicit)
424       .addReg(SP, RegState::Implicit)
425       .addReg(AX, RegState::Define | RegState::Implicit)
426       .addReg(SP, RegState::Define | RegState::Implicit)
427       .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
428 
429   if (Is64Bit) {
430     // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
431     // themselves. They also do not clobber %rax, so we can reuse it when
432     // adjusting %rsp.
433     BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
434         .addReg(X86::RSP)
435         .addReg(X86::RAX);
436   }
437 }
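// Illustrative Win64 sequence (NNN is whatever the caller loads into RAX
// before invoking this helper):
//
//   mov   $NNN, %rax       ; requested size, set up by the caller
//   callq __chkstk         ; probes the stack, leaves %rsp unmodified
//   subq  %rax, %rsp       ; emitted above to do the actual adjustment
//
// On 32-bit Windows the callee (_chkstk / _alloca) adjusts %esp itself, so
// no trailing SUB is emitted there.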
438 
439 /// emitPrologue - Push callee-saved registers onto the stack, which
440 /// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
441 /// space for local variables. Also emit labels used by the exception handler to
442 /// generate the exception handling frames.
443 
444 /*
445   Here's a gist of what gets emitted:
446 
447   ; Establish frame pointer, if needed
448   [if needs FP]
449       push  %rbp
450       .cfi_def_cfa_offset 16
451       .cfi_offset %rbp, -16
452       .seh_pushreg %rbp
453       mov  %rsp, %rbp
454       .cfi_def_cfa_register %rbp
455 
456   ; Spill general-purpose registers
457   [for all callee-saved GPRs]
458       pushq %<reg>
459       [if not needs FP]
460          .cfi_def_cfa_offset (offset from RETADDR)
461       .seh_pushreg %<reg>
462 
463   ; If the required stack alignment > default stack alignment
464   ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
465   ; of unknown size in the stack frame.
466   [if stack needs re-alignment]
467       and  $MASK, %rsp
468 
469   ; Allocate space for locals
470   [if target is Windows and allocated space > 4096 bytes]
471       ; Windows needs special care for allocations larger
472       ; than one page.
473       mov $NNN, %rax
474       call ___chkstk_ms/___chkstk
475       sub  %rax, %rsp
476   [else]
477       sub  $NNN, %rsp
478 
479   [if needs FP]
480       .seh_stackalloc (size of XMM spill slots)
481       .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
482   [else]
483       .seh_stackalloc NNN
484 
485   ; Spill XMMs
486   ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
487   ; they may get spilled on any platform, if the current function
488   ; calls @llvm.eh.unwind.init
489   [if needs FP]
490       [for all callee-saved XMM registers]
491           movaps  %<xmm reg>, -MMM(%rbp)
492       [for all callee-saved XMM registers]
493           .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
494               ; i.e. the offset relative to (%rbp - SEHFrameOffset)
495   [else]
496       [for all callee-saved XMM registers]
497           movaps  %<xmm reg>, KKK(%rsp)
498       [for all callee-saved XMM registers]
499           .seh_savexmm %<xmm reg>, KKK
500 
501   .seh_endprologue
502 
503   [if needs base pointer]
504       mov  %rsp, %rbx
505       [if needs to restore base pointer]
506           mov %rsp, -MMM(%rbp)
507 
508   ; Emit CFI info
509   [if needs FP]
510       [for all callee-saved registers]
511           .cfi_offset %<reg>, (offset from %rbp)
512   [else]
513        .cfi_def_cfa_offset (offset from RETADDR)
514       [for all callee-saved registers]
515           .cfi_offset %<reg>, (offset from %rsp)
516 
517   Notes:
518   - .seh directives are emitted only for Windows 64 ABI
519   - .cfi directives are emitted for all other ABIs
520   - for 32-bit code, substitute %e?? registers for %r??
521 */
522 
523 void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
524   MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
525   MachineBasicBlock::iterator MBBI = MBB.begin();
526   MachineFrameInfo *MFI = MF.getFrameInfo();
527   const Function *Fn = MF.getFunction();
528   const X86RegisterInfo *RegInfo =
529       static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
530   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
531   MachineModuleInfo &MMI = MF.getMMI();
532   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
533   uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
534   uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
535   bool HasFP = hasFP(MF);
536   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
537   bool Is64Bit = STI.is64Bit();
538   // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit ones.
539   const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
540   bool IsWin64 = STI.isTargetWin64();
541   // Not necessarily synonymous with IsWin64.
542   bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
543   bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
544   bool NeedsDwarfCFI =
545       !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
546   bool UseLEA = STI.useLeaForSP();
547   unsigned StackAlign = getStackAlignment();
548   unsigned SlotSize = RegInfo->getSlotSize();
549   unsigned FramePtr = RegInfo->getFrameRegister(MF);
550   const unsigned MachineFramePtr = STI.isTarget64BitILP32() ?
551                  getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
552   unsigned StackPtr = RegInfo->getStackRegister();
553   unsigned BasePtr = RegInfo->getBaseRegister();
554   DebugLoc DL;
555 
556   // If we're forcing a stack realignment we can't rely on just the frame
557   // info, we need to know the ABI stack alignment as well in case we
558   // have a call out.  Otherwise just make sure we have some alignment - we'll
559   // go with the minimum SlotSize.
560   if (ForceStackAlign) {
561     if (MFI->hasCalls())
562       MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
563     else if (MaxAlign < SlotSize)
564       MaxAlign = SlotSize;
565   }
566 
567   // Add RETADDR move area to callee saved frame size.
568   int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
569   if (TailCallReturnAddrDelta < 0)
570     X86FI->setCalleeSavedFrameSize(
571       X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
572 
573   bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());
574 
575   // The default stack probe size is 4096 if the function has no stackprobesize
576   // attribute.
577   unsigned StackProbeSize = 4096;
578   if (Fn->hasFnAttribute("stack-probe-size"))
579     Fn->getFnAttribute("stack-probe-size")
580         .getValueAsString()
581         .getAsInteger(0, StackProbeSize);
582 
583   // If this is x86-64, the Red Zone is not disabled, we are a leaf function,
584   // we use at most 128 bytes of stack space, and we have no frame pointer,
585   // calls, or dynamic allocas, then we do not need to adjust the stack
586   // pointer (we fit in the Red Zone). We also check that we don't push and
587   // pop from the stack.
588   if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
589                                                    Attribute::NoRedZone) &&
590       !RegInfo->needsStackRealignment(MF) &&
591       !MFI->hasVarSizedObjects() &&                     // No dynamic alloca.
592       !MFI->adjustsStack() &&                           // No calls.
593       !IsWin64 &&                                       // Win64 has no Red Zone
594       !usesTheStack(MF) &&                              // Don't push and pop.
595       !MF.shouldSplitStack()) {                         // Regular stack
596     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
597     if (HasFP) MinSize += SlotSize;
598     StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
599     MFI->setStackSize(StackSize);
600   }
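  // For example (illustrative numbers): a 64-bit leaf function with
  // StackSize == 96, no callee-saved registers and no frame pointer gets
  // StackSize reset to 0 here; the 96 bytes live entirely in the Red Zone.
  // With StackSize == 160 it would be reset to 32, and only those 32 bytes
  // are actually allocated below.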
601 
602   // Insert stack pointer adjustment for later moving of return addr.  Only
603   // applies to tail call optimized functions where the callee argument stack
604   // size is bigger than the callers.
605   if (TailCallReturnAddrDelta < 0) {
606     MachineInstr *MI =
607       BuildMI(MBB, MBBI, DL,
608               TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
609               StackPtr)
610         .addReg(StackPtr)
611         .addImm(-TailCallReturnAddrDelta)
612         .setMIFlag(MachineInstr::FrameSetup);
613     MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
614   }
615 
616   // Mapping for machine moves:
617   //
618   //   DST: VirtualFP AND
619   //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
620   //        ELSE                        => DW_CFA_def_cfa
621   //
622   //   SRC: VirtualFP AND
623   //        DST: Register               => DW_CFA_def_cfa_register
624   //
625   //   ELSE
626   //        OFFSET < 0                  => DW_CFA_offset_extended_sf
627   //        REG < 64                    => DW_CFA_offset + Reg
628   //        ELSE                        => DW_CFA_offset_extended
629 
630   uint64_t NumBytes = 0;
631   int stackGrowth = -SlotSize;
632 
633   if (HasFP) {
634     // Calculate required stack adjustment.
635     uint64_t FrameSize = StackSize - SlotSize;
636     // If required, include space for an extra hidden slot for stashing the base pointer.
637     if (X86FI->getRestoreBasePointer())
638       FrameSize += SlotSize;
639     if (RegInfo->needsStackRealignment(MF)) {
640       // Callee-saved registers are pushed on stack before the stack
641       // is realigned.
642       FrameSize -= X86FI->getCalleeSavedFrameSize();
643       NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
644     } else {
645       NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
646     }
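    // For example (illustrative): FrameSize == 40 with MaxAlign == 32 rounds
    // up to NumBytes == 64, keeping the realigned frame a multiple of the
    // required alignment.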
647 
648     // Get the offset of the stack slot for the EBP register, which is
649     // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
650     // Update the frame offset adjustment.
651     MFI->setOffsetAdjustment(-NumBytes);
652 
653     // Save EBP/RBP into the appropriate stack slot.
654     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
655       .addReg(MachineFramePtr, RegState::Kill)
656       .setMIFlag(MachineInstr::FrameSetup);
657 
658     if (NeedsDwarfCFI) {
659       // Mark the place where EBP/RBP was saved.
660       // Define the current CFA rule to use the provided offset.
661       assert(StackSize);
662       unsigned CFIIndex = MMI.addFrameInst(
663           MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
664       BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
665           .addCFIIndex(CFIIndex);
666 
667       // Change the rule for the FramePtr to be an "offset" rule.
668       unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
669       CFIIndex = MMI.addFrameInst(
670           MCCFIInstruction::createOffset(nullptr,
671                                          DwarfFramePtr, 2 * stackGrowth));
672       BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
673           .addCFIIndex(CFIIndex);
674     }
675 
676     if (NeedsWinEH) {
677       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
678           .addImm(FramePtr)
679           .setMIFlag(MachineInstr::FrameSetup);
680     }
681 
682     // Update EBP with the new base value.
683     BuildMI(MBB, MBBI, DL,
684             TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
685         .addReg(StackPtr)
686         .setMIFlag(MachineInstr::FrameSetup);
687 
688     if (NeedsDwarfCFI) {
689       // Mark effective beginning of when frame pointer becomes valid.
690       // Define the current CFA to use the EBP/RBP register.
691       unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
692       unsigned CFIIndex = MMI.addFrameInst(
693           MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
694       BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
695           .addCFIIndex(CFIIndex);
696     }
697 
698     // Mark the FramePtr as live-in in every block.
699     for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
700       I->addLiveIn(MachineFramePtr);
701   } else {
702     NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
703   }
704 
705   // Skip the callee-saved push instructions.
706   bool PushedRegs = false;
707   int StackOffset = 2 * stackGrowth;
708 
709   while (MBBI != MBB.end() &&
710          (MBBI->getOpcode() == X86::PUSH32r ||
711           MBBI->getOpcode() == X86::PUSH64r)) {
712     PushedRegs = true;
713     unsigned Reg = MBBI->getOperand(0).getReg();
714     ++MBBI;
715 
716     if (!HasFP && NeedsDwarfCFI) {
717       // Mark callee-saved push instruction.
718       // Define the current CFA rule to use the provided offset.
719       assert(StackSize);
720       unsigned CFIIndex = MMI.addFrameInst(
721           MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
722       BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
723           .addCFIIndex(CFIIndex);
724       StackOffset += stackGrowth;
725     }
726 
727     if (NeedsWinEH) {
728       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
729           MachineInstr::FrameSetup);
730     }
731   }
732 
733   // Realign stack after we pushed callee-saved registers (so that we'll be
734   // able to calculate their offsets from the frame pointer).
735   if (RegInfo->needsStackRealignment(MF)) {
736     assert(HasFP && "There should be a frame pointer if stack is realigned.");
737     uint64_t Val = -MaxAlign;
738     MachineInstr *MI =
739       BuildMI(MBB, MBBI, DL,
740               TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr)
741       .addReg(StackPtr)
742       .addImm(Val)
743       .setMIFlag(MachineInstr::FrameSetup);
744 
745     // The EFLAGS implicit def is dead.
746     MI->getOperand(3).setIsDead();
747   }
748 
749   // If there is a SUB32ri of ESP immediately before this instruction, merge
750   // the two. This can be the case when tail call elimination is enabled and
751   // the callee has more arguments than the caller.
752   NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
753 
754   // If there is an ADD32ri or SUB32ri of ESP immediately after this
755   // instruction, merge the two instructions.
756   mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
757 
758   // Adjust stack pointer: ESP -= numbytes.
759 
760   // Windows and cygwin/mingw require a prologue helper routine when allocating
761   // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
762   // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
763   // stack and adjust the stack pointer in one go.  The 64-bit version of
764   // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
765   // responsible for adjusting the stack pointer.  Touching the stack at 4K
766   // increments is necessary to ensure that the guard pages used by the OS
767   // virtual memory manager are allocated in correct sequence.
768   if (NumBytes >= StackProbeSize && UseStackProbe) {
769     // Check whether EAX is livein for this function.
770     bool isEAXAlive = isEAXLiveIn(MF);
771 
772     if (isEAXAlive) {
773       // Sanity check: in the 64-bit case EAX should never be live-in here,
774       // so assert if it is.
775       assert(!Is64Bit && "EAX is livein in x64 case!");
776 
777       // Save EAX
778       BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
779         .addReg(X86::EAX, RegState::Kill)
780         .setMIFlag(MachineInstr::FrameSetup);
781     }
782 
783     if (Is64Bit) {
784       // Handle the 64-bit Windows ABI case where we need to call __chkstk.
785       // Function prologue is responsible for adjusting the stack pointer.
786       BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
787         .addImm(NumBytes)
788         .setMIFlag(MachineInstr::FrameSetup);
789     } else {
790       // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
791       // We'll also use 4 already allocated bytes for EAX.
792       BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
793         .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
794         .setMIFlag(MachineInstr::FrameSetup);
795     }
796 
797     // Save a pointer to the MI where we set AX.
798     MachineBasicBlock::iterator SetRAX = MBBI;
799     --SetRAX;
800 
801     // Call __chkstk, __chkstk_ms, or __alloca.
802     emitStackProbeCall(MF, MBB, MBBI, DL);
803 
804     // Apply the frame setup flag to all inserted instrs.
805     for (; SetRAX != MBBI; ++SetRAX)
806       SetRAX->setFlag(MachineInstr::FrameSetup);
807 
808     if (isEAXAlive) {
809       // Restore EAX
810       MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
811                                               X86::EAX),
812                                       StackPtr, false, NumBytes - 4);
813       MI->setFlag(MachineInstr::FrameSetup);
814       MBB.insert(MBBI, MI);
815     }
816   } else if (NumBytes) {
817     emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, Uses64BitFramePtr,
818                  UseLEA, TII, *RegInfo);
819   }
820 
821   int SEHFrameOffset = 0;
822   if (NeedsWinEH) {
823     if (HasFP) {
824       // We need to set frame base offset low enough such that all saved
825       // register offsets would be positive relative to it, but we can't
826       // just use NumBytes, because .seh_setframe offset must be <=240.
827       // So we pretend to have only allocated enough space to spill the
828       // non-volatile registers.
829       // We don't care about the rest of the stack allocation, because the
830       // unwinder will restore SP to (BP - SEHFrameOffset).
831       for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
832         int offset = MFI->getObjectOffset(Info.getFrameIdx());
833         SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset));
834       }
835       SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment
836 
837       // This only needs to account for XMM spill slots, GPR slots
838       // are covered by the .seh_pushreg's emitted above.
839       unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
840       if (Size) {
841         BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
842             .addImm(Size)
843             .setMIFlag(MachineInstr::FrameSetup);
844       }
845 
846       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
847           .addImm(FramePtr)
848           .addImm(SEHFrameOffset)
849           .setMIFlag(MachineInstr::FrameSetup);
850     } else {
851       // SP will be the base register for restoring XMMs
852       if (NumBytes) {
853         BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
854             .addImm(NumBytes)
855             .setMIFlag(MachineInstr::FrameSetup);
856       }
857     }
858   }
859 
860   // Skip the rest of the register spilling code.
861   while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
862     ++MBBI;
863 
864   // Emit SEH info for non-GPRs
865   if (NeedsWinEH) {
866     for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
867       unsigned Reg = Info.getReg();
868       if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
869         continue;
870       assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
871 
872       int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
873       Offset += SEHFrameOffset;
874 
875       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
876           .addImm(Reg)
877           .addImm(Offset)
878           .setMIFlag(MachineInstr::FrameSetup);
879     }
880 
881     BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
882         .setMIFlag(MachineInstr::FrameSetup);
883   }
884 
885   // If we need a base pointer, set it up here. It's whatever the value
886   // of the stack pointer is at this point. Any variable size objects
887   // will be allocated after this, so we can still use the base pointer
888   // to reference locals.
889   if (RegInfo->hasBasePointer(MF)) {
890     // Update the base pointer with the current stack pointer.
891     unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
892     BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
893       .addReg(StackPtr)
894       .setMIFlag(MachineInstr::FrameSetup);
895     if (X86FI->getRestoreBasePointer()) {
896       // Stash value of base pointer.  Saving RSP instead of EBP shortens dependence chain.
897       unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
898       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
899                    FramePtr, true, X86FI->getRestoreBasePointerOffset())
900         .addReg(StackPtr)
901         .setMIFlag(MachineInstr::FrameSetup);
902     }
903   }
904 
905   if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
906     // Mark end of stack pointer adjustment.
907     if (!HasFP && NumBytes) {
908       // Define the current CFA rule to use the provided offset.
909       assert(StackSize);
910       unsigned CFIIndex = MMI.addFrameInst(
911           MCCFIInstruction::createDefCfaOffset(nullptr,
912                                                -StackSize + stackGrowth));
913 
914       BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
915           .addCFIIndex(CFIIndex);
916     }
917 
918     // Emit DWARF info specifying the offsets of the callee-saved registers.
919     if (PushedRegs)
920       emitCalleeSavedFrameMoves(MBB, MBBI, DL);
921   }
922 }
923 
924 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
925                                     MachineBasicBlock &MBB) const {
926   const MachineFrameInfo *MFI = MF.getFrameInfo();
927   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
928   const X86RegisterInfo *RegInfo =
929       static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
930   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
931   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
932   assert(MBBI != MBB.end() && "Returning block has no instructions");
933   unsigned RetOpcode = MBBI->getOpcode();
934   DebugLoc DL = MBBI->getDebugLoc();
935   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
936   bool Is64Bit = STI.is64Bit();
937   // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit ones.
938   const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
939   const bool Is64BitILP32 = STI.isTarget64BitILP32();
940   bool UseLEA = STI.useLeaForSP();
941   unsigned StackAlign = getStackAlignment();
942   unsigned SlotSize = RegInfo->getSlotSize();
943   unsigned FramePtr = RegInfo->getFrameRegister(MF);
944   unsigned MachineFramePtr = Is64BitILP32 ?
945              getX86SubSuperRegister(FramePtr, MVT::i64, false) : FramePtr;
946   unsigned StackPtr = RegInfo->getStackRegister();
947 
948   bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
949   bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
950 
951   switch (RetOpcode) {
952   default:
953     llvm_unreachable("Can only insert epilog into returning blocks");
954   case X86::RETQ:
955   case X86::RETL:
956   case X86::RETIL:
957   case X86::RETIQ:
958   case X86::TCRETURNdi:
959   case X86::TCRETURNri:
960   case X86::TCRETURNmi:
961   case X86::TCRETURNdi64:
962   case X86::TCRETURNri64:
963   case X86::TCRETURNmi64:
964   case X86::EH_RETURN:
965   case X86::EH_RETURN64:
966     break;  // These are ok
967   }
968 
969   // Get the number of bytes to allocate from the FrameInfo.
970   uint64_t StackSize = MFI->getStackSize();
971   uint64_t MaxAlign  = MFI->getMaxAlignment();
972   unsigned CSSize = X86FI->getCalleeSavedFrameSize();
973   uint64_t NumBytes = 0;
974 
975   // If we're forcing a stack realignment we can't rely on just the frame
976   // info, we need to know the ABI stack alignment as well in case we
977   // have a call out.  Otherwise just make sure we have some alignment - we'll
978   // go with the minimum.
979   if (ForceStackAlign) {
980     if (MFI->hasCalls())
981       MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
982     else
983       MaxAlign = MaxAlign ? MaxAlign : 4;
984   }
985 
986   if (hasFP(MF)) {
987     // Calculate required stack adjustment.
988     uint64_t FrameSize = StackSize - SlotSize;
989     if (RegInfo->needsStackRealignment(MF)) {
990       // Callee-saved registers were pushed on stack before the stack
991       // was realigned.
992       FrameSize -= CSSize;
993       NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
994     } else {
995       NumBytes = FrameSize - CSSize;
996     }
997 
998     // Pop EBP.
999     BuildMI(MBB, MBBI, DL,
1000             TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
1001   } else {
1002     NumBytes = StackSize - CSSize;
1003   }
1004 
1005   // Skip the callee-saved pop instructions.
1006   while (MBBI != MBB.begin()) {
1007     MachineBasicBlock::iterator PI = std::prev(MBBI);
1008     unsigned Opc = PI->getOpcode();
1009 
1010     if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
1011         !PI->isTerminator())
1012       break;
1013 
1014     --MBBI;
1015   }
1016   MachineBasicBlock::iterator FirstCSPop = MBBI;
1017 
1018   DL = MBBI->getDebugLoc();
1019 
1020   // If there is an ADD32ri or SUB32ri of ESP immediately before this
1021   // instruction, merge the two instructions.
1022   if (NumBytes || MFI->hasVarSizedObjects())
1023     mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
1024 
1025   // If dynamic alloca is used, then reset ESP to point to the last callee-saved
1026   // slot before popping them off! The same applies when the stack was
1027   // realigned.
1028   if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
1029     if (RegInfo->needsStackRealignment(MF))
1030       MBBI = FirstCSPop;
1031     if (CSSize != 0) {
1032       unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
1033       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
1034                    FramePtr, false, -CSSize);
1035       --MBBI;
1036     } else {
1037       unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
1038       BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
1039         .addReg(FramePtr);
1040       --MBBI;
1041     }
1042   } else if (NumBytes) {
1043     // Adjust stack pointer back: ESP += numbytes.
1044     emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr, UseLEA,
1045                  TII, *RegInfo);
1046     --MBBI;
1047   }
1048 
1049   // Windows unwinder will not invoke function's exception handler if IP is
1050   // either in prologue or in epilogue.  This behavior causes a problem when a
1051   // call immediately precedes an epilogue, because the return address points
1052   // into the epilogue.  To cope with that, we insert an epilogue marker here,
1053   // then replace it with a 'nop' if it ends up immediately after a CALL in the
1054   // final emitted code.
1055   if (NeedsWinEH)
1056     BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
1057 
1058   // We're returning from function via eh_return.
1059   if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
1060     MBBI = MBB.getLastNonDebugInstr();
1061     MachineOperand &DestAddr  = MBBI->getOperand(0);
1062     assert(DestAddr.isReg() && "Offset should be in register!");
1063     BuildMI(MBB, MBBI, DL,
1064             TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1065             StackPtr).addReg(DestAddr.getReg());
1066   } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
1067              RetOpcode == X86::TCRETURNmi ||
1068              RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
1069              RetOpcode == X86::TCRETURNmi64) {
1070     bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
1071     // Tail call return: adjust the stack pointer and jump to callee.
1072     MBBI = MBB.getLastNonDebugInstr();
1073     MachineOperand &JumpTarget = MBBI->getOperand(0);
1074     MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
1075     assert(StackAdjust.isImm() && "Expecting immediate value.");
1076 
1077     // Adjust stack pointer.
1078     int StackAdj = StackAdjust.getImm();
1079     int MaxTCDelta = X86FI->getTCReturnAddrDelta();
1080     int Offset = 0;
1081     assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
1082 
1083     // Incorporate the retaddr area.
1084     Offset = StackAdj-MaxTCDelta;
1085     assert(Offset >= 0 && "Offset should never be negative");
1086 
1087     if (Offset) {
1088       // Check for possible merge with preceding ADD instruction.
1089       Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
1090       emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
1091                    UseLEA, TII, *RegInfo);
1092     }
1093 
1094     // Jump to label or value in register.
1095     if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
1096       MachineInstrBuilder MIB =
1097         BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
1098                                        ? X86::TAILJMPd : X86::TAILJMPd64));
1099       if (JumpTarget.isGlobal())
1100         MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
1101                              JumpTarget.getTargetFlags());
1102       else {
1103         assert(JumpTarget.isSymbol());
1104         MIB.addExternalSymbol(JumpTarget.getSymbolName(),
1105                               JumpTarget.getTargetFlags());
1106       }
1107     } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
1108       MachineInstrBuilder MIB =
1109         BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
1110                                        ? X86::TAILJMPm : X86::TAILJMPm64));
1111       for (unsigned i = 0; i != 5; ++i)
1112         MIB.addOperand(MBBI->getOperand(i));
1113     } else if (RetOpcode == X86::TCRETURNri64) {
1114       BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
1115         addReg(JumpTarget.getReg(), RegState::Kill);
1116     } else {
1117       BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
1118         addReg(JumpTarget.getReg(), RegState::Kill);
1119     }
1120 
1121     MachineInstr *NewMI = std::prev(MBBI);
1122     NewMI->copyImplicitOps(MF, MBBI);
1123 
1124     // Delete the pseudo instruction TCRETURN.
1125     MBB.erase(MBBI);
1126   } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL ||
1127               RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) &&
1128              (X86FI->getTCReturnAddrDelta() < 0)) {
1129     // Add the return addr area delta back since we are not tail calling.
1130     int delta = -1*X86FI->getTCReturnAddrDelta();
1131     MBBI = MBB.getLastNonDebugInstr();
1132 
1133     // Check for possible merge with preceding ADD instruction.
1134     delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
1135     emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr, UseLEA, TII,
1136                  *RegInfo);
1137   }
1138 }
1139 
1140 int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
1141                                           int FI) const {
1142   const X86RegisterInfo *RegInfo =
1143       static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
1144   const MachineFrameInfo *MFI = MF.getFrameInfo();
1145   int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
1146   uint64_t StackSize = MFI->getStackSize();
1147 
1148   if (RegInfo->hasBasePointer(MF)) {
1149     assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
1150     if (FI < 0) {
1151       // Skip the saved EBP.
1152       return Offset + RegInfo->getSlotSize();
1153     } else {
1154       assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
1155       return Offset + StackSize;
1156     }
1157   } else if (RegInfo->needsStackRealignment(MF)) {
1158     if (FI < 0) {
1159       // Skip the saved EBP.
1160       return Offset + RegInfo->getSlotSize();
1161     } else {
1162       assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
1163       return Offset + StackSize;
1164     }
1165     // FIXME: Support tail calls
1166   } else {
1167     if (!hasFP(MF))
1168       return Offset + StackSize;
1169 
1170     // Skip the saved EBP.
1171     Offset += RegInfo->getSlotSize();
1172 
1173     // Skip the RETADDR move area
1174     const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1175     int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1176     if (TailCallReturnAddrDelta < 0)
1177       Offset -= TailCallReturnAddrDelta;
1178   }
1179 
1180   return Offset;
1181 }
1182 
1183 int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
1184                                              unsigned &FrameReg) const {
1185   const X86RegisterInfo *RegInfo =
1186       static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
1187   // We can't calculate offset from frame pointer if the stack is realigned,
1188   // so enforce usage of stack/base pointer.  The base pointer is used when we
1189   // have dynamic allocas in addition to dynamic realignment.
1190   if (RegInfo->hasBasePointer(MF))
1191     FrameReg = RegInfo->getBaseRegister();
1192   else if (RegInfo->needsStackRealignment(MF))
1193     FrameReg = RegInfo->getStackRegister();
1194   else
1195     FrameReg = RegInfo->getFrameRegister(MF);
1196   return getFrameIndexOffset(MF, FI);
1197 }
1198 
1199 // Simplified from getFrameIndexOffset keeping only StackPointer cases
1200 int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
1201   const MachineFrameInfo *MFI = MF.getFrameInfo();
1202   // Does not include any dynamic realign.
1203   const uint64_t StackSize = MFI->getStackSize();
1204   {
1205 #ifndef NDEBUG
1206     const X86RegisterInfo *RegInfo =
1207       static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
1208     // Note: LLVM arranges the stack as:
1209     // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
1210     //      > "Stack Slots" (<--SP)
1211     // We can always address StackSlots from RSP.  We can usually (unless
1212     // needsStackRealignment) address CSRs from RSP, but sometimes need to
1213     // address them from RBP.  FixedObjects can be placed anywhere in the stack
1214     // frame depending on their specific requirements (i.e. we can actually
1215     // refer to arguments to the function which are stored in the *callers*
1216     // frame).  As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
1217     // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
1218 
1219     assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
1220 
1221     // We don't handle tail calls, and shouldn't be seeing them
1222     // either.
1223     int TailCallReturnAddrDelta =
1224         MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
1225     assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
1226 #endif
1227   }
1228 
1229   // This is how the math works out:
1230   //
1231   //  %rsp grows (i.e. gets lower) left to right. Each box below is
1232   //  one word (eight bytes).  Obj0 is the stack slot we're trying to
1233   //  get to.
1234   //
1235   //    ----------------------------------
1236   //    | BP | Obj0 | Obj1 | ... | ObjN |
1237   //    ----------------------------------
1238   //    ^    ^      ^                   ^
1239   //    A    B      C                   E
1240   //
1241   // A is the incoming stack pointer.
1242   // (B - A) is the local area offset (-8 for x86-64) [1]
1243   // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
1244   //
1245   // |(E - B)| is the StackSize (absolute value, positive).  For a
1246   // stack that grows down, this works out to be (B - E). [3]
1247   //
1248   // E is also the value of %rsp after stack has been set up, and we
1249   // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
1250   // (C - E) == (C - A) - (B - A) + (B - E)
1251   //            { Using [1], [2] and [3] above }
1252   //         == getObjectOffset - LocalAreaOffset + StackSize
1253   //
1254 
1255   // Get the Offset from the StackPointer
1256   int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
1257 
1258   return Offset + StackSize;
1259 }
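// Worked example (illustrative numbers, x86-64): with StackSize == 32,
// MFI->getObjectOffset(FI) == -24 and a local area offset of -8, the result
// is -24 - (-8) + 32 == 16, i.e. the slot lives 16 bytes above the
// post-prologue %rsp.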
1260 // Simplified from getFrameIndexReference keeping only StackPointer cases
1261 int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI,
1262                                                   unsigned &FrameReg) const {
1263   const X86RegisterInfo *RegInfo =
1264     static_cast<const X86RegisterInfo*>(MF.getSubtarget().getRegisterInfo());
1265 
1266   assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
1267 
1268   FrameReg = RegInfo->getStackRegister();
1269   return getFrameIndexOffsetFromSP(MF, FI);
1270 }
1271 
1272 bool X86FrameLowering::assignCalleeSavedSpillSlots(
1273     MachineFunction &MF, const TargetRegisterInfo *TRI,
1274     std::vector<CalleeSavedInfo> &CSI) const {
1275   MachineFrameInfo *MFI = MF.getFrameInfo();
1276   const X86RegisterInfo *RegInfo =
1277       static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
1278   unsigned SlotSize = RegInfo->getSlotSize();
1279   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1280 
1281   unsigned CalleeSavedFrameSize = 0;
1282   int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
1283 
1284   if (hasFP(MF)) {
1285     // emitPrologue always spills the frame register first.
1286     SpillSlotOffset -= SlotSize;
1287     MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
1288 
1289     // Since emitPrologue and emitEpilogue will handle spilling and restoring of
1290     // the frame register, we can delete it from CSI list and not have to worry
1291     // about avoiding it later.
1292     unsigned FPReg = RegInfo->getFrameRegister(MF);
1293     for (unsigned i = 0; i < CSI.size(); ++i) {
1294       if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
1295         CSI.erase(CSI.begin() + i);
1296         break;
1297       }
1298     }
1299   }
1300 
1301   // Assign slots for GPRs. It increases frame size.
1302   for (unsigned i = CSI.size(); i != 0; --i) {
1303     unsigned Reg = CSI[i - 1].getReg();
1304 
1305     if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
1306       continue;
1307 
1308     SpillSlotOffset -= SlotSize;
1309     CalleeSavedFrameSize += SlotSize;
1310 
1311     int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
1312     CSI[i - 1].setFrameIdx(SlotIndex);
1313   }
1314 
1315   X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
1316 
1317   // Assign slots for XMMs.
1318   for (unsigned i = CSI.size(); i != 0; --i) {
1319     unsigned Reg = CSI[i - 1].getReg();
1320     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
1321       continue;
1322 
1323     const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
1324     // ensure alignment
1325     SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
1326     // spill into slot
1327     SpillSlotOffset -= RC->getSize();
1328     int SlotIndex =
1329         MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
1330     CSI[i - 1].setFrameIdx(SlotIndex);
1331     MFI->ensureMaxAlignment(RC->getAlignment());
1332   }
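  // A purely hypothetical example of the resulting layout (64-bit, 8-byte
  // slots, frame pointer in use, TCReturnAddrDelta == 0, CSI containing two
  // GPRs plus one 16-byte-aligned vector register): the prologue's own spill
  // of the frame register takes the fixed slot at offset -16, the two GPRs
  // take the next slots at -24 and -32, and the vector slot is then aligned
  // down so that it occupies [-48, -32).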
1333 
1334   return true;
1335 }
1336 
1337 bool X86FrameLowering::spillCalleeSavedRegisters(
1338     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1339     const std::vector<CalleeSavedInfo> &CSI,
1340     const TargetRegisterInfo *TRI) const {
1341   DebugLoc DL = MBB.findDebugLoc(MI);
1342 
1343   MachineFunction &MF = *MBB.getParent();
1344   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1345   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
1346 
1347   // Push GPRs. This increases the frame size.
1348   unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
1349   for (unsigned i = CSI.size(); i != 0; --i) {
1350     unsigned Reg = CSI[i - 1].getReg();
1351 
1352     if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
1353       continue;
1354     // Add the callee-saved register as live-in. It's killed at the spill.
1355     MBB.addLiveIn(Reg);
1356 
1357     BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
1358       .setMIFlag(MachineInstr::FrameSetup);
1359   }
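  // For a hypothetical CSI of {%rbx, %r12, %r13} on x86-64 this emits
  //   pushq %r13
  //   pushq %r12
  //   pushq %rbx
  // (the loop walks CSI backwards so that restoreCalleeSavedRegisters can
  // later pop them with a forward walk).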
1360 
1361   // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
1362   // so they are stored to their assigned stack slots instead.
1363   for (unsigned i = CSI.size(); i != 0; --i) {
1364     unsigned Reg = CSI[i-1].getReg();
1365     if (X86::GR64RegClass.contains(Reg) ||
1366         X86::GR32RegClass.contains(Reg))
1367       continue;
1368     // Add the callee-saved register as live-in. It's killed at the spill.
1369     MBB.addLiveIn(Reg);
1370     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
1371 
1372     TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
1373                             TRI);
1374     --MI;
1375     MI->setFlag(MachineInstr::FrameSetup);
1376     ++MI;
1377   }
1378 
1379   return true;
1380 }
1381 
1382 bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1383                                                MachineBasicBlock::iterator MI,
1384                                         const std::vector<CalleeSavedInfo> &CSI,
1385                                           const TargetRegisterInfo *TRI) const {
1386   if (CSI.empty())
1387     return false;
1388 
1389   DebugLoc DL = MBB.findDebugLoc(MI);
1390 
1391   MachineFunction &MF = *MBB.getParent();
1392   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1393   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
1394 
1395   // Reload XMMs from stack frame.
1396   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1397     unsigned Reg = CSI[i].getReg();
1398     if (X86::GR64RegClass.contains(Reg) ||
1399         X86::GR32RegClass.contains(Reg))
1400       continue;
1401 
1402     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
1403     TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
1404   }
1405 
1406   // POP GPRs.
1407   unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
1408   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1409     unsigned Reg = CSI[i].getReg();
1410     if (!X86::GR64RegClass.contains(Reg) &&
1411         !X86::GR32RegClass.contains(Reg))
1412       continue;
1413 
1414     BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
1415   }
1416   return true;
1417 }
1418 
1419 void
1420 X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
1421                                                        RegScavenger *RS) const {
1422   MachineFrameInfo *MFI = MF.getFrameInfo();
1423   const X86RegisterInfo *RegInfo =
1424       static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
1425   unsigned SlotSize = RegInfo->getSlotSize();
1426 
1427   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1428   int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1429 
1430   if (TailCallReturnAddrDelta < 0) {
1431     // create RETURNADDR area
1432     //   arg
1433     //   arg
1434     //   RETADDR
1435     //   { ...
1436     //     RETADDR area
1437     //     ...
1438     //   }
1439     //   [EBP]
1440     MFI->CreateFixedObject(-TailCallReturnAddrDelta,
1441                            TailCallReturnAddrDelta - SlotSize, true);
1442   }
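  // For instance, with TailCallReturnAddrDelta == -8 and an 8-byte slot this
  // creates an 8-byte fixed object at offset -16, reserving room for the
  // return address the tail call sequence will store.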
1443 
1444   // Spill the BasePtr if it's used.
1445   if (RegInfo->hasBasePointer(MF))
1446     MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
1447 }
1448 
1449 static bool
1450 HasNestArgument(const MachineFunction *MF) {
1451   const Function *F = MF->getFunction();
1452   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
1453        I != E; I++) {
1454     if (I->hasNestAttr())
1455       return true;
1456   }
1457   return false;
1458 }
1459 
1460 /// GetScratchRegister - Get a temp register for performing work in the
1461 /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
1462 /// and the properties of the function either one or two registers will be
1463 /// needed. Set primary to true for the first register, false for the second.
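/// For example (assuming the default C calling convention and no nest
/// argument): on LP64 the primary/secondary registers are R11/R12, while on
/// 32-bit targets they are ECX/EAX.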
1464 static unsigned
1465 GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
1466   CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
1467 
1468   // Erlang stuff.
1469   if (CallingConvention == CallingConv::HiPE) {
1470     if (Is64Bit)
1471       return Primary ? X86::R14 : X86::R13;
1472     else
1473       return Primary ? X86::EBX : X86::EDI;
1474   }
1475 
1476   if (Is64Bit) {
1477     if (IsLP64)
1478       return Primary ? X86::R11 : X86::R12;
1479     else
1480       return Primary ? X86::R11D : X86::R12D;
1481   }
1482 
1483   bool IsNested = HasNestArgument(&MF);
1484 
1485   if (CallingConvention == CallingConv::X86_FastCall ||
1486       CallingConvention == CallingConv::Fast) {
1487     if (IsNested)
1488       report_fatal_error("Segmented stacks does not support fastcall with "
1489                          "nested function.");
1490     return Primary ? X86::EAX : X86::ECX;
1491   }
1492   if (IsNested)
1493     return Primary ? X86::EDX : X86::EAX;
1494   return Primary ? X86::ECX : X86::EAX;
1495 }
1496 
1497 // The stack limit in the TCB is set to this many bytes above the actual stack
1498 // limit.
1499 static const uint64_t kSplitStackAvailable = 256;
1500 
1501 void
1502 X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
1503   MachineBasicBlock &prologueMBB = MF.front();
1504   MachineFrameInfo *MFI = MF.getFrameInfo();
1505   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1506   uint64_t StackSize;
1507   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
1508   bool Is64Bit = STI.is64Bit();
1509   const bool IsLP64 = STI.isTarget64BitLP64();
1510   unsigned TlsReg, TlsOffset;
1511   DebugLoc DL;
1512 
1513   unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
1514   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
1515          "Scratch register is live-in");
1516 
1517   if (MF.getFunction()->isVarArg())
1518     report_fatal_error("Segmented stacks do not support vararg functions.");
1519   if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
1520       !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
1521       !STI.isTargetDragonFly())
1522     report_fatal_error("Segmented stacks not supported on this platform.");
1523 
1524   // Eventually StackSize will be calculated by a link-time pass, which will
1525   // also decide whether checking code needs to be injected into this particular
1526   // prologue.
1527   StackSize = MFI->getStackSize();
1528 
1529   // Do not generate a prologue for functions with a stack frame of size zero.
1530   if (StackSize == 0)
1531     return;
1532 
1533   MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
1534   MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
1535   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1536   bool IsNested = false;
1537 
1538   // We need to know if the function has a nest argument only in 64 bit mode.
1539   if (Is64Bit)
1540     IsNested = HasNestArgument(&MF);
1541 
1542   // The MOV R10, RAX needs to be in a different block, since the RET we emit in
1543   // allocMBB needs to be the last (terminating) instruction.
1544 
1545   for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
1546          e = prologueMBB.livein_end(); i != e; i++) {
1547     allocMBB->addLiveIn(*i);
1548     checkMBB->addLiveIn(*i);
1549   }
1550 
1551   if (IsNested)
1552     allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
1553 
1554   MF.push_front(allocMBB);
1555   MF.push_front(checkMBB);
1556 
1557   // When the frame size is less than 256 we just compare the stack
1558   // boundary directly to the value of the stack pointer, per gcc.
1559   bool CompareStackPointer = StackSize < kSplitStackAvailable;
1560 
1561   // Read the limit of the current stacklet from the stack_guard location.
1562   if (Is64Bit) {
1563     if (STI.isTargetLinux()) {
1564       TlsReg = X86::FS;
1565       TlsOffset = IsLP64 ? 0x70 : 0x40;
1566     } else if (STI.isTargetDarwin()) {
1567       TlsReg = X86::GS;
1568       TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
1569     } else if (STI.isTargetWin64()) {
1570       TlsReg = X86::GS;
1571       TlsOffset = 0x28; // pvArbitrary, reserved for application use
1572     } else if (STI.isTargetFreeBSD()) {
1573       TlsReg = X86::FS;
1574       TlsOffset = 0x18;
1575     } else if (STI.isTargetDragonFly()) {
1576       TlsReg = X86::FS;
1577       TlsOffset = 0x20; // use tls_tcb.tcb_segstack
1578     } else {
1579       report_fatal_error("Segmented stacks not supported on this platform.");
1580     }
1581 
1582     if (CompareStackPointer)
1583       ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
1584     else
1585       BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
1586         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
1587 
1588     BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
1589       .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
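    // Roughly speaking (illustrative only, assuming x86-64 Linux/LP64, the
    // default calling convention so the scratch register is %r11, and a frame
    // of at least kSplitStackAvailable bytes), checkMBB ends up containing:
    //   leaq  -StackSize(%rsp), %r11
    //   cmpq  %fs:0x70, %r11
    //   ja    <function body>          # the JA is emitted after the if/else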
1590   } else {
1591     if (STI.isTargetLinux()) {
1592       TlsReg = X86::GS;
1593       TlsOffset = 0x30;
1594     } else if (STI.isTargetDarwin()) {
1595       TlsReg = X86::GS;
1596       TlsOffset = 0x48 + 90*4;
1597     } else if (STI.isTargetWin32()) {
1598       TlsReg = X86::FS;
1599       TlsOffset = 0x14; // pvArbitrary, reserved for application use
1600     } else if (STI.isTargetDragonFly()) {
1601       TlsReg = X86::FS;
1602       TlsOffset = 0x10; // use tls_tcb.tcb_segstack
1603     } else if (STI.isTargetFreeBSD()) {
1604       report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
1605     } else {
1606       report_fatal_error("Segmented stacks not supported on this platform.");
1607     }
1608 
1609     if (CompareStackPointer)
1610       ScratchReg = X86::ESP;
1611     else
1612       BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
1613         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
1614 
1615     if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
1616         STI.isTargetDragonFly()) {
1617       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
1618         .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
1619     } else if (STI.isTargetDarwin()) {
1620 
1621       // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
1622       unsigned ScratchReg2;
1623       bool SaveScratch2;
1624       if (CompareStackPointer) {
1625         // The primary scratch register is available for holding the TLS offset.
1626         ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
1627         SaveScratch2 = false;
1628       } else {
1629         // Need to use a second register to hold the TLS offset
1630         ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
1631 
1632         // Unfortunately, with fastcc the second scratch register may hold an
1633         // argument.
1634         SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
1635       }
1636 
1637       // If Scratch2 is live-in then it needs to be saved.
1638       assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
1639              "Scratch register is live-in and not saved");
1640 
1641       if (SaveScratch2)
1642         BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
1643           .addReg(ScratchReg2, RegState::Kill);
1644 
1645       BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
1646         .addImm(TlsOffset);
1647       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
1648         .addReg(ScratchReg)
1649         .addReg(ScratchReg2).addImm(1).addReg(0)
1650         .addImm(0)
1651         .addReg(TlsReg);
1652 
1653       if (SaveScratch2)
1654         BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
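      // On Darwin/i386 this amounts to something like the following
      // (registers and the save/restore of %eax are illustrative, assuming a
      // frame large enough that %ecx already holds esp - StackSize):
      //   pushl %eax                   # only if %eax is live-in
      //   movl  $TlsOffset, %eax
      //   cmpl  %gs:(%eax), %ecx
      //   popl  %eax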
1655     }
1656   }
1657 
1658   // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
1659   // It jumps to normal execution of the function body.
1660   BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);
1661 
1662   // On 32 bit we first push the argument size and then the frame size. On 64
1663   // bit, we pass the stack frame size in r10 and the argument size in r11.
1664   if (Is64Bit) {
1665     // Functions with nested arguments use R10, so it needs to be saved across
1666     // the call to _morestack
1667 
1668     const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
1669     const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
1670     const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
1671     const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
1672     const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
1673 
1674     if (IsNested)
1675       BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
1676 
1677     BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
1678       .addImm(StackSize);
1679     BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
1680       .addImm(X86FI->getArgumentStackSize());
1681     MF.getRegInfo().setPhysRegUsed(Reg10);
1682     MF.getRegInfo().setPhysRegUsed(Reg11);
1683   } else {
1684     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
1685       .addImm(X86FI->getArgumentStackSize());
1686     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
1687       .addImm(StackSize);
1688   }
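  // As a rough sketch (sizes are placeholders), the 32-bit allocMBB
  // eventually contains
  //   pushl $ArgumentStackSize
  //   pushl $StackSize
  //   calll __morestack
  // followed by the MORESTACK_RET pseudo, while on 64 bit the two sizes are
  // materialized into r10 and r11 with plain moves instead of pushes.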
1689 
1690   // __morestack is in libgcc
1691   if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
1692     // Under the large code model, we cannot assume that __morestack lives
1693     // within 2^31 bytes of the call site, so we cannot use pc-relative
1694     // addressing. We cannot perform the call via a temporary register,
1695     // as the rax register may be used to store the static chain, and all
1696     // other suitable registers may be either callee-save or used for
1697     // parameter passing. We cannot use the stack at this point either
1698     // because __morestack manipulates the stack directly.
1699     //
1700     // To avoid these issues, perform an indirect call via a read-only memory
1701     // location containing the address.
1702     //
1703     // This solution is not perfect, as it assumes that the .rodata section
1704     // is laid out within 2^31 bytes of each function body, but this seems
1705     // to be sufficient for JIT.
1706     BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
1707         .addReg(X86::RIP)
1708         .addImm(0)
1709         .addReg(0)
1710         .addExternalSymbol("__morestack_addr")
1711         .addReg(0);
1712     MF.getMMI().setUsesMorestackAddr(true);
1713   } else {
1714     if (Is64Bit)
1715       BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
1716         .addExternalSymbol("__morestack");
1717     else
1718       BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
1719         .addExternalSymbol("__morestack");
1720   }
1721 
1722   if (IsNested)
1723     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
1724   else
1725     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
1726 
1727   allocMBB->addSuccessor(&prologueMBB);
1728 
1729   checkMBB->addSuccessor(allocMBB);
1730   checkMBB->addSuccessor(&prologueMBB);
1731 
1732 #ifdef XDEBUG
1733   MF.verify();
1734 #endif
1735 }
1736 
1737 /// Erlang programs may need a special prologue to handle the stack size they
1738 /// might need at runtime. That is because Erlang/OTP does not implement a C
1739   /// stack but uses a custom implementation of a hybrid stack/heap architecture.
1740 /// (for more information see Eric Stenman's Ph.D. thesis:
1741 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
1742 ///
1743 /// CheckStack:
1744 ///       temp0 = sp - MaxStack
1745 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
1746 /// OldStart:
1747 ///       ...
1748 /// IncStack:
1749 ///       call inc_stack   # doubles the stack space
1750 ///       temp0 = sp - MaxStack
1751 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
1752 void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
1753   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1754   MachineFrameInfo *MFI = MF.getFrameInfo();
1755   const unsigned SlotSize =
1756       static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo())
1757           ->getSlotSize();
1758   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
1759   const bool Is64Bit = STI.is64Bit();
1760   const bool IsLP64 = STI.isTarget64BitLP64();
1761   DebugLoc DL;
1762   // HiPE-specific values
1763   const unsigned HipeLeafWords = 24;
1764   const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
1765   const unsigned Guaranteed = HipeLeafWords * SlotSize;
1766   unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
1767                             MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
1768   unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
1769 
1770   assert(STI.isTargetLinux() &&
1771          "HiPE prologue is only supported on Linux operating systems.");
1772 
1773   // Compute the largest caller's frame that is needed to fit the callees'
1774   // frames. This 'MaxStack' is computed from:
1775   //
1776   // a) the fixed frame size, which is the space needed for all spilled temps,
1777   // b) outgoing on-stack parameter areas, and
1778   // c) the minimum stack space this function needs to make available for the
1779   //    functions it calls (a tunable ABI property).
1780   if (MFI->hasCalls()) {
1781     unsigned MoreStackForCalls = 0;
1782 
1783     for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
1784          MBBI != MBBE; ++MBBI)
1785       for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
1786            MI != ME; ++MI) {
1787         if (!MI->isCall())
1788           continue;
1789 
1790         // Get callee operand.
1791         const MachineOperand &MO = MI->getOperand(0);
1792 
1793         // Only take account of global function calls (no closures etc.).
1794         if (!MO.isGlobal())
1795           continue;
1796 
1797         const Function *F = dyn_cast<Function>(MO.getGlobal());
1798         if (!F)
1799           continue;
1800 
1801         // Do not update 'MaxStack' for primitive and built-in functions
1802         // (encoded with names either starting with "erlang."/"bif_" or not
1803         // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
1804         // "_", such as the BIF "suspend_0") as they are executed on another
1805         // stack.
1806         if (F->getName().find("erlang.") != StringRef::npos ||
1807             F->getName().find("bif_") != StringRef::npos ||
1808             F->getName().find_first_of("._") == StringRef::npos)
1809           continue;
1810 
1811         unsigned CalleeStkArity =
1812           F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
1813         if (HipeLeafWords - 1 > CalleeStkArity)
1814           MoreStackForCalls = std::max(MoreStackForCalls,
1815                                (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
1816       }
1817     MaxStack += MoreStackForCalls;
1818   }
1819 
1820   // If the needed stack frame is larger than the guaranteed amount, runtime checks
1821   // and calls to the "inc_stack_0" BIF should be inserted in the assembly prologue.
1822   if (MaxStack > Guaranteed) {
1823     MachineBasicBlock &prologueMBB = MF.front();
1824     MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
1825     MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
1826 
1827     for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
1828            E = prologueMBB.livein_end(); I != E; I++) {
1829       stackCheckMBB->addLiveIn(*I);
1830       incStackMBB->addLiveIn(*I);
1831     }
1832 
1833     MF.push_front(incStackMBB);
1834     MF.push_front(stackCheckMBB);
1835 
1836     unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
1837     unsigned LEAop, CMPop, CALLop;
1838     if (Is64Bit) {
1839       SPReg = X86::RSP;
1840       PReg  = X86::RBP;
1841       LEAop = X86::LEA64r;
1842       CMPop = X86::CMP64rm;
1843       CALLop = X86::CALL64pcrel32;
1844       SPLimitOffset = 0x90;
1845     } else {
1846       SPReg = X86::ESP;
1847       PReg  = X86::EBP;
1848       LEAop = X86::LEA32r;
1849       CMPop = X86::CMP32rm;
1850       CALLop = X86::CALLpcrel32;
1851       SPLimitOffset = 0x4c;
1852     }
1853 
1854     ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
1855     assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
1856            "HiPE prologue scratch register is live-in");
1857 
1858     // Create new MBB for StackCheck:
1859     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
1860                  SPReg, false, -MaxStack);
1861     // SPLimitOffset is in a fixed heap location (pointed by BP).
1862     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
1863                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
1864     BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);
1865 
1866     // Create new MBB for IncStack:
1867     BuildMI(incStackMBB, DL, TII.get(CALLop)).
1868       addExternalSymbol("inc_stack_0");
1869     addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
1870                  SPReg, false, -MaxStack);
1871     addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
1872                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
1873     BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
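    // Putting it together, the emitted sequence looks roughly like this on
    // x86-64 (%r14 is the HiPE scratch register; offsets are the ones chosen
    // above):
    //   stack_check:
    //     leaq  -MaxStack(%rsp), %r14
    //     cmpq  0x90(%rbp), %r14
    //     jae   old_start
    //   inc_stack:
    //     callq inc_stack_0
    //     leaq  -MaxStack(%rsp), %r14
    //     cmpq  0x90(%rbp), %r14
    //     jle   inc_stack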
1874 
1875     stackCheckMBB->addSuccessor(&prologueMBB, 99);
1876     stackCheckMBB->addSuccessor(incStackMBB, 1);
1877     incStackMBB->addSuccessor(&prologueMBB, 99);
1878     incStackMBB->addSuccessor(incStackMBB, 1);
1879   }
1880 #ifdef XDEBUG
1881   MF.verify();
1882 #endif
1883 }
1884 
1885 bool X86FrameLowering::
1886 convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
1887                        MachineBasicBlock::iterator I, uint64_t Amount) const {
1888   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1889   const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
1890     MF.getSubtarget().getRegisterInfo());
1891   unsigned StackPtr = RegInfo.getStackRegister();
1892 
1893   // Scan the call setup sequence for the pattern we're looking for.
1894   // We only handle a simple case now - a sequence of MOV32mi or MOV32mr
1895   // instructions that store a sequence of 32-bit values onto the stack, with
1896   // no gaps.
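  // For example, a hypothetical 32-bit call sequence such as
  //   movl  $42, (%esp)
  //   movl  %ebx, 4(%esp)
  //   movl  $17, 8(%esp)
  //   calll foo
  // would be rewritten below into
  //   pushl $17
  //   pushl %ebx
  //   pushl $42
  //   calll foo
  // (the pushes are emitted from the highest displacement downwards).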
1897   std::map<int64_t, MachineBasicBlock::iterator> MovMap;
1898   do {
1899     int Opcode = I->getOpcode();
1900     if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
1901       break;
1902 
1903     // We only want movs of the form:
1904   // movl imm/r32, k(%esp)
1905   // If we run into something else, bail.
1906     // Note that AddrBaseReg may, counterintuitively, not be a register...
1907     if (!I->getOperand(X86::AddrBaseReg).isReg() ||
1908         (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
1909         !I->getOperand(X86::AddrScaleAmt).isImm() ||
1910         (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
1911         (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
1912         (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
1913         !I->getOperand(X86::AddrDisp).isImm())
1914       return false;
1915 
1916     int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
1917 
1918     // We don't want to consider the unaligned case.
1919     if (StackDisp % 4)
1920       return false;
1921 
1922     // If the same stack slot is being filled twice, something's fishy.
1923     if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second)
1924       return false;
1925 
1926     ++I;
1927   } while (I != MBB.end());
1928 
1929   // We now expect the end of the sequence - a call and a stack adjust.
1930   if (I == MBB.end())
1931     return false;
1932   if (!I->isCall())
1933     return false;
1934   MachineBasicBlock::iterator Call = I;
1935   if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode())
1936     return false;
1937 
1938   // Now, go through the map, and see that we don't have any gaps,
1939   // but only a series of 32-bit MOVs.
1940   // Since std::map provides ordered iteration, the original order
1941   // of the MOVs doesn't matter.
1942   int64_t ExpectedDist = 0;
1943   for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
1944        ++MMI, ExpectedDist += 4)
1945     if (MMI->first != ExpectedDist)
1946       return false;
1947 
1948   // Ok, everything looks fine. Do the transformation.
1949   DebugLoc DL = I->getDebugLoc();
1950 
1951   // It's possible the original stack adjustment amount was larger than
1952   // that done by the pushes. If so, we still need a SUB.
1953   Amount -= ExpectedDist;
1954   if (Amount) {
1955     MachineInstr* Sub = BuildMI(MBB, Call, DL,
1956                           TII.get(getSUBriOpcode(false, Amount)), StackPtr)
1957                   .addReg(StackPtr).addImm(Amount);
1958     Sub->getOperand(3).setIsDead();
1959   }
1960 
1961   // Now, iterate through the map in reverse order, and replace the movs
1962   // with pushes. MOVmi/MOVmr doesn't have any defs, so no uses need replacing.
1963   for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
1964     MachineBasicBlock::iterator MOV = MMI->second;
1965     MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
1966 
1967     // Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size
1968     int PushOpcode = X86::PUSH32r;
1969     if (MOV->getOpcode() == X86::MOV32mi)
1970       PushOpcode = getPUSHiOpcode(false, PushOp);
1971 
1972     BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp);
1973     MBB.erase(MOV);
1974   }
1975 
1976   return true;
1977 }
1978 
1979 void X86FrameLowering::
1980 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
1981                               MachineBasicBlock::iterator I) const {
1982   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1983   const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
1984                                        MF.getSubtarget().getRegisterInfo());
1985   unsigned StackPtr = RegInfo.getStackRegister();
1986   bool reserveCallFrame = hasReservedCallFrame(MF);
1987   int Opcode = I->getOpcode();
1988   bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
1989   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
1990   bool IsLP64 = STI.isTarget64BitLP64();
1991   DebugLoc DL = I->getDebugLoc();
1992   uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
1993   uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
1994   I = MBB.erase(I);
1995 
1996   if (!reserveCallFrame) {
1997     // If the stack pointer can be changed after prologue, turn the
1998     // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
1999     // adjcallstackup instruction into an 'add ESP, <amt>'.
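    // For example (amounts are illustrative), with a 16-byte stack alignment
    // on x86-64:
    //   ADJCALLSTACKDOWN64 20      ->   subq $32, %rsp    # 20 rounded up to 32
    //   ADJCALLSTACKUP64   20, 0   ->   addq $32, %rsp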
2000     if (Amount == 0)
2001       return;
2002 
2003     // We need to keep the stack aligned properly.  To do this, we round the
2004     // amount of space needed for the outgoing arguments up to the next
2005     // alignment boundary.
2006     unsigned StackAlign = MF.getTarget()
2007                               .getSubtargetImpl()
2008                               ->getFrameLowering()
2009                               ->getStackAlignment();
2010     Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
2011 
2012     MachineInstr *New = nullptr;
2013     if (Opcode == TII.getCallFrameSetupOpcode()) {
2014       // Try to convert movs to the stack into pushes.
2015       // We currently only look for a pattern that appears in 32-bit
2016       // calling conventions.
2017       if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
2018         return;
2019 
2020       New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
2021                     StackPtr)
2022         .addReg(StackPtr)
2023         .addImm(Amount);
2024     } else {
2025       assert(Opcode == TII.getCallFrameDestroyOpcode());
2026 
2027       // Factor out the amount the callee already popped.
2028       Amount -= CalleeAmt;
2029 
2030       if (Amount) {
2031         unsigned Opc = getADDriOpcode(IsLP64, Amount);
2032         New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
2033           .addReg(StackPtr).addImm(Amount);
2034       }
2035     }
2036 
2037     if (New) {
2038       // The EFLAGS implicit def is dead.
2039       New->getOperand(3).setIsDead();
2040 
2041       // Replace the pseudo instruction with a new instruction.
2042       MBB.insert(I, New);
2043     }
2044 
2045     return;
2046   }
2047 
2048   if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
2049     // If we are performing frame pointer elimination and if the callee pops
2050     // something off the stack pointer, add it back.  We do this until we have
2051     // more advanced stack pointer tracking ability.
2052     unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
2053     MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
2054       .addReg(StackPtr).addImm(CalleeAmt);
2055 
2056     // The EFLAGS implicit def is dead.
2057     New->getOperand(3).setIsDead();
2058 
2059     // We are not tracking the stack pointer adjustment by the callee, so make
2060     // sure we restore the stack pointer immediately after the call; there may
2061     // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
2062     MachineBasicBlock::iterator B = MBB.begin();
2063     while (I != B && !std::prev(I)->isCall())
2064       --I;
2065     MBB.insert(I, New);
2066   }
2067 }
2068 
2069