14ceea774SAmara Emerson //===----- X86DynAllocaExpander.cpp - Expand DynAlloca pseudo instruction -===// 24ceea774SAmara Emerson // 34ceea774SAmara Emerson // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 44ceea774SAmara Emerson // See https://llvm.org/LICENSE.txt for license information. 54ceea774SAmara Emerson // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 64ceea774SAmara Emerson // 74ceea774SAmara Emerson //===----------------------------------------------------------------------===// 84ceea774SAmara Emerson // 94ceea774SAmara Emerson // This file defines a pass that expands DynAlloca pseudo-instructions. 104ceea774SAmara Emerson // 114ceea774SAmara Emerson // It performs a conservative analysis to determine whether each allocation 124ceea774SAmara Emerson // falls within a region of the stack that is safe to use, or whether stack 134ceea774SAmara Emerson // probes must be emitted. 144ceea774SAmara Emerson // 154ceea774SAmara Emerson //===----------------------------------------------------------------------===// 164ceea774SAmara Emerson 174ceea774SAmara Emerson #include "X86.h" 184ceea774SAmara Emerson #include "X86InstrInfo.h" 194ceea774SAmara Emerson #include "X86MachineFunctionInfo.h" 204ceea774SAmara Emerson #include "X86Subtarget.h" 214ceea774SAmara Emerson #include "llvm/ADT/MapVector.h" 224ceea774SAmara Emerson #include "llvm/ADT/PostOrderIterator.h" 234ceea774SAmara Emerson #include "llvm/CodeGen/MachineFunctionPass.h" 244ceea774SAmara Emerson #include "llvm/CodeGen/MachineInstrBuilder.h" 254ceea774SAmara Emerson #include "llvm/CodeGen/MachineRegisterInfo.h" 264ceea774SAmara Emerson #include "llvm/CodeGen/Passes.h" 274ceea774SAmara Emerson #include "llvm/CodeGen/TargetInstrInfo.h" 284ceea774SAmara Emerson #include "llvm/IR/Function.h" 294ceea774SAmara Emerson 304ceea774SAmara Emerson using namespace llvm; 314ceea774SAmara Emerson 324ceea774SAmara Emerson namespace { 334ceea774SAmara Emerson 344ceea774SAmara Emerson class X86DynAllocaExpander : public MachineFunctionPass { 354ceea774SAmara Emerson public: 364ceea774SAmara Emerson X86DynAllocaExpander() : MachineFunctionPass(ID) {} 374ceea774SAmara Emerson 384ceea774SAmara Emerson bool runOnMachineFunction(MachineFunction &MF) override; 394ceea774SAmara Emerson 404ceea774SAmara Emerson private: 414ceea774SAmara Emerson /// Strategies for lowering a DynAlloca. 424ceea774SAmara Emerson enum Lowering { TouchAndSub, Sub, Probe }; 434ceea774SAmara Emerson 444ceea774SAmara Emerson /// Deterministic-order map from DynAlloca instruction to desired lowering. 454ceea774SAmara Emerson typedef MapVector<MachineInstr*, Lowering> LoweringMap; 464ceea774SAmara Emerson 474ceea774SAmara Emerson /// Compute which lowering to use for each DynAlloca instruction. 484ceea774SAmara Emerson void computeLowerings(MachineFunction &MF, LoweringMap& Lowerings); 494ceea774SAmara Emerson 504ceea774SAmara Emerson /// Get the appropriate lowering based on current offset and amount. 514ceea774SAmara Emerson Lowering getLowering(int64_t CurrentOffset, int64_t AllocaAmount); 524ceea774SAmara Emerson 534ceea774SAmara Emerson /// Lower a DynAlloca instruction. 544ceea774SAmara Emerson void lower(MachineInstr* MI, Lowering L); 554ceea774SAmara Emerson 564ceea774SAmara Emerson MachineRegisterInfo *MRI = nullptr; 574ceea774SAmara Emerson const X86Subtarget *STI = nullptr; 584ceea774SAmara Emerson const TargetInstrInfo *TII = nullptr; 594ceea774SAmara Emerson const X86RegisterInfo *TRI = nullptr; 604ceea774SAmara Emerson unsigned StackPtr = 0; 614ceea774SAmara Emerson unsigned SlotSize = 0; 624ceea774SAmara Emerson int64_t StackProbeSize = 0; 634ceea774SAmara Emerson bool NoStackArgProbe = false; 644ceea774SAmara Emerson 654ceea774SAmara Emerson StringRef getPassName() const override { return "X86 DynAlloca Expander"; } 66*49aa2550SCraig Topper 67*49aa2550SCraig Topper public: 684ceea774SAmara Emerson static char ID; 694ceea774SAmara Emerson }; 704ceea774SAmara Emerson 714ceea774SAmara Emerson char X86DynAllocaExpander::ID = 0; 724ceea774SAmara Emerson 734ceea774SAmara Emerson } // end anonymous namespace 744ceea774SAmara Emerson 75*49aa2550SCraig Topper INITIALIZE_PASS(X86DynAllocaExpander, "x86-dyn-alloca-expander", 76*49aa2550SCraig Topper "X86 DynAlloca Expander", false, false) 77*49aa2550SCraig Topper 784ceea774SAmara Emerson FunctionPass *llvm::createX86DynAllocaExpander() { 794ceea774SAmara Emerson return new X86DynAllocaExpander(); 804ceea774SAmara Emerson } 814ceea774SAmara Emerson 824ceea774SAmara Emerson /// Return the allocation amount for a DynAlloca instruction, or -1 if unknown. 834ceea774SAmara Emerson static int64_t getDynAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) { 844ceea774SAmara Emerson assert(MI->getOpcode() == X86::DYN_ALLOCA_32 || 854ceea774SAmara Emerson MI->getOpcode() == X86::DYN_ALLOCA_64); 864ceea774SAmara Emerson assert(MI->getOperand(0).isReg()); 874ceea774SAmara Emerson 884ceea774SAmara Emerson Register AmountReg = MI->getOperand(0).getReg(); 894ceea774SAmara Emerson MachineInstr *Def = MRI->getUniqueVRegDef(AmountReg); 904ceea774SAmara Emerson 914ceea774SAmara Emerson if (!Def || 924ceea774SAmara Emerson (Def->getOpcode() != X86::MOV32ri && Def->getOpcode() != X86::MOV64ri) || 934ceea774SAmara Emerson !Def->getOperand(1).isImm()) 944ceea774SAmara Emerson return -1; 954ceea774SAmara Emerson 964ceea774SAmara Emerson return Def->getOperand(1).getImm(); 974ceea774SAmara Emerson } 984ceea774SAmara Emerson 994ceea774SAmara Emerson X86DynAllocaExpander::Lowering 1004ceea774SAmara Emerson X86DynAllocaExpander::getLowering(int64_t CurrentOffset, 1014ceea774SAmara Emerson int64_t AllocaAmount) { 1024ceea774SAmara Emerson // For a non-constant amount or a large amount, we have to probe. 1034ceea774SAmara Emerson if (AllocaAmount < 0 || AllocaAmount > StackProbeSize) 1044ceea774SAmara Emerson return Probe; 1054ceea774SAmara Emerson 1064ceea774SAmara Emerson // If it fits within the safe region of the stack, just subtract. 1074ceea774SAmara Emerson if (CurrentOffset + AllocaAmount <= StackProbeSize) 1084ceea774SAmara Emerson return Sub; 1094ceea774SAmara Emerson 1104ceea774SAmara Emerson // Otherwise, touch the current tip of the stack, then subtract. 1114ceea774SAmara Emerson return TouchAndSub; 1124ceea774SAmara Emerson } 1134ceea774SAmara Emerson 1144ceea774SAmara Emerson static bool isPushPop(const MachineInstr &MI) { 1154ceea774SAmara Emerson switch (MI.getOpcode()) { 1164ceea774SAmara Emerson case X86::PUSH32r: 1174ceea774SAmara Emerson case X86::PUSH32rmm: 1184ceea774SAmara Emerson case X86::PUSH32rmr: 11989ca4eb0SShengchen Kan case X86::PUSH32i: 1204ceea774SAmara Emerson case X86::PUSH64r: 1214ceea774SAmara Emerson case X86::PUSH64rmm: 1224ceea774SAmara Emerson case X86::PUSH64rmr: 1234ceea774SAmara Emerson case X86::PUSH64i32: 1244ceea774SAmara Emerson case X86::POP32r: 1254ceea774SAmara Emerson case X86::POP64r: 1264ceea774SAmara Emerson return true; 1274ceea774SAmara Emerson default: 1284ceea774SAmara Emerson return false; 1294ceea774SAmara Emerson } 1304ceea774SAmara Emerson } 1314ceea774SAmara Emerson 1324ceea774SAmara Emerson void X86DynAllocaExpander::computeLowerings(MachineFunction &MF, 1334ceea774SAmara Emerson LoweringMap &Lowerings) { 1344ceea774SAmara Emerson // Do a one-pass reverse post-order walk of the CFG to conservatively estimate 1354ceea774SAmara Emerson // the offset between the stack pointer and the lowest touched part of the 1364ceea774SAmara Emerson // stack, and use that to decide how to lower each DynAlloca instruction. 1374ceea774SAmara Emerson 1384ceea774SAmara Emerson // Initialize OutOffset[B], the stack offset at exit from B, to something big. 1394ceea774SAmara Emerson DenseMap<MachineBasicBlock *, int64_t> OutOffset; 1404ceea774SAmara Emerson for (MachineBasicBlock &MBB : MF) 1414ceea774SAmara Emerson OutOffset[&MBB] = INT32_MAX; 1424ceea774SAmara Emerson 1434ceea774SAmara Emerson // Note: we don't know the offset at the start of the entry block since the 1444ceea774SAmara Emerson // prologue hasn't been inserted yet, and how much that will adjust the stack 1454ceea774SAmara Emerson // pointer depends on register spills, which have not been computed yet. 1464ceea774SAmara Emerson 1474ceea774SAmara Emerson // Compute the reverse post-order. 1484ceea774SAmara Emerson ReversePostOrderTraversal<MachineFunction*> RPO(&MF); 1494ceea774SAmara Emerson 1504ceea774SAmara Emerson for (MachineBasicBlock *MBB : RPO) { 1514ceea774SAmara Emerson int64_t Offset = -1; 1524ceea774SAmara Emerson for (MachineBasicBlock *Pred : MBB->predecessors()) 1534ceea774SAmara Emerson Offset = std::max(Offset, OutOffset[Pred]); 1544ceea774SAmara Emerson if (Offset == -1) Offset = INT32_MAX; 1554ceea774SAmara Emerson 1564ceea774SAmara Emerson for (MachineInstr &MI : *MBB) { 1574ceea774SAmara Emerson if (MI.getOpcode() == X86::DYN_ALLOCA_32 || 1584ceea774SAmara Emerson MI.getOpcode() == X86::DYN_ALLOCA_64) { 1594ceea774SAmara Emerson // A DynAlloca moves StackPtr, and potentially touches it. 1604ceea774SAmara Emerson int64_t Amount = getDynAllocaAmount(&MI, MRI); 1614ceea774SAmara Emerson Lowering L = getLowering(Offset, Amount); 1624ceea774SAmara Emerson Lowerings[&MI] = L; 1634ceea774SAmara Emerson switch (L) { 1644ceea774SAmara Emerson case Sub: 1654ceea774SAmara Emerson Offset += Amount; 1664ceea774SAmara Emerson break; 1674ceea774SAmara Emerson case TouchAndSub: 1684ceea774SAmara Emerson Offset = Amount; 1694ceea774SAmara Emerson break; 1704ceea774SAmara Emerson case Probe: 1714ceea774SAmara Emerson Offset = 0; 1724ceea774SAmara Emerson break; 1734ceea774SAmara Emerson } 1744ceea774SAmara Emerson } else if (MI.isCall() || isPushPop(MI)) { 1754ceea774SAmara Emerson // Calls, pushes and pops touch the tip of the stack. 1764ceea774SAmara Emerson Offset = 0; 1774ceea774SAmara Emerson } else if (MI.getOpcode() == X86::ADJCALLSTACKUP32 || 1784ceea774SAmara Emerson MI.getOpcode() == X86::ADJCALLSTACKUP64) { 1794ceea774SAmara Emerson Offset -= MI.getOperand(0).getImm(); 1804ceea774SAmara Emerson } else if (MI.getOpcode() == X86::ADJCALLSTACKDOWN32 || 1814ceea774SAmara Emerson MI.getOpcode() == X86::ADJCALLSTACKDOWN64) { 1824ceea774SAmara Emerson Offset += MI.getOperand(0).getImm(); 1834ceea774SAmara Emerson } else if (MI.modifiesRegister(StackPtr, TRI)) { 1844ceea774SAmara Emerson // Any other modification of SP means we've lost track of it. 1854ceea774SAmara Emerson Offset = INT32_MAX; 1864ceea774SAmara Emerson } 1874ceea774SAmara Emerson } 1884ceea774SAmara Emerson 1894ceea774SAmara Emerson OutOffset[MBB] = Offset; 1904ceea774SAmara Emerson } 1914ceea774SAmara Emerson } 1924ceea774SAmara Emerson 193c81a121fSShengchen Kan static unsigned getSubOpcode(bool Is64Bit) { 1944ceea774SAmara Emerson if (Is64Bit) 195c81a121fSShengchen Kan return X86::SUB64ri32; 196c81a121fSShengchen Kan return X86::SUB32ri; 1974ceea774SAmara Emerson } 1984ceea774SAmara Emerson 1994ceea774SAmara Emerson void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) { 2004ceea774SAmara Emerson const DebugLoc &DL = MI->getDebugLoc(); 2014ceea774SAmara Emerson MachineBasicBlock *MBB = MI->getParent(); 2024ceea774SAmara Emerson MachineBasicBlock::iterator I = *MI; 2034ceea774SAmara Emerson 2044ceea774SAmara Emerson int64_t Amount = getDynAllocaAmount(MI, MRI); 2054ceea774SAmara Emerson if (Amount == 0) { 2064ceea774SAmara Emerson MI->eraseFromParent(); 2074ceea774SAmara Emerson return; 2084ceea774SAmara Emerson } 2094ceea774SAmara Emerson 2104ceea774SAmara Emerson // These two variables differ on x32, which is a 64-bit target with a 2114ceea774SAmara Emerson // 32-bit alloca. 2124ceea774SAmara Emerson bool Is64Bit = STI->is64Bit(); 2134ceea774SAmara Emerson bool Is64BitAlloca = MI->getOpcode() == X86::DYN_ALLOCA_64; 2144ceea774SAmara Emerson assert(SlotSize == 4 || SlotSize == 8); 2154ceea774SAmara Emerson 216b0df7040SFangrui Song std::optional<MachineFunction::DebugInstrOperandPair> InstrNum; 2177093c810SJeremy Morse if (unsigned Num = MI->peekDebugInstrNum()) { 2187093c810SJeremy Morse // Operand 2 of DYN_ALLOCAs contains the stack def. 2197093c810SJeremy Morse InstrNum = {Num, 2}; 2207093c810SJeremy Morse } 2217093c810SJeremy Morse 2224ceea774SAmara Emerson switch (L) { 2234ceea774SAmara Emerson case TouchAndSub: { 2244ceea774SAmara Emerson assert(Amount >= SlotSize); 2254ceea774SAmara Emerson 2264ceea774SAmara Emerson // Use a push to touch the top of the stack. 2274ceea774SAmara Emerson unsigned RegA = Is64Bit ? X86::RAX : X86::EAX; 2284ceea774SAmara Emerson BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) 2294ceea774SAmara Emerson .addReg(RegA, RegState::Undef); 2304ceea774SAmara Emerson Amount -= SlotSize; 2314ceea774SAmara Emerson if (!Amount) 2324ceea774SAmara Emerson break; 2334ceea774SAmara Emerson 2344ceea774SAmara Emerson // Fall through to make any remaining adjustment. 235de9d80c1SFangrui Song [[fallthrough]]; 2364ceea774SAmara Emerson } 2374ceea774SAmara Emerson case Sub: 2384ceea774SAmara Emerson assert(Amount > 0); 2394ceea774SAmara Emerson if (Amount == SlotSize) { 2404ceea774SAmara Emerson // Use push to save size. 2414ceea774SAmara Emerson unsigned RegA = Is64Bit ? X86::RAX : X86::EAX; 2424ceea774SAmara Emerson BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) 2434ceea774SAmara Emerson .addReg(RegA, RegState::Undef); 2444ceea774SAmara Emerson } else { 2454ceea774SAmara Emerson // Sub. 246c81a121fSShengchen Kan BuildMI(*MBB, I, DL, TII->get(getSubOpcode(Is64BitAlloca)), StackPtr) 2474ceea774SAmara Emerson .addReg(StackPtr) 2484ceea774SAmara Emerson .addImm(Amount); 2494ceea774SAmara Emerson } 2504ceea774SAmara Emerson break; 2514ceea774SAmara Emerson case Probe: 2524ceea774SAmara Emerson if (!NoStackArgProbe) { 2534ceea774SAmara Emerson // The probe lowering expects the amount in RAX/EAX. 2544ceea774SAmara Emerson unsigned RegA = Is64BitAlloca ? X86::RAX : X86::EAX; 2554ceea774SAmara Emerson BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), RegA) 2564ceea774SAmara Emerson .addReg(MI->getOperand(0).getReg()); 2574ceea774SAmara Emerson 2584ceea774SAmara Emerson // Do the probe. 2594ceea774SAmara Emerson STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL, 2607093c810SJeremy Morse /*InProlog=*/false, InstrNum); 2614ceea774SAmara Emerson } else { 2624ceea774SAmara Emerson // Sub 2634ceea774SAmara Emerson BuildMI(*MBB, I, DL, 2644ceea774SAmara Emerson TII->get(Is64BitAlloca ? X86::SUB64rr : X86::SUB32rr), StackPtr) 2654ceea774SAmara Emerson .addReg(StackPtr) 2664ceea774SAmara Emerson .addReg(MI->getOperand(0).getReg()); 2674ceea774SAmara Emerson } 2684ceea774SAmara Emerson break; 2694ceea774SAmara Emerson } 2704ceea774SAmara Emerson 2714ceea774SAmara Emerson Register AmountReg = MI->getOperand(0).getReg(); 2724ceea774SAmara Emerson MI->eraseFromParent(); 2734ceea774SAmara Emerson 2744ceea774SAmara Emerson // Delete the definition of AmountReg. 2754ceea774SAmara Emerson if (MRI->use_empty(AmountReg)) 2764ceea774SAmara Emerson if (MachineInstr *AmountDef = MRI->getUniqueVRegDef(AmountReg)) 2774ceea774SAmara Emerson AmountDef->eraseFromParent(); 2784ceea774SAmara Emerson } 2794ceea774SAmara Emerson 2804ceea774SAmara Emerson bool X86DynAllocaExpander::runOnMachineFunction(MachineFunction &MF) { 2814ceea774SAmara Emerson if (!MF.getInfo<X86MachineFunctionInfo>()->hasDynAlloca()) 2824ceea774SAmara Emerson return false; 2834ceea774SAmara Emerson 2844ceea774SAmara Emerson MRI = &MF.getRegInfo(); 2854ceea774SAmara Emerson STI = &MF.getSubtarget<X86Subtarget>(); 2864ceea774SAmara Emerson TII = STI->getInstrInfo(); 2874ceea774SAmara Emerson TRI = STI->getRegisterInfo(); 2884ceea774SAmara Emerson StackPtr = TRI->getStackRegister(); 2894ceea774SAmara Emerson SlotSize = TRI->getSlotSize(); 290c16a58b3SMatt Arsenault StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF); 2914ceea774SAmara Emerson NoStackArgProbe = MF.getFunction().hasFnAttribute("no-stack-arg-probe"); 2924ceea774SAmara Emerson if (NoStackArgProbe) 2934ceea774SAmara Emerson StackProbeSize = INT64_MAX; 2944ceea774SAmara Emerson 2954ceea774SAmara Emerson LoweringMap Lowerings; 2964ceea774SAmara Emerson computeLowerings(MF, Lowerings); 2974ceea774SAmara Emerson for (auto &P : Lowerings) 2984ceea774SAmara Emerson lower(P.first, P.second); 2994ceea774SAmara Emerson 3004ceea774SAmara Emerson return true; 3014ceea774SAmara Emerson } 302