10b57cec5SDimitry Andric //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file contains the AArch64 implementation of TargetFrameLowering class. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric // On AArch64, stack frames are structured as follows: 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // The stack grows downward. 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric // All of the individual frame areas on the frame below are optional, i.e. it's 160b57cec5SDimitry Andric // possible to create a function so that the particular area isn't present 170b57cec5SDimitry Andric // in the frame. 180b57cec5SDimitry Andric // 190b57cec5SDimitry Andric // At function entry, the "frame" looks as follows: 200b57cec5SDimitry Andric // 210b57cec5SDimitry Andric // | | Higher address 220b57cec5SDimitry Andric // |-----------------------------------| 230b57cec5SDimitry Andric // | | 240b57cec5SDimitry Andric // | arguments passed on the stack | 250b57cec5SDimitry Andric // | | 260b57cec5SDimitry Andric // |-----------------------------------| <- sp 270b57cec5SDimitry Andric // | | Lower address 280b57cec5SDimitry Andric // 290b57cec5SDimitry Andric // 300b57cec5SDimitry Andric // After the prologue has run, the frame has the following general structure. 310b57cec5SDimitry Andric // Note that this doesn't depict the case where a red-zone is used. Also, 320b57cec5SDimitry Andric // technically the last frame area (VLAs) doesn't get created until in the 330b57cec5SDimitry Andric // main function body, after the prologue is run. However, it's depicted here 340b57cec5SDimitry Andric // for completeness. 350b57cec5SDimitry Andric // 360b57cec5SDimitry Andric // | | Higher address 370b57cec5SDimitry Andric // |-----------------------------------| 380b57cec5SDimitry Andric // | | 390b57cec5SDimitry Andric // | arguments passed on the stack | 400b57cec5SDimitry Andric // | | 410b57cec5SDimitry Andric // |-----------------------------------| 420b57cec5SDimitry Andric // | | 430b57cec5SDimitry Andric // | (Win64 only) varargs from reg | 440b57cec5SDimitry Andric // | | 450b57cec5SDimitry Andric // |-----------------------------------| 460b57cec5SDimitry Andric // | | 478bcb0991SDimitry Andric // | callee-saved gpr registers | <--. 488bcb0991SDimitry Andric // | | | On Darwin platforms these 498bcb0991SDimitry Andric // |- - - - - - - - - - - - - - - - - -| | callee saves are swapped, 50fe6060f1SDimitry Andric // | prev_lr | | (frame record first) 51fe6060f1SDimitry Andric // | prev_fp | <--' 52fe6060f1SDimitry Andric // | async context if needed | 530b57cec5SDimitry Andric // | (a.k.a. 
"frame record") | 540b57cec5SDimitry Andric // |-----------------------------------| <- fp(=x29) 550fca6ea1SDimitry Andric // | <hazard padding> | 560fca6ea1SDimitry Andric // |-----------------------------------| 570b57cec5SDimitry Andric // | | 588bcb0991SDimitry Andric // | callee-saved fp/simd/SVE regs | 598bcb0991SDimitry Andric // | | 608bcb0991SDimitry Andric // |-----------------------------------| 618bcb0991SDimitry Andric // | | 628bcb0991SDimitry Andric // | SVE stack objects | 630b57cec5SDimitry Andric // | | 640b57cec5SDimitry Andric // |-----------------------------------| 650b57cec5SDimitry Andric // |.empty.space.to.make.part.below....| 660b57cec5SDimitry Andric // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at 670b57cec5SDimitry Andric // |.the.standard.16-byte.alignment....| compile time; if present) 680b57cec5SDimitry Andric // |-----------------------------------| 690b57cec5SDimitry Andric // | local variables of fixed size | 700b57cec5SDimitry Andric // | including spill slots | 710fca6ea1SDimitry Andric // | <FPR> | 720fca6ea1SDimitry Andric // | <hazard padding> | 730fca6ea1SDimitry Andric // | <GPR> | 740b57cec5SDimitry Andric // |-----------------------------------| <- bp(not defined by ABI, 750b57cec5SDimitry Andric // |.variable-sized.local.variables....| LLVM chooses X19) 760b57cec5SDimitry Andric // |.(VLAs)............................| (size of this area is unknown at 770b57cec5SDimitry Andric // |...................................| compile time) 780b57cec5SDimitry Andric // |-----------------------------------| <- sp 790b57cec5SDimitry Andric // | | Lower address 800b57cec5SDimitry Andric // 810b57cec5SDimitry Andric // 820b57cec5SDimitry Andric // To access the data in a frame, at-compile time, a constant offset must be 830b57cec5SDimitry Andric // computable from one of the pointers (fp, bp, sp) to access it. The size 840b57cec5SDimitry Andric // of the areas with a dotted background cannot be computed at compile-time 850b57cec5SDimitry Andric // if they are present, making it required to have all three of fp, bp and 860b57cec5SDimitry Andric // sp to be set up to be able to access all contents in the frame areas, 870b57cec5SDimitry Andric // assuming all of the frame areas are non-empty. 880b57cec5SDimitry Andric // 890b57cec5SDimitry Andric // For most functions, some of the frame areas are empty. For those functions, 900b57cec5SDimitry Andric // it may not be necessary to set up fp or bp: 910b57cec5SDimitry Andric // * A base pointer is definitely needed when there are both VLAs and local 920b57cec5SDimitry Andric // variables with more-than-default alignment requirements. 930b57cec5SDimitry Andric // * A frame pointer is definitely needed when there are local variables with 940b57cec5SDimitry Andric // more-than-default alignment requirements. 950b57cec5SDimitry Andric // 968bcb0991SDimitry Andric // For Darwin platforms the frame-record (fp, lr) is stored at the top of the 978bcb0991SDimitry Andric // callee-saved area, since the unwind encoding does not allow for encoding 988bcb0991SDimitry Andric // this dynamically and existing tools depend on this layout. For other 998bcb0991SDimitry Andric // platforms, the frame-record is stored at the bottom of the (gpr) callee-saved 1008bcb0991SDimitry Andric // area to allow SVE stack objects (allocated directly below the callee-saves, 1018bcb0991SDimitry Andric // if available) to be accessed directly from the framepointer. 
1028bcb0991SDimitry Andric // The SVE spill/fill instructions have VL-scaled addressing modes such 1038bcb0991SDimitry Andric // as: 1048bcb0991SDimitry Andric // ldr z8, [fp, #-7 mul vl] 1058bcb0991SDimitry Andric // For SVE the size of the vector length (VL) is not known at compile-time, so 1068bcb0991SDimitry Andric // '#-7 mul vl' is an offset that can only be evaluated at runtime. With this 1078bcb0991SDimitry Andric // layout, we don't need to add an unscaled offset to the framepointer before 1088bcb0991SDimitry Andric // accessing the SVE object in the frame. 1098bcb0991SDimitry Andric // 1100b57cec5SDimitry Andric // In some cases when a base pointer is not strictly needed, it is generated 1110b57cec5SDimitry Andric // anyway when offsets from the frame pointer to access local variables become 1120b57cec5SDimitry Andric // so large that the offset can't be encoded in the immediate fields of loads 1130b57cec5SDimitry Andric // or stores. 1140b57cec5SDimitry Andric // 115fe6060f1SDimitry Andric // Outgoing function arguments must be at the bottom of the stack frame when 116fe6060f1SDimitry Andric // calling another function. If we do not have variable-sized stack objects, we 117fe6060f1SDimitry Andric // can allocate a "reserved call frame" area at the bottom of the local 118fe6060f1SDimitry Andric // variable area, large enough for all outgoing calls. If we do have VLAs, then 119fe6060f1SDimitry Andric // the stack pointer must be decremented and incremented around each call to 120fe6060f1SDimitry Andric // make space for the arguments below the VLAs. 121fe6060f1SDimitry Andric // 1220b57cec5SDimitry Andric // FIXME: also explain the redzone concept. 1230b57cec5SDimitry Andric // 1240fca6ea1SDimitry Andric // About stack hazards: Under some SME contexts, a coprocessor with its own 1250fca6ea1SDimitry Andric // separate cache can be used for FP operations. This can create hazards if the CPU 1260fca6ea1SDimitry Andric // and the SME unit try to access the same area of memory, including if the 1270fca6ea1SDimitry Andric // access is to an area of the stack. To try to alleviate this we attempt to 1280fca6ea1SDimitry Andric // introduce extra padding into the stack frame between FP and GPR accesses, 1290fca6ea1SDimitry Andric // controlled by the StackHazardSize option. Without changing the layout of the 1300fca6ea1SDimitry Andric // stack frame in the diagram above, a stack object of size StackHazardSize is 1310fca6ea1SDimitry Andric // added between GPR and FPR CSRs. Another is added to the stack objects 1320fca6ea1SDimitry Andric // section, and stack objects are sorted so that FPR > Hazard padding slot > 1330fca6ea1SDimitry Andric // GPRs (where possible). Unfortunately some things are not handled well (VLA 1340fca6ea1SDimitry Andric // area, arguments on the stack, objects with both GPR and FPR accesses), but if 1350fca6ea1SDimitry Andric // those are controlled by the user then the entire stack frame becomes GPR at 1360fca6ea1SDimitry Andric // the start/end with FPR in the middle, surrounded by Hazard padding.
1370fca6ea1SDimitry Andric // 13881ad6265SDimitry Andric // An example of the prologue: 13981ad6265SDimitry Andric // 14081ad6265SDimitry Andric // .globl __foo 14181ad6265SDimitry Andric // .align 2 14281ad6265SDimitry Andric // __foo: 14381ad6265SDimitry Andric // Ltmp0: 14481ad6265SDimitry Andric // .cfi_startproc 14581ad6265SDimitry Andric // .cfi_personality 155, ___gxx_personality_v0 14681ad6265SDimitry Andric // Leh_func_begin: 14781ad6265SDimitry Andric // .cfi_lsda 16, Lexception33 14881ad6265SDimitry Andric // 14981ad6265SDimitry Andric // stp xa,bx, [sp, -#offset]! 15081ad6265SDimitry Andric // ... 15181ad6265SDimitry Andric // stp x28, x27, [sp, #offset-32] 15281ad6265SDimitry Andric // stp fp, lr, [sp, #offset-16] 15381ad6265SDimitry Andric // add fp, sp, #offset - 16 15481ad6265SDimitry Andric // sub sp, sp, #1360 15581ad6265SDimitry Andric // 15681ad6265SDimitry Andric // The Stack: 15781ad6265SDimitry Andric // +-------------------------------------------+ 15881ad6265SDimitry Andric // 10000 | ........ | ........ | ........ | ........ | 15981ad6265SDimitry Andric // 10004 | ........ | ........ | ........ | ........ | 16081ad6265SDimitry Andric // +-------------------------------------------+ 16181ad6265SDimitry Andric // 10008 | ........ | ........ | ........ | ........ | 16281ad6265SDimitry Andric // 1000c | ........ | ........ | ........ | ........ | 16381ad6265SDimitry Andric // +===========================================+ 16481ad6265SDimitry Andric // 10010 | X28 Register | 16581ad6265SDimitry Andric // 10014 | X28 Register | 16681ad6265SDimitry Andric // +-------------------------------------------+ 16781ad6265SDimitry Andric // 10018 | X27 Register | 16881ad6265SDimitry Andric // 1001c | X27 Register | 16981ad6265SDimitry Andric // +===========================================+ 17081ad6265SDimitry Andric // 10020 | Frame Pointer | 17181ad6265SDimitry Andric // 10024 | Frame Pointer | 17281ad6265SDimitry Andric // +-------------------------------------------+ 17381ad6265SDimitry Andric // 10028 | Link Register | 17481ad6265SDimitry Andric // 1002c | Link Register | 17581ad6265SDimitry Andric // +===========================================+ 17681ad6265SDimitry Andric // 10030 | ........ | ........ | ........ | ........ | 17781ad6265SDimitry Andric // 10034 | ........ | ........ | ........ | ........ | 17881ad6265SDimitry Andric // +-------------------------------------------+ 17981ad6265SDimitry Andric // 10038 | ........ | ........ | ........ | ........ | 18081ad6265SDimitry Andric // 1003c | ........ | ........ | ........ | ........ | 18181ad6265SDimitry Andric // +-------------------------------------------+ 18281ad6265SDimitry Andric // 18381ad6265SDimitry Andric // [sp] = 10030 :: >>initial value<< 18481ad6265SDimitry Andric // sp = 10020 :: stp fp, lr, [sp, #-16]! 18581ad6265SDimitry Andric // fp = sp == 10020 :: mov fp, sp 18681ad6265SDimitry Andric // [sp] == 10020 :: stp x28, x27, [sp, #-16]! 18781ad6265SDimitry Andric // sp == 10010 :: >>final value<< 18881ad6265SDimitry Andric // 18981ad6265SDimitry Andric // The frame pointer (w29) points to address 10020. 
If we use an offset of 19081ad6265SDimitry Andric // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 19181ad6265SDimitry Andric // for w27, and -32 for w28: 19281ad6265SDimitry Andric // 19381ad6265SDimitry Andric // Ltmp1: 19481ad6265SDimitry Andric // .cfi_def_cfa w29, 16 19581ad6265SDimitry Andric // Ltmp2: 19681ad6265SDimitry Andric // .cfi_offset w30, -8 19781ad6265SDimitry Andric // Ltmp3: 19881ad6265SDimitry Andric // .cfi_offset w29, -16 19981ad6265SDimitry Andric // Ltmp4: 20081ad6265SDimitry Andric // .cfi_offset w27, -24 20181ad6265SDimitry Andric // Ltmp5: 20281ad6265SDimitry Andric // .cfi_offset w28, -32 20381ad6265SDimitry Andric // 2040b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 2050b57cec5SDimitry Andric 2060b57cec5SDimitry Andric #include "AArch64FrameLowering.h" 2070b57cec5SDimitry Andric #include "AArch64InstrInfo.h" 2080b57cec5SDimitry Andric #include "AArch64MachineFunctionInfo.h" 2090b57cec5SDimitry Andric #include "AArch64RegisterInfo.h" 2100b57cec5SDimitry Andric #include "AArch64Subtarget.h" 2110b57cec5SDimitry Andric #include "AArch64TargetMachine.h" 2120b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 21381ad6265SDimitry Andric #include "MCTargetDesc/AArch64MCTargetDesc.h" 2140b57cec5SDimitry Andric #include "llvm/ADT/ScopeExit.h" 2150b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 2160b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 2170fca6ea1SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 2180b57cec5SDimitry Andric #include "llvm/CodeGen/LivePhysRegs.h" 2190b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 2200b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 2210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 2220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 2230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 2240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h" 2250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 2260b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 2270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 2280b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h" 2290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 2300b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 2310b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 2320b57cec5SDimitry Andric #include "llvm/CodeGen/WinEHFuncInfo.h" 2330b57cec5SDimitry Andric #include "llvm/IR/Attributes.h" 2340b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h" 2350b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 2360b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 2370b57cec5SDimitry Andric #include "llvm/IR/Function.h" 2380b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h" 2390b57cec5SDimitry Andric #include "llvm/MC/MCDwarf.h" 2400b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 2410b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 2420b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 24362987288SDimitry Andric #include "llvm/Support/FormatVariadic.h" 2440b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 2450b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 2460b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 2470b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h" 
2480b57cec5SDimitry Andric #include <cassert> 2490b57cec5SDimitry Andric #include <cstdint> 2500b57cec5SDimitry Andric #include <iterator> 251bdd1243dSDimitry Andric #include <optional> 2520b57cec5SDimitry Andric #include <vector> 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric using namespace llvm; 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric #define DEBUG_TYPE "frame-info" 2570b57cec5SDimitry Andric 2580b57cec5SDimitry Andric static cl::opt<bool> EnableRedZone("aarch64-redzone", 2590b57cec5SDimitry Andric cl::desc("enable use of redzone on AArch64"), 2600b57cec5SDimitry Andric cl::init(false), cl::Hidden); 2610b57cec5SDimitry Andric 2625ffd83dbSDimitry Andric static cl::opt<bool> StackTaggingMergeSetTag( 2635ffd83dbSDimitry Andric "stack-tagging-merge-settag", 2645ffd83dbSDimitry Andric cl::desc("merge settag instruction in function epilog"), cl::init(true), 2655ffd83dbSDimitry Andric cl::Hidden); 2665ffd83dbSDimitry Andric 267e8d8bef9SDimitry Andric static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects", 268e8d8bef9SDimitry Andric cl::desc("sort stack allocations"), 269e8d8bef9SDimitry Andric cl::init(true), cl::Hidden); 270e8d8bef9SDimitry Andric 271fe6060f1SDimitry Andric cl::opt<bool> EnableHomogeneousPrologEpilog( 27281ad6265SDimitry Andric "homogeneous-prolog-epilog", cl::Hidden, 273fe6060f1SDimitry Andric cl::desc("Emit homogeneous prologue and epilogue for the size " 274fe6060f1SDimitry Andric "optimization (default = off)")); 275fe6060f1SDimitry Andric 2760fca6ea1SDimitry Andric // Stack hazard padding size. 0 = disabled. 2770fca6ea1SDimitry Andric static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size", 2780fca6ea1SDimitry Andric cl::init(0), cl::Hidden); 27962987288SDimitry Andric // Stack hazard size for analysis remarks. StackHazardSize takes precedence. 28062987288SDimitry Andric static cl::opt<unsigned> 28162987288SDimitry Andric StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), 28262987288SDimitry Andric cl::Hidden); 2830fca6ea1SDimitry Andric // Whether to insert padding into non-streaming functions (for testing). 2840fca6ea1SDimitry Andric static cl::opt<bool> 2850fca6ea1SDimitry Andric StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming", 2860fca6ea1SDimitry Andric cl::init(false), cl::Hidden); 2870fca6ea1SDimitry Andric 2880b57cec5SDimitry Andric STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); 2890b57cec5SDimitry Andric 290fe6060f1SDimitry Andric /// Returns how much of the incoming argument stack area (in bytes) we should 291fe6060f1SDimitry Andric /// clean up in an epilogue. For the C calling convention this will be 0, for 292fe6060f1SDimitry Andric /// guaranteed tail call conventions it can be positive (a normal return or a 293fe6060f1SDimitry Andric /// tail call to a function that uses less stack space for arguments) or 294fe6060f1SDimitry Andric /// negative (for a tail call to a function that needs more stack space than us 295fe6060f1SDimitry Andric /// for arguments). 296fe6060f1SDimitry Andric static int64_t getArgumentStackToRestore(MachineFunction &MF, 2975ffd83dbSDimitry Andric MachineBasicBlock &MBB) { 2985ffd83dbSDimitry Andric MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 2995ffd83dbSDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 3005f757f3fSDimitry Andric bool IsTailCallReturn = (MBB.end() != MBBI) 3015f757f3fSDimitry Andric ? 
AArch64InstrInfo::isTailCallReturnInst(*MBBI) 3025f757f3fSDimitry Andric : false; 3035ffd83dbSDimitry Andric 304fe6060f1SDimitry Andric int64_t ArgumentPopSize = 0; 3055ffd83dbSDimitry Andric if (IsTailCallReturn) { 3065ffd83dbSDimitry Andric MachineOperand &StackAdjust = MBBI->getOperand(1); 3075ffd83dbSDimitry Andric 3085ffd83dbSDimitry Andric // For a tail-call in a callee-pops-arguments environment, some or all of 3095ffd83dbSDimitry Andric // the stack may actually be in use for the call's arguments; this is 3105ffd83dbSDimitry Andric // calculated during LowerCall and consumed here... 3115ffd83dbSDimitry Andric ArgumentPopSize = StackAdjust.getImm(); 3125ffd83dbSDimitry Andric } else { 3135ffd83dbSDimitry Andric // ... otherwise the amount to pop is *all* of the argument space, 3145ffd83dbSDimitry Andric // conveniently stored in the MachineFunctionInfo by 3155ffd83dbSDimitry Andric // LowerFormalArguments. This will, of course, be zero for the C calling 3165ffd83dbSDimitry Andric // convention. 3175ffd83dbSDimitry Andric ArgumentPopSize = AFI->getArgumentStackToRestore(); 3185ffd83dbSDimitry Andric } 3195ffd83dbSDimitry Andric 3205ffd83dbSDimitry Andric return ArgumentPopSize; 3215ffd83dbSDimitry Andric } 3225ffd83dbSDimitry Andric 323fe6060f1SDimitry Andric static bool produceCompactUnwindFrame(MachineFunction &MF); 324fe6060f1SDimitry Andric static bool needsWinCFI(const MachineFunction &MF); 325fe6060f1SDimitry Andric static StackOffset getSVEStackSize(const MachineFunction &MF); 3260fca6ea1SDimitry Andric static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB); 327fe6060f1SDimitry Andric 328fe6060f1SDimitry Andric /// Returns true if homogeneous prolog or epilog code can be emitted 329fe6060f1SDimitry Andric /// for the size optimization. If possible, a frame helper call is injected. 330fe6060f1SDimitry Andric /// When Exit block is given, this check is for epilog. 331fe6060f1SDimitry Andric bool AArch64FrameLowering::homogeneousPrologEpilog( 332fe6060f1SDimitry Andric MachineFunction &MF, MachineBasicBlock *Exit) const { 333fe6060f1SDimitry Andric if (!MF.getFunction().hasMinSize()) 334fe6060f1SDimitry Andric return false; 335fe6060f1SDimitry Andric if (!EnableHomogeneousPrologEpilog) 336fe6060f1SDimitry Andric return false; 337fe6060f1SDimitry Andric if (EnableRedZone) 338fe6060f1SDimitry Andric return false; 339fe6060f1SDimitry Andric 340fe6060f1SDimitry Andric // TODO: Windows is not supported yet. 341fe6060f1SDimitry Andric if (needsWinCFI(MF)) 342fe6060f1SDimitry Andric return false; 343fe6060f1SDimitry Andric // TODO: SVE is not supported yet. 344fe6060f1SDimitry Andric if (getSVEStackSize(MF)) 345fe6060f1SDimitry Andric return false; 346fe6060f1SDimitry Andric 347fe6060f1SDimitry Andric // Bail on stack adjustment needed on return for simplicity.
348fe6060f1SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 349fe6060f1SDimitry Andric const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); 350fe6060f1SDimitry Andric if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)) 351fe6060f1SDimitry Andric return false; 352fe6060f1SDimitry Andric if (Exit && getArgumentStackToRestore(MF, *Exit)) 353fe6060f1SDimitry Andric return false; 354fe6060f1SDimitry Andric 3555f757f3fSDimitry Andric auto *AFI = MF.getInfo<AArch64FunctionInfo>(); 3560fca6ea1SDimitry Andric if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges()) 3575f757f3fSDimitry Andric return false; 3585f757f3fSDimitry Andric 3595f757f3fSDimitry Andric // If there are an odd number of GPRs before LR and FP in the CSRs list, 3605f757f3fSDimitry Andric // they will not be paired into one RegPairInfo, which is incompatible with 3615f757f3fSDimitry Andric // the assumption made by the homogeneous prolog epilog pass. 3625f757f3fSDimitry Andric const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); 3635f757f3fSDimitry Andric unsigned NumGPRs = 0; 3645f757f3fSDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) { 3655f757f3fSDimitry Andric Register Reg = CSRegs[I]; 3665f757f3fSDimitry Andric if (Reg == AArch64::LR) { 3675f757f3fSDimitry Andric assert(CSRegs[I + 1] == AArch64::FP); 3685f757f3fSDimitry Andric if (NumGPRs % 2 != 0) 3695f757f3fSDimitry Andric return false; 3705f757f3fSDimitry Andric break; 3715f757f3fSDimitry Andric } 3725f757f3fSDimitry Andric if (AArch64::GPR64RegClass.contains(Reg)) 3735f757f3fSDimitry Andric ++NumGPRs; 3745f757f3fSDimitry Andric } 3755f757f3fSDimitry Andric 376fe6060f1SDimitry Andric return true; 377fe6060f1SDimitry Andric } 378fe6060f1SDimitry Andric 379fe6060f1SDimitry Andric /// Returns true if CSRs should be paired. 380fe6060f1SDimitry Andric bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const { 381fe6060f1SDimitry Andric return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF); 382fe6060f1SDimitry Andric } 383fe6060f1SDimitry Andric 3840b57cec5SDimitry Andric /// This is the biggest offset to the stack pointer we can encode in aarch64 3850b57cec5SDimitry Andric /// instructions (without using a separate calculation and a temp register). 3860b57cec5SDimitry Andric /// Note that the exception here are vector stores/loads which cannot encode any 3870b57cec5SDimitry Andric /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()). 3880b57cec5SDimitry Andric static const unsigned DefaultSafeSPDisplacement = 255; 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric /// Look at each instruction that references stack frames and return the stack 3910b57cec5SDimitry Andric /// size limit beyond which some of these instructions will require a scratch 3920b57cec5SDimitry Andric /// register during their expansion later. 3930b57cec5SDimitry Andric static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { 3940b57cec5SDimitry Andric // FIXME: For now, just conservatively guestimate based on unscaled indexing 3950b57cec5SDimitry Andric // range. We'll end up allocating an unnecessary spill slot a lot, but 3960b57cec5SDimitry Andric // realistically that's not a big deal at this stage of the game. 
3970b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 3980b57cec5SDimitry Andric for (MachineInstr &MI : MBB) { 3990b57cec5SDimitry Andric if (MI.isDebugInstr() || MI.isPseudo() || 4000b57cec5SDimitry Andric MI.getOpcode() == AArch64::ADDXri || 4010b57cec5SDimitry Andric MI.getOpcode() == AArch64::ADDSXri) 4020b57cec5SDimitry Andric continue; 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 4050b57cec5SDimitry Andric if (!MO.isFI()) 4060b57cec5SDimitry Andric continue; 4070b57cec5SDimitry Andric 4088bcb0991SDimitry Andric StackOffset Offset; 4090b57cec5SDimitry Andric if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == 4100b57cec5SDimitry Andric AArch64FrameOffsetCannotUpdate) 4110b57cec5SDimitry Andric return 0; 4120b57cec5SDimitry Andric } 4130b57cec5SDimitry Andric } 4140b57cec5SDimitry Andric } 4150b57cec5SDimitry Andric return DefaultSafeSPDisplacement; 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 418480093f4SDimitry Andric TargetStackID::Value 419480093f4SDimitry Andric AArch64FrameLowering::getStackIDForScalableVectors() const { 420e8d8bef9SDimitry Andric return TargetStackID::ScalableVector; 421480093f4SDimitry Andric } 422480093f4SDimitry Andric 42362cfcf62SDimitry Andric /// Returns the size of the fixed object area (allocated next to sp on entry) 42462cfcf62SDimitry Andric /// On Win64 this may include a var args area and an UnwindHelp object for EH. 42562cfcf62SDimitry Andric static unsigned getFixedObjectSize(const MachineFunction &MF, 42662cfcf62SDimitry Andric const AArch64FunctionInfo *AFI, bool IsWin64, 42762cfcf62SDimitry Andric bool IsFunclet) { 42862cfcf62SDimitry Andric if (!IsWin64 || IsFunclet) { 429fe6060f1SDimitry Andric return AFI->getTailCallReservedStack(); 43062cfcf62SDimitry Andric } else { 4310fca6ea1SDimitry Andric if (AFI->getTailCallReservedStack() != 0 && 4320fca6ea1SDimitry Andric !MF.getFunction().getAttributes().hasAttrSomewhere( 4330fca6ea1SDimitry Andric Attribute::SwiftAsync)) 434fe6060f1SDimitry Andric report_fatal_error("cannot generate ABI-changing tail call for Win64"); 43562cfcf62SDimitry Andric // Var args are stored here in the primary function. 43662cfcf62SDimitry Andric const unsigned VarArgsArea = AFI->getVarArgsGPRSize(); 43762cfcf62SDimitry Andric // To support EH funclets we allocate an UnwindHelp object 43862cfcf62SDimitry Andric const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0); 4390fca6ea1SDimitry Andric return AFI->getTailCallReservedStack() + 4400fca6ea1SDimitry Andric alignTo(VarArgsArea + UnwindHelpObject, 16); 44162cfcf62SDimitry Andric } 44262cfcf62SDimitry Andric } 44362cfcf62SDimitry Andric 4448bcb0991SDimitry Andric /// Returns the size of the entire SVE stackframe (calleesaves + spills). 4458bcb0991SDimitry Andric static StackOffset getSVEStackSize(const MachineFunction &MF) { 4468bcb0991SDimitry Andric const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 447e8d8bef9SDimitry Andric return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); 4488bcb0991SDimitry Andric } 4498bcb0991SDimitry Andric 4500b57cec5SDimitry Andric bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { 4510b57cec5SDimitry Andric if (!EnableRedZone) 4520b57cec5SDimitry Andric return false; 453fe6060f1SDimitry Andric 4540b57cec5SDimitry Andric // Don't use the red zone if the function explicitly asks us not to. 4550b57cec5SDimitry Andric // This is typically used for kernel code. 
456fe6060f1SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 457fe6060f1SDimitry Andric const unsigned RedZoneSize = 458fe6060f1SDimitry Andric Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction()); 459fe6060f1SDimitry Andric if (!RedZoneSize) 4600b57cec5SDimitry Andric return false; 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 4630b57cec5SDimitry Andric const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 464480093f4SDimitry Andric uint64_t NumBytes = AFI->getLocalStackSize(); 4650b57cec5SDimitry Andric 4660fca6ea1SDimitry Andric // If neither NEON or SVE are available, a COPY from one Q-reg to 4670fca6ea1SDimitry Andric // another requires a spill -> reload sequence. We can do that 4680fca6ea1SDimitry Andric // using a pre-decrementing store/post-decrementing load, but 4690fca6ea1SDimitry Andric // if we do so, we can't use the Red Zone. 4700fca6ea1SDimitry Andric bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() && 4710fca6ea1SDimitry Andric !Subtarget.isNeonAvailable() && 4720fca6ea1SDimitry Andric !Subtarget.hasSVE(); 4730fca6ea1SDimitry Andric 474fe6060f1SDimitry Andric return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize || 4750fca6ea1SDimitry Andric getSVEStackSize(MF) || LowerQRegCopyThroughMem); 4760b57cec5SDimitry Andric } 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric /// hasFP - Return true if the specified function should have a dedicated frame 4790b57cec5SDimitry Andric /// pointer register. 4800b57cec5SDimitry Andric bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { 4810b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 4820b57cec5SDimitry Andric const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); 4835f757f3fSDimitry Andric 4840b57cec5SDimitry Andric // Win64 EH requires a frame pointer if funclets are present, as the locals 4850b57cec5SDimitry Andric // are accessed off the frame pointer in both the parent function and the 4860b57cec5SDimitry Andric // funclets. 4870b57cec5SDimitry Andric if (MF.hasEHFunclets()) 4880b57cec5SDimitry Andric return true; 4890b57cec5SDimitry Andric // Retain behavior of always omitting the FP for leaf functions when possible. 490480093f4SDimitry Andric if (MF.getTarget().Options.DisableFramePointerElim(MF)) 4910b57cec5SDimitry Andric return true; 4920b57cec5SDimitry Andric if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || 4930b57cec5SDimitry Andric MFI.hasStackMap() || MFI.hasPatchPoint() || 494fe6060f1SDimitry Andric RegInfo->hasStackRealignment(MF)) 4950b57cec5SDimitry Andric return true; 4960b57cec5SDimitry Andric // With large callframes around we may need to use FP to access the scavenging 4970b57cec5SDimitry Andric // emergency spillslot. 4980b57cec5SDimitry Andric // 4990b57cec5SDimitry Andric // Unfortunately some calls to hasFP() like machine verifier -> 5000b57cec5SDimitry Andric // getReservedReg() -> hasFP in the middle of global isel are too early 5010b57cec5SDimitry Andric // to know the max call frame size. Hopefully conservatively returning "true" 5020b57cec5SDimitry Andric // in those cases is fine. 5030b57cec5SDimitry Andric // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs. 
5040b57cec5SDimitry Andric if (!MFI.isMaxCallFrameSizeComputed() || 5050b57cec5SDimitry Andric MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) 5060b57cec5SDimitry Andric return true; 5070b57cec5SDimitry Andric 5080b57cec5SDimitry Andric return false; 5090b57cec5SDimitry Andric } 5100b57cec5SDimitry Andric 5110b57cec5SDimitry Andric /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is 5120b57cec5SDimitry Andric /// not required, we reserve argument space for call sites in the function 5130b57cec5SDimitry Andric /// immediately on entry to the current function. This eliminates the need for 5140b57cec5SDimitry Andric /// add/sub sp brackets around call sites. Returns true if the call frame is 5150b57cec5SDimitry Andric /// included as part of the stack frame. 5160fca6ea1SDimitry Andric bool AArch64FrameLowering::hasReservedCallFrame( 5170fca6ea1SDimitry Andric const MachineFunction &MF) const { 5185f757f3fSDimitry Andric // The stack probing code for the dynamically allocated outgoing arguments 5195f757f3fSDimitry Andric // area assumes that the stack is probed at the top - either by the prologue 5205f757f3fSDimitry Andric // code, which issues a probe if `hasVarSizedObjects` return true, or by the 5215f757f3fSDimitry Andric // most recent variable-sized object allocation. Changing the condition here 5225f757f3fSDimitry Andric // may need to be followed up by changes to the probe issuing logic. 5230b57cec5SDimitry Andric return !MF.getFrameInfo().hasVarSizedObjects(); 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( 5270b57cec5SDimitry Andric MachineFunction &MF, MachineBasicBlock &MBB, 5280b57cec5SDimitry Andric MachineBasicBlock::iterator I) const { 5290b57cec5SDimitry Andric const AArch64InstrInfo *TII = 5300b57cec5SDimitry Andric static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 5315f757f3fSDimitry Andric const AArch64TargetLowering *TLI = 5325f757f3fSDimitry Andric MF.getSubtarget<AArch64Subtarget>().getTargetLowering(); 5335f757f3fSDimitry Andric [[maybe_unused]] MachineFrameInfo &MFI = MF.getFrameInfo(); 5340b57cec5SDimitry Andric DebugLoc DL = I->getDebugLoc(); 5350b57cec5SDimitry Andric unsigned Opc = I->getOpcode(); 5360b57cec5SDimitry Andric bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); 5370b57cec5SDimitry Andric uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; 5380b57cec5SDimitry Andric 5390b57cec5SDimitry Andric if (!hasReservedCallFrame(MF)) { 5400b57cec5SDimitry Andric int64_t Amount = I->getOperand(0).getImm(); 5415ffd83dbSDimitry Andric Amount = alignTo(Amount, getStackAlign()); 5420b57cec5SDimitry Andric if (!IsDestroy) 5430b57cec5SDimitry Andric Amount = -Amount; 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it 5460b57cec5SDimitry Andric // doesn't have to pop anything), then the first operand will be zero too so 5470b57cec5SDimitry Andric // this adjustment is a no-op. 5480b57cec5SDimitry Andric if (CalleePopAmount == 0) { 5490b57cec5SDimitry Andric // FIXME: in-function stack adjustment for calls is limited to 24-bits 5500b57cec5SDimitry Andric // because there's no guaranteed temporary register available. 5510b57cec5SDimitry Andric // 5520b57cec5SDimitry Andric // ADD/SUB (immediate) has only LSL #0 and LSL #12 available. 
5530b57cec5SDimitry Andric // 1) For offset <= 12-bit, we use LSL #0 5540b57cec5SDimitry Andric // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses 5550b57cec5SDimitry Andric // LSL #0, and the other uses LSL #12. 5560b57cec5SDimitry Andric // 5570b57cec5SDimitry Andric // Most call frames will be allocated at the start of a function so 5580b57cec5SDimitry Andric // this is OK, but it is a limitation that needs dealing with. 5590b57cec5SDimitry Andric assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large"); 5605f757f3fSDimitry Andric 5615f757f3fSDimitry Andric if (TLI->hasInlineStackProbe(MF) && 5625f757f3fSDimitry Andric -Amount >= AArch64::StackProbeMaxUnprobedStack) { 5635f757f3fSDimitry Andric // When stack probing is enabled, the decrement of SP may need to be 5645f757f3fSDimitry Andric // probed. We only need to do this if the call site needs 1024 bytes of 5655f757f3fSDimitry Andric // space or more, because a region smaller than that is allowed to be 5665f757f3fSDimitry Andric // unprobed at an ABI boundary. We rely on the fact that SP has been 5675f757f3fSDimitry Andric // probed exactly at this point, either by the prologue or most recent 5685f757f3fSDimitry Andric // dynamic allocation. 5695f757f3fSDimitry Andric assert(MFI.hasVarSizedObjects() && 5705f757f3fSDimitry Andric "non-reserved call frame without var sized objects?"); 5715f757f3fSDimitry Andric Register ScratchReg = 5725f757f3fSDimitry Andric MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); 5735f757f3fSDimitry Andric inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0)); 5745f757f3fSDimitry Andric } else { 575e8d8bef9SDimitry Andric emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, 576e8d8bef9SDimitry Andric StackOffset::getFixed(Amount), TII); 5770b57cec5SDimitry Andric } 5785f757f3fSDimitry Andric } 5790b57cec5SDimitry Andric } else if (CalleePopAmount != 0) { 5800b57cec5SDimitry Andric // If the calling convention demands that the callee pops arguments from the 5810b57cec5SDimitry Andric // stack, we want to add it back if we have a reserved call frame. 
5820b57cec5SDimitry Andric assert(CalleePopAmount < 0xffffff && "call frame too large"); 5838bcb0991SDimitry Andric emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, 584e8d8bef9SDimitry Andric StackOffset::getFixed(-(int64_t)CalleePopAmount), TII); 5850b57cec5SDimitry Andric } 5860b57cec5SDimitry Andric return MBB.erase(I); 5870b57cec5SDimitry Andric } 5880b57cec5SDimitry Andric 58981ad6265SDimitry Andric void AArch64FrameLowering::emitCalleeSavedGPRLocations( 5900b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { 5910b57cec5SDimitry Andric MachineFunction &MF = *MBB.getParent(); 5920b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 5930fca6ea1SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 5940fca6ea1SDimitry Andric SMEAttrs Attrs(MF.getFunction()); 5950fca6ea1SDimitry Andric bool LocallyStreaming = 5960fca6ea1SDimitry Andric Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface(); 59781ad6265SDimitry Andric 59881ad6265SDimitry Andric const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 59981ad6265SDimitry Andric if (CSI.empty()) 60081ad6265SDimitry Andric return; 60181ad6265SDimitry Andric 6020b57cec5SDimitry Andric const TargetSubtargetInfo &STI = MF.getSubtarget(); 60381ad6265SDimitry Andric const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 60481ad6265SDimitry Andric const TargetInstrInfo &TII = *STI.getInstrInfo(); 6050b57cec5SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MBBI); 6060b57cec5SDimitry Andric 60781ad6265SDimitry Andric for (const auto &Info : CSI) { 6080fca6ea1SDimitry Andric unsigned FrameIdx = Info.getFrameIdx(); 6090fca6ea1SDimitry Andric if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) 61081ad6265SDimitry Andric continue; 61181ad6265SDimitry Andric 61281ad6265SDimitry Andric assert(!Info.isSpilledToReg() && "Spilling to registers not implemented"); 6130fca6ea1SDimitry Andric int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true); 6140fca6ea1SDimitry Andric int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea(); 61581ad6265SDimitry Andric 6160fca6ea1SDimitry Andric // The location of VG will be emitted before each streaming-mode change in 6170fca6ea1SDimitry Andric // the function. Only locally-streaming functions require emitting the 6180fca6ea1SDimitry Andric // non-streaming VG location here. 6190fca6ea1SDimitry Andric if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) || 6200fca6ea1SDimitry Andric (!LocallyStreaming && 6210fca6ea1SDimitry Andric DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true))) 6220fca6ea1SDimitry Andric continue; 6230fca6ea1SDimitry Andric 62481ad6265SDimitry Andric unsigned CFIIndex = MF.addFrameInst( 62581ad6265SDimitry Andric MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); 62681ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 62781ad6265SDimitry Andric .addCFIIndex(CFIIndex) 62881ad6265SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 62981ad6265SDimitry Andric } 63081ad6265SDimitry Andric } 63181ad6265SDimitry Andric 63281ad6265SDimitry Andric void AArch64FrameLowering::emitCalleeSavedSVELocations( 63381ad6265SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { 63481ad6265SDimitry Andric MachineFunction &MF = *MBB.getParent(); 63581ad6265SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 63681ad6265SDimitry Andric 6370b57cec5SDimitry Andric // Add callee saved registers to move list. 
6380b57cec5SDimitry Andric const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 6390b57cec5SDimitry Andric if (CSI.empty()) 6400b57cec5SDimitry Andric return; 6410b57cec5SDimitry Andric 64281ad6265SDimitry Andric const TargetSubtargetInfo &STI = MF.getSubtarget(); 64381ad6265SDimitry Andric const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 64481ad6265SDimitry Andric const TargetInstrInfo &TII = *STI.getInstrInfo(); 64581ad6265SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MBBI); 64681ad6265SDimitry Andric AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); 64781ad6265SDimitry Andric 6480b57cec5SDimitry Andric for (const auto &Info : CSI) { 64981ad6265SDimitry Andric if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector)) 65081ad6265SDimitry Andric continue; 65175b4d546SDimitry Andric 65275b4d546SDimitry Andric // Not all unwinders may know about SVE registers, so assume the lowest 65375b4d546SDimitry Andric // common denominator. 65481ad6265SDimitry Andric assert(!Info.isSpilledToReg() && "Spilling to registers not implemented"); 65581ad6265SDimitry Andric unsigned Reg = Info.getReg(); 65681ad6265SDimitry Andric if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg)) 65775b4d546SDimitry Andric continue; 65875b4d546SDimitry Andric 65981ad6265SDimitry Andric StackOffset Offset = 660e8d8bef9SDimitry Andric StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - 66181ad6265SDimitry Andric StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI)); 66281ad6265SDimitry Andric 66381ad6265SDimitry Andric unsigned CFIIndex = MF.addFrameInst(createCFAOffset(TRI, Reg, Offset)); 66481ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 6650b57cec5SDimitry Andric .addCFIIndex(CFIIndex) 6660b57cec5SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 6670b57cec5SDimitry Andric } 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric 67081ad6265SDimitry Andric static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF, 67181ad6265SDimitry Andric MachineBasicBlock &MBB, 67281ad6265SDimitry Andric MachineBasicBlock::iterator InsertPt, 67381ad6265SDimitry Andric unsigned DwarfReg) { 67481ad6265SDimitry Andric unsigned CFIIndex = 67581ad6265SDimitry Andric MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, DwarfReg)); 67681ad6265SDimitry Andric BuildMI(MBB, InsertPt, DebugLoc(), Desc).addCFIIndex(CFIIndex); 67781ad6265SDimitry Andric } 67881ad6265SDimitry Andric 67981ad6265SDimitry Andric void AArch64FrameLowering::resetCFIToInitialState( 68081ad6265SDimitry Andric MachineBasicBlock &MBB) const { 68181ad6265SDimitry Andric 68281ad6265SDimitry Andric MachineFunction &MF = *MBB.getParent(); 68381ad6265SDimitry Andric const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 68481ad6265SDimitry Andric const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 68581ad6265SDimitry Andric const auto &TRI = 68681ad6265SDimitry Andric static_cast<const AArch64RegisterInfo &>(*Subtarget.getRegisterInfo()); 68781ad6265SDimitry Andric const auto &MFI = *MF.getInfo<AArch64FunctionInfo>(); 68881ad6265SDimitry Andric 68981ad6265SDimitry Andric const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION); 69081ad6265SDimitry Andric DebugLoc DL; 69181ad6265SDimitry Andric 69281ad6265SDimitry Andric // Reset the CFA to `SP + 0`.
69381ad6265SDimitry Andric MachineBasicBlock::iterator InsertPt = MBB.begin(); 69481ad6265SDimitry Andric unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( 69581ad6265SDimitry Andric nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0)); 69681ad6265SDimitry Andric BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex); 69781ad6265SDimitry Andric 69881ad6265SDimitry Andric // Flip the RA sign state. 699bdd1243dSDimitry Andric if (MFI.shouldSignReturnAddress(MF)) { 70081ad6265SDimitry Andric CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); 70181ad6265SDimitry Andric BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex); 70281ad6265SDimitry Andric } 70381ad6265SDimitry Andric 70481ad6265SDimitry Andric // Shadow call stack uses X18, reset it. 7055f757f3fSDimitry Andric if (MFI.needsShadowCallStackPrologueEpilogue(MF)) 70681ad6265SDimitry Andric insertCFISameValue(CFIDesc, MF, MBB, InsertPt, 70781ad6265SDimitry Andric TRI.getDwarfRegNum(AArch64::X18, true)); 70881ad6265SDimitry Andric 70981ad6265SDimitry Andric // Emit .cfi_same_value for callee-saved registers. 71081ad6265SDimitry Andric const std::vector<CalleeSavedInfo> &CSI = 71181ad6265SDimitry Andric MF.getFrameInfo().getCalleeSavedInfo(); 71281ad6265SDimitry Andric for (const auto &Info : CSI) { 71381ad6265SDimitry Andric unsigned Reg = Info.getReg(); 71481ad6265SDimitry Andric if (!TRI.regNeedsCFI(Reg, Reg)) 71581ad6265SDimitry Andric continue; 71681ad6265SDimitry Andric insertCFISameValue(CFIDesc, MF, MBB, InsertPt, 71781ad6265SDimitry Andric TRI.getDwarfRegNum(Reg, true)); 71881ad6265SDimitry Andric } 71981ad6265SDimitry Andric } 72081ad6265SDimitry Andric 72181ad6265SDimitry Andric static void emitCalleeSavedRestores(MachineBasicBlock &MBB, 72281ad6265SDimitry Andric MachineBasicBlock::iterator MBBI, 72381ad6265SDimitry Andric bool SVE) { 72481ad6265SDimitry Andric MachineFunction &MF = *MBB.getParent(); 72581ad6265SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 72681ad6265SDimitry Andric 72781ad6265SDimitry Andric const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 72881ad6265SDimitry Andric if (CSI.empty()) 72981ad6265SDimitry Andric return; 73081ad6265SDimitry Andric 73181ad6265SDimitry Andric const TargetSubtargetInfo &STI = MF.getSubtarget(); 73281ad6265SDimitry Andric const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 73381ad6265SDimitry Andric const TargetInstrInfo &TII = *STI.getInstrInfo(); 73481ad6265SDimitry Andric DebugLoc DL = MBB.findDebugLoc(MBBI); 73581ad6265SDimitry Andric 73681ad6265SDimitry Andric for (const auto &Info : CSI) { 73781ad6265SDimitry Andric if (SVE != 73881ad6265SDimitry Andric (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector)) 73981ad6265SDimitry Andric continue; 74081ad6265SDimitry Andric 74181ad6265SDimitry Andric unsigned Reg = Info.getReg(); 74281ad6265SDimitry Andric if (SVE && 74381ad6265SDimitry Andric !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg)) 74481ad6265SDimitry Andric continue; 74581ad6265SDimitry Andric 7460fca6ea1SDimitry Andric if (!Info.isRestored()) 7470fca6ea1SDimitry Andric continue; 7480fca6ea1SDimitry Andric 74981ad6265SDimitry Andric unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore( 75081ad6265SDimitry Andric nullptr, TRI.getDwarfRegNum(Info.getReg(), true))); 75181ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 75281ad6265SDimitry Andric .addCFIIndex(CFIIndex) 75381ad6265SDimitry Andric 
.setMIFlags(MachineInstr::FrameDestroy); 75481ad6265SDimitry Andric } 75581ad6265SDimitry Andric } 75681ad6265SDimitry Andric 75781ad6265SDimitry Andric void AArch64FrameLowering::emitCalleeSavedGPRRestores( 75881ad6265SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { 75981ad6265SDimitry Andric emitCalleeSavedRestores(MBB, MBBI, false); 76081ad6265SDimitry Andric } 76181ad6265SDimitry Andric 76281ad6265SDimitry Andric void AArch64FrameLowering::emitCalleeSavedSVERestores( 76381ad6265SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { 76481ad6265SDimitry Andric emitCalleeSavedRestores(MBB, MBBI, true); 76581ad6265SDimitry Andric } 76681ad6265SDimitry Andric 7675f757f3fSDimitry Andric // Return the maximum possible number of bytes for `Size` due to the 7685f757f3fSDimitry Andric // architectural limit on the size of a SVE register. 7695f757f3fSDimitry Andric static int64_t upperBound(StackOffset Size) { 7705f757f3fSDimitry Andric static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16; 7715f757f3fSDimitry Andric return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed(); 7725f757f3fSDimitry Andric } 7735f757f3fSDimitry Andric 7745f757f3fSDimitry Andric void AArch64FrameLowering::allocateStackSpace( 7755f757f3fSDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 7765f757f3fSDimitry Andric int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI, 7775f757f3fSDimitry Andric bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset, 7785f757f3fSDimitry Andric bool FollowupAllocs) const { 7795f757f3fSDimitry Andric 7805f757f3fSDimitry Andric if (!AllocSize) 7815f757f3fSDimitry Andric return; 7825f757f3fSDimitry Andric 7835f757f3fSDimitry Andric DebugLoc DL; 7845f757f3fSDimitry Andric MachineFunction &MF = *MBB.getParent(); 7855f757f3fSDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 7865f757f3fSDimitry Andric const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 7875f757f3fSDimitry Andric AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); 7885f757f3fSDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 7895f757f3fSDimitry Andric 7905f757f3fSDimitry Andric const int64_t MaxAlign = MFI.getMaxAlign().value(); 7915f757f3fSDimitry Andric const uint64_t AndMask = ~(MaxAlign - 1); 7925f757f3fSDimitry Andric 7935f757f3fSDimitry Andric if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) { 7945f757f3fSDimitry Andric Register TargetReg = RealignmentPadding 7955f757f3fSDimitry Andric ? 
findScratchNonCalleeSaveRegister(&MBB) 7965f757f3fSDimitry Andric : AArch64::SP; 7975f757f3fSDimitry Andric // SUB Xd/SP, SP, AllocSize 7985f757f3fSDimitry Andric emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII, 7995f757f3fSDimitry Andric MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, 8005f757f3fSDimitry Andric EmitCFI, InitialOffset); 8015f757f3fSDimitry Andric 8025f757f3fSDimitry Andric if (RealignmentPadding) { 8035f757f3fSDimitry Andric // AND SP, X9, 0b11111...0000 8045f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP) 8055f757f3fSDimitry Andric .addReg(TargetReg, RegState::Kill) 8065f757f3fSDimitry Andric .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) 8075f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 8085f757f3fSDimitry Andric AFI.setStackRealigned(true); 8095f757f3fSDimitry Andric 8105f757f3fSDimitry Andric // No need for SEH instructions here; if we're realigning the stack, 8115f757f3fSDimitry Andric // we've set a frame pointer and already finished the SEH prologue. 8125f757f3fSDimitry Andric assert(!NeedsWinCFI); 8135f757f3fSDimitry Andric } 8145f757f3fSDimitry Andric return; 8155f757f3fSDimitry Andric } 8165f757f3fSDimitry Andric 8175f757f3fSDimitry Andric // 8185f757f3fSDimitry Andric // Stack probing allocation. 8195f757f3fSDimitry Andric // 8205f757f3fSDimitry Andric 8215f757f3fSDimitry Andric // Fixed length allocation. If we don't need to re-align the stack and don't 8225f757f3fSDimitry Andric // have SVE objects, we can use a more efficient sequence for stack probing. 8235f757f3fSDimitry Andric if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) { 8245f757f3fSDimitry Andric Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB); 8255f757f3fSDimitry Andric assert(ScratchReg != AArch64::NoRegister); 8265f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC)) 8275f757f3fSDimitry Andric .addDef(ScratchReg) 8285f757f3fSDimitry Andric .addImm(AllocSize.getFixed()) 8295f757f3fSDimitry Andric .addImm(InitialOffset.getFixed()) 8305f757f3fSDimitry Andric .addImm(InitialOffset.getScalable()); 8315f757f3fSDimitry Andric // The fixed allocation may leave unprobed bytes at the top of the 8325f757f3fSDimitry Andric // stack. If we have subsequent allocations (e.g. if we have variable-sized 8335f757f3fSDimitry Andric // objects), we need to issue an extra probe, so these allocations start in 8345f757f3fSDimitry Andric // a known state. 8355f757f3fSDimitry Andric if (FollowupAllocs) { 8365f757f3fSDimitry Andric // STR XZR, [SP] 8375f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui)) 8385f757f3fSDimitry Andric .addReg(AArch64::XZR) 8395f757f3fSDimitry Andric .addReg(AArch64::SP) 8405f757f3fSDimitry Andric .addImm(0) 8415f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 8425f757f3fSDimitry Andric } 8435f757f3fSDimitry Andric 8445f757f3fSDimitry Andric return; 8455f757f3fSDimitry Andric } 8465f757f3fSDimitry Andric 8475f757f3fSDimitry Andric // Variable length allocation. 8485f757f3fSDimitry Andric 8495f757f3fSDimitry Andric // If the (unknown) allocation size cannot exceed the probe size, decrement 8505f757f3fSDimitry Andric // the stack pointer right away. 8515f757f3fSDimitry Andric int64_t ProbeSize = AFI.getStackProbeSize(); 8525f757f3fSDimitry Andric if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) { 8535f757f3fSDimitry Andric Register ScratchReg = RealignmentPadding 8545f757f3fSDimitry Andric ?
findScratchNonCalleeSaveRegister(&MBB) 8555f757f3fSDimitry Andric : AArch64::SP; 8565f757f3fSDimitry Andric assert(ScratchReg != AArch64::NoRegister); 8575f757f3fSDimitry Andric // SUB Xd, SP, AllocSize 8585f757f3fSDimitry Andric emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII, 8595f757f3fSDimitry Andric MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, 8605f757f3fSDimitry Andric EmitCFI, InitialOffset); 8615f757f3fSDimitry Andric if (RealignmentPadding) { 8625f757f3fSDimitry Andric // AND SP, Xn, 0b11111...0000 8635f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP) 8645f757f3fSDimitry Andric .addReg(ScratchReg, RegState::Kill) 8655f757f3fSDimitry Andric .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) 8665f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 8675f757f3fSDimitry Andric AFI.setStackRealigned(true); 8685f757f3fSDimitry Andric } 8695f757f3fSDimitry Andric if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding > 8705f757f3fSDimitry Andric AArch64::StackProbeMaxUnprobedStack) { 8715f757f3fSDimitry Andric // STR XZR, [SP] 8725f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui)) 8735f757f3fSDimitry Andric .addReg(AArch64::XZR) 8745f757f3fSDimitry Andric .addReg(AArch64::SP) 8755f757f3fSDimitry Andric .addImm(0) 8765f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 8775f757f3fSDimitry Andric } 8785f757f3fSDimitry Andric return; 8795f757f3fSDimitry Andric } 8805f757f3fSDimitry Andric 8815f757f3fSDimitry Andric // Emit a variable-length allocation probing loop. 8825f757f3fSDimitry Andric // TODO: As an optimisation, the loop can be "unrolled" into a few parts, 8835f757f3fSDimitry Andric // each of them guaranteed to adjust the stack by less than the probe size. 8845f757f3fSDimitry Andric Register TargetReg = findScratchNonCalleeSaveRegister(&MBB); 8855f757f3fSDimitry Andric assert(TargetReg != AArch64::NoRegister); 8865f757f3fSDimitry Andric // SUB Xd, SP, AllocSize 8875f757f3fSDimitry Andric emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII, 8885f757f3fSDimitry Andric MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI, 8895f757f3fSDimitry Andric EmitCFI, InitialOffset); 8905f757f3fSDimitry Andric if (RealignmentPadding) { 8915f757f3fSDimitry Andric // AND Xn, Xn, 0b11111...0000 8925f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg) 8935f757f3fSDimitry Andric .addReg(TargetReg, RegState::Kill) 8945f757f3fSDimitry Andric .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)) 8955f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 8965f757f3fSDimitry Andric } 8975f757f3fSDimitry Andric 8985f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR)) 8995f757f3fSDimitry Andric .addReg(TargetReg); 9005f757f3fSDimitry Andric if (EmitCFI) { 9015f757f3fSDimitry Andric // Set the CFA register back to SP. 
9025f757f3fSDimitry Andric unsigned Reg = 9035f757f3fSDimitry Andric Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true); 9045f757f3fSDimitry Andric unsigned CFIIndex = 9055f757f3fSDimitry Andric MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 9065f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 9075f757f3fSDimitry Andric .addCFIIndex(CFIIndex) 9085f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 9095f757f3fSDimitry Andric } 9105f757f3fSDimitry Andric if (RealignmentPadding) 9115f757f3fSDimitry Andric AFI.setStackRealigned(true); 9125f757f3fSDimitry Andric } 9135f757f3fSDimitry Andric 91481ad6265SDimitry Andric static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) { 91581ad6265SDimitry Andric switch (Reg.id()) { 91681ad6265SDimitry Andric default: 91781ad6265SDimitry Andric // The called routine is expected to preserve r19-r28 91881ad6265SDimitry Andric // r29 and r30 are used as frame pointer and link register resp. 91981ad6265SDimitry Andric return 0; 92081ad6265SDimitry Andric 92181ad6265SDimitry Andric // GPRs 92281ad6265SDimitry Andric #define CASE(n) \ 92381ad6265SDimitry Andric case AArch64::W##n: \ 92481ad6265SDimitry Andric case AArch64::X##n: \ 92581ad6265SDimitry Andric return AArch64::X##n 92681ad6265SDimitry Andric CASE(0); 92781ad6265SDimitry Andric CASE(1); 92881ad6265SDimitry Andric CASE(2); 92981ad6265SDimitry Andric CASE(3); 93081ad6265SDimitry Andric CASE(4); 93181ad6265SDimitry Andric CASE(5); 93281ad6265SDimitry Andric CASE(6); 93381ad6265SDimitry Andric CASE(7); 93481ad6265SDimitry Andric CASE(8); 93581ad6265SDimitry Andric CASE(9); 93681ad6265SDimitry Andric CASE(10); 93781ad6265SDimitry Andric CASE(11); 93881ad6265SDimitry Andric CASE(12); 93981ad6265SDimitry Andric CASE(13); 94081ad6265SDimitry Andric CASE(14); 94181ad6265SDimitry Andric CASE(15); 94281ad6265SDimitry Andric CASE(16); 94381ad6265SDimitry Andric CASE(17); 94481ad6265SDimitry Andric CASE(18); 94581ad6265SDimitry Andric #undef CASE 94681ad6265SDimitry Andric 94781ad6265SDimitry Andric // FPRs 94881ad6265SDimitry Andric #define CASE(n) \ 94981ad6265SDimitry Andric case AArch64::B##n: \ 95081ad6265SDimitry Andric case AArch64::H##n: \ 95181ad6265SDimitry Andric case AArch64::S##n: \ 95281ad6265SDimitry Andric case AArch64::D##n: \ 95381ad6265SDimitry Andric case AArch64::Q##n: \ 95481ad6265SDimitry Andric return HasSVE ? 
AArch64::Z##n : AArch64::Q##n 95581ad6265SDimitry Andric CASE(0); 95681ad6265SDimitry Andric CASE(1); 95781ad6265SDimitry Andric CASE(2); 95881ad6265SDimitry Andric CASE(3); 95981ad6265SDimitry Andric CASE(4); 96081ad6265SDimitry Andric CASE(5); 96181ad6265SDimitry Andric CASE(6); 96281ad6265SDimitry Andric CASE(7); 96381ad6265SDimitry Andric CASE(8); 96481ad6265SDimitry Andric CASE(9); 96581ad6265SDimitry Andric CASE(10); 96681ad6265SDimitry Andric CASE(11); 96781ad6265SDimitry Andric CASE(12); 96881ad6265SDimitry Andric CASE(13); 96981ad6265SDimitry Andric CASE(14); 97081ad6265SDimitry Andric CASE(15); 97181ad6265SDimitry Andric CASE(16); 97281ad6265SDimitry Andric CASE(17); 97381ad6265SDimitry Andric CASE(18); 97481ad6265SDimitry Andric CASE(19); 97581ad6265SDimitry Andric CASE(20); 97681ad6265SDimitry Andric CASE(21); 97781ad6265SDimitry Andric CASE(22); 97881ad6265SDimitry Andric CASE(23); 97981ad6265SDimitry Andric CASE(24); 98081ad6265SDimitry Andric CASE(25); 98181ad6265SDimitry Andric CASE(26); 98281ad6265SDimitry Andric CASE(27); 98381ad6265SDimitry Andric CASE(28); 98481ad6265SDimitry Andric CASE(29); 98581ad6265SDimitry Andric CASE(30); 98681ad6265SDimitry Andric CASE(31); 98781ad6265SDimitry Andric #undef CASE 98881ad6265SDimitry Andric } 98981ad6265SDimitry Andric } 99081ad6265SDimitry Andric 99181ad6265SDimitry Andric void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, 99281ad6265SDimitry Andric MachineBasicBlock &MBB) const { 99381ad6265SDimitry Andric // Insertion point. 99481ad6265SDimitry Andric MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 99581ad6265SDimitry Andric 99681ad6265SDimitry Andric // Fake a debug loc. 99781ad6265SDimitry Andric DebugLoc DL; 99881ad6265SDimitry Andric if (MBBI != MBB.end()) 99981ad6265SDimitry Andric DL = MBBI->getDebugLoc(); 100081ad6265SDimitry Andric 100181ad6265SDimitry Andric const MachineFunction &MF = *MBB.getParent(); 100281ad6265SDimitry Andric const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); 100381ad6265SDimitry Andric const AArch64RegisterInfo &TRI = *STI.getRegisterInfo(); 100481ad6265SDimitry Andric 100581ad6265SDimitry Andric BitVector GPRsToZero(TRI.getNumRegs()); 100681ad6265SDimitry Andric BitVector FPRsToZero(TRI.getNumRegs()); 100781ad6265SDimitry Andric bool HasSVE = STI.hasSVE(); 100881ad6265SDimitry Andric for (MCRegister Reg : RegsToZero.set_bits()) { 100981ad6265SDimitry Andric if (TRI.isGeneralPurposeRegister(MF, Reg)) { 101081ad6265SDimitry Andric // For GPRs, we only care to clear out the 64-bit register. 101181ad6265SDimitry Andric if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE)) 101281ad6265SDimitry Andric GPRsToZero.set(XReg); 10130fca6ea1SDimitry Andric } else if (AArch64InstrInfo::isFpOrNEON(Reg)) { 101481ad6265SDimitry Andric // For FPRs, 101581ad6265SDimitry Andric if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE)) 101681ad6265SDimitry Andric FPRsToZero.set(XReg); 101781ad6265SDimitry Andric } 101881ad6265SDimitry Andric } 101981ad6265SDimitry Andric 102081ad6265SDimitry Andric const AArch64InstrInfo &TII = *STI.getInstrInfo(); 102181ad6265SDimitry Andric 102281ad6265SDimitry Andric // Zero out GPRs. 102381ad6265SDimitry Andric for (MCRegister Reg : GPRsToZero.set_bits()) 10245f757f3fSDimitry Andric TII.buildClearRegister(Reg, MBB, MBBI, DL); 102581ad6265SDimitry Andric 102681ad6265SDimitry Andric // Zero out FP/vector registers. 
102781ad6265SDimitry Andric for (MCRegister Reg : FPRsToZero.set_bits()) 10285f757f3fSDimitry Andric TII.buildClearRegister(Reg, MBB, MBBI, DL); 102981ad6265SDimitry Andric 103081ad6265SDimitry Andric if (HasSVE) { 103181ad6265SDimitry Andric for (MCRegister PReg : 103281ad6265SDimitry Andric {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4, 103381ad6265SDimitry Andric AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9, 103481ad6265SDimitry Andric AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14, 103581ad6265SDimitry Andric AArch64::P15}) { 103681ad6265SDimitry Andric if (RegsToZero[PReg]) 103781ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg); 103881ad6265SDimitry Andric } 103981ad6265SDimitry Andric } 104081ad6265SDimitry Andric } 104181ad6265SDimitry Andric 10425f757f3fSDimitry Andric static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs, 10435f757f3fSDimitry Andric const MachineBasicBlock &MBB) { 10445f757f3fSDimitry Andric const MachineFunction *MF = MBB.getParent(); 10455f757f3fSDimitry Andric LiveRegs.addLiveIns(MBB); 10465f757f3fSDimitry Andric // Mark callee saved registers as used so we will not choose them. 10475f757f3fSDimitry Andric const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs(); 10485f757f3fSDimitry Andric for (unsigned i = 0; CSRegs[i]; ++i) 10495f757f3fSDimitry Andric LiveRegs.addReg(CSRegs[i]); 10505f757f3fSDimitry Andric } 10515f757f3fSDimitry Andric 10520b57cec5SDimitry Andric // Find a scratch register that we can use at the start of the prologue to 10530b57cec5SDimitry Andric // re-align the stack pointer. We avoid using callee-save registers since they 10540b57cec5SDimitry Andric // may appear to be free when this is called from canUseAsPrologue (during 10550b57cec5SDimitry Andric // shrink wrapping), but then no longer be free when this is called from 10560b57cec5SDimitry Andric // emitPrologue. 10570b57cec5SDimitry Andric // 10580b57cec5SDimitry Andric // FIXME: This is a bit conservative, since in the above case we could use one 10590b57cec5SDimitry Andric // of the callee-save registers as a scratch temp to re-align the stack pointer, 10600b57cec5SDimitry Andric // but we would then have to make sure that we were in fact saving at least one 10610b57cec5SDimitry Andric // callee-save register in the prologue, which is additional complexity that 10620b57cec5SDimitry Andric // doesn't seem worth the benefit. 10630fca6ea1SDimitry Andric static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { 10640b57cec5SDimitry Andric MachineFunction *MF = MBB->getParent(); 10650b57cec5SDimitry Andric 10660b57cec5SDimitry Andric // If MBB is an entry block, use X9 as the scratch register 10670fca6ea1SDimitry Andric // preserve_none functions may be using X9 to pass arguments, 10680fca6ea1SDimitry Andric // so prefer to pick an available register below. 
10690fca6ea1SDimitry Andric if (&MF->front() == MBB && 10700fca6ea1SDimitry Andric MF->getFunction().getCallingConv() != CallingConv::PreserveNone) 10710b57cec5SDimitry Andric return AArch64::X9; 10720b57cec5SDimitry Andric 10730b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>(); 10740b57cec5SDimitry Andric const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); 10750b57cec5SDimitry Andric LivePhysRegs LiveRegs(TRI); 10765f757f3fSDimitry Andric getLiveRegsForEntryMBB(LiveRegs, *MBB); 10770b57cec5SDimitry Andric 10780b57cec5SDimitry Andric // Prefer X9 since it was historically used for the prologue scratch reg. 10790b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 10800b57cec5SDimitry Andric if (LiveRegs.available(MRI, AArch64::X9)) 10810b57cec5SDimitry Andric return AArch64::X9; 10820b57cec5SDimitry Andric 10830b57cec5SDimitry Andric for (unsigned Reg : AArch64::GPR64RegClass) { 10840b57cec5SDimitry Andric if (LiveRegs.available(MRI, Reg)) 10850b57cec5SDimitry Andric return Reg; 10860b57cec5SDimitry Andric } 10870b57cec5SDimitry Andric return AArch64::NoRegister; 10880b57cec5SDimitry Andric } 10890b57cec5SDimitry Andric 10900b57cec5SDimitry Andric bool AArch64FrameLowering::canUseAsPrologue( 10910b57cec5SDimitry Andric const MachineBasicBlock &MBB) const { 10920b57cec5SDimitry Andric const MachineFunction *MF = MBB.getParent(); 10930b57cec5SDimitry Andric MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 10940b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>(); 10950b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 10965f757f3fSDimitry Andric const AArch64TargetLowering *TLI = Subtarget.getTargetLowering(); 10975f757f3fSDimitry Andric const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>(); 10980b57cec5SDimitry Andric 10995f757f3fSDimitry Andric if (AFI->hasSwiftAsyncContext()) { 11005f757f3fSDimitry Andric const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); 11015f757f3fSDimitry Andric const MachineRegisterInfo &MRI = MF->getRegInfo(); 11025f757f3fSDimitry Andric LivePhysRegs LiveRegs(TRI); 11035f757f3fSDimitry Andric getLiveRegsForEntryMBB(LiveRegs, MBB); 11045f757f3fSDimitry Andric // The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are 11055f757f3fSDimitry Andric // available. 11065f757f3fSDimitry Andric if (!LiveRegs.available(MRI, AArch64::X16) || 11075f757f3fSDimitry Andric !LiveRegs.available(MRI, AArch64::X17)) 11085f757f3fSDimitry Andric return false; 11095f757f3fSDimitry Andric } 11105f757f3fSDimitry Andric 11110fca6ea1SDimitry Andric // Certain stack probing sequences might clobber flags, then we can't use 11120fca6ea1SDimitry Andric // the block as a prologue if the flags register is a live-in. 11130fca6ea1SDimitry Andric if (MF->getInfo<AArch64FunctionInfo>()->hasStackProbing() && 11140fca6ea1SDimitry Andric MBB.isLiveIn(AArch64::NZCV)) 11150fca6ea1SDimitry Andric return false; 11160fca6ea1SDimitry Andric 11175f757f3fSDimitry Andric // Don't need a scratch register if we're not going to re-align the stack or 11185f757f3fSDimitry Andric // emit stack probes. 11190fca6ea1SDimitry Andric if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF)) 11200b57cec5SDimitry Andric return true; 11210b57cec5SDimitry Andric // Otherwise, we can use any block as long as it has a scratch register 11220b57cec5SDimitry Andric // available. 
11230b57cec5SDimitry Andric return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister; 11240b57cec5SDimitry Andric } 11250b57cec5SDimitry Andric 11260b57cec5SDimitry Andric static bool windowsRequiresStackProbe(MachineFunction &MF, 1127480093f4SDimitry Andric uint64_t StackSizeInBytes) { 11280b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 11295f757f3fSDimitry Andric const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>(); 11300b57cec5SDimitry Andric // TODO: When implementing stack protectors, take that into account 11310b57cec5SDimitry Andric // for the probe threshold. 11325f757f3fSDimitry Andric return Subtarget.isTargetWindows() && MFI.hasStackProbing() && 11335f757f3fSDimitry Andric StackSizeInBytes >= uint64_t(MFI.getStackProbeSize()); 11340b57cec5SDimitry Andric } 11350b57cec5SDimitry Andric 1136e8d8bef9SDimitry Andric static bool needsWinCFI(const MachineFunction &MF) { 1137e8d8bef9SDimitry Andric const Function &F = MF.getFunction(); 1138e8d8bef9SDimitry Andric return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && 1139e8d8bef9SDimitry Andric F.needsUnwindTableEntry(); 1140e8d8bef9SDimitry Andric } 1141e8d8bef9SDimitry Andric 11420b57cec5SDimitry Andric bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( 1143480093f4SDimitry Andric MachineFunction &MF, uint64_t StackBumpBytes) const { 11440b57cec5SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 11450b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 11460b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 11470b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1148fe6060f1SDimitry Andric if (homogeneousPrologEpilog(MF)) 1149fe6060f1SDimitry Andric return false; 11500b57cec5SDimitry Andric 11510b57cec5SDimitry Andric if (AFI->getLocalStackSize() == 0) 11520b57cec5SDimitry Andric return false; 11530b57cec5SDimitry Andric 1154e8d8bef9SDimitry Andric // For WinCFI, if optimizing for size, prefer to not combine the stack bump 1155e8d8bef9SDimitry Andric // (to force a stp with predecrement) to match the packed unwind format, 1156e8d8bef9SDimitry Andric // provided that there actually are any callee saved registers to merge the 1157e8d8bef9SDimitry Andric // decrement with. 1158e8d8bef9SDimitry Andric // This is potentially marginally slower, but allows using the packed 1159e8d8bef9SDimitry Andric // unwind format for functions that both have a local area and callee saved 1160e8d8bef9SDimitry Andric // registers. Using the packed unwind format notably reduces the size of 1161e8d8bef9SDimitry Andric // the unwind info. 
1162e8d8bef9SDimitry Andric if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 && 1163e8d8bef9SDimitry Andric MF.getFunction().hasOptSize()) 1164e8d8bef9SDimitry Andric return false; 1165e8d8bef9SDimitry Andric 11660b57cec5SDimitry Andric // 512 is the maximum immediate for stp/ldp that will be used for 11670b57cec5SDimitry Andric // callee-save save/restores 11680b57cec5SDimitry Andric if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes)) 11690b57cec5SDimitry Andric return false; 11700b57cec5SDimitry Andric 11710b57cec5SDimitry Andric if (MFI.hasVarSizedObjects()) 11720b57cec5SDimitry Andric return false; 11730b57cec5SDimitry Andric 1174fe6060f1SDimitry Andric if (RegInfo->hasStackRealignment(MF)) 11750b57cec5SDimitry Andric return false; 11760b57cec5SDimitry Andric 11770b57cec5SDimitry Andric // This isn't strictly necessary, but it simplifies things a bit since the 11780b57cec5SDimitry Andric // current RedZone handling code assumes the SP is adjusted by the 11790b57cec5SDimitry Andric // callee-save save/restore code. 11800b57cec5SDimitry Andric if (canUseRedZone(MF)) 11810b57cec5SDimitry Andric return false; 11820b57cec5SDimitry Andric 11838bcb0991SDimitry Andric // When there is an SVE area on the stack, always allocate the 11848bcb0991SDimitry Andric // callee-saves and spills/locals separately. 11858bcb0991SDimitry Andric if (getSVEStackSize(MF)) 11868bcb0991SDimitry Andric return false; 11878bcb0991SDimitry Andric 11880b57cec5SDimitry Andric return true; 11890b57cec5SDimitry Andric } 11900b57cec5SDimitry Andric 11915ffd83dbSDimitry Andric bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue( 11925ffd83dbSDimitry Andric MachineBasicBlock &MBB, unsigned StackBumpBytes) const { 11935ffd83dbSDimitry Andric if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes)) 11945ffd83dbSDimitry Andric return false; 11955ffd83dbSDimitry Andric 11965ffd83dbSDimitry Andric if (MBB.empty()) 11975ffd83dbSDimitry Andric return true; 11985ffd83dbSDimitry Andric 11995ffd83dbSDimitry Andric // Disable combined SP bump if the last instruction is an MTE tag store. It 12005ffd83dbSDimitry Andric // is almost always better to merge SP adjustment into those instructions. 12015ffd83dbSDimitry Andric MachineBasicBlock::iterator LastI = MBB.getFirstTerminator(); 12025ffd83dbSDimitry Andric MachineBasicBlock::iterator Begin = MBB.begin(); 12035ffd83dbSDimitry Andric while (LastI != Begin) { 12045ffd83dbSDimitry Andric --LastI; 12055ffd83dbSDimitry Andric if (LastI->isTransient()) 12065ffd83dbSDimitry Andric continue; 12075ffd83dbSDimitry Andric if (!LastI->getFlag(MachineInstr::FrameDestroy)) 12085ffd83dbSDimitry Andric break; 12095ffd83dbSDimitry Andric } 12105ffd83dbSDimitry Andric switch (LastI->getOpcode()) { 12115ffd83dbSDimitry Andric case AArch64::STGloop: 12125ffd83dbSDimitry Andric case AArch64::STZGloop: 121306c3fb27SDimitry Andric case AArch64::STGi: 121406c3fb27SDimitry Andric case AArch64::STZGi: 121506c3fb27SDimitry Andric case AArch64::ST2Gi: 121606c3fb27SDimitry Andric case AArch64::STZ2Gi: 12175ffd83dbSDimitry Andric return false; 12185ffd83dbSDimitry Andric default: 12195ffd83dbSDimitry Andric return true; 12205ffd83dbSDimitry Andric } 12215ffd83dbSDimitry Andric llvm_unreachable("unreachable"); 12225ffd83dbSDimitry Andric } 12235ffd83dbSDimitry Andric 12240b57cec5SDimitry Andric // Given a load or a store instruction, generate an appropriate unwinding SEH 12250b57cec5SDimitry Andric // code on Windows. 
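// For example, a frame-record save `stp x29, x30, [sp, #-16]!` (STPXpre of
// FP/LR) is paired with a SEH_SaveFPLR_X pseudo, which the asm printer later
// lowers to a `.seh_save_fplr_x` directive for the Windows unwinder; other
// GPR and FPR pairs map to SEH_SaveRegP_X and SEH_SaveFRegP_X in the same way.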
12260b57cec5SDimitry Andric static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, 12270b57cec5SDimitry Andric const TargetInstrInfo &TII, 12280b57cec5SDimitry Andric MachineInstr::MIFlag Flag) { 12290b57cec5SDimitry Andric unsigned Opc = MBBI->getOpcode(); 12300b57cec5SDimitry Andric MachineBasicBlock *MBB = MBBI->getParent(); 12310b57cec5SDimitry Andric MachineFunction &MF = *MBB->getParent(); 12320b57cec5SDimitry Andric DebugLoc DL = MBBI->getDebugLoc(); 12330b57cec5SDimitry Andric unsigned ImmIdx = MBBI->getNumOperands() - 1; 12340b57cec5SDimitry Andric int Imm = MBBI->getOperand(ImmIdx).getImm(); 12350b57cec5SDimitry Andric MachineInstrBuilder MIB; 12360b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 12370b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric switch (Opc) { 12400b57cec5SDimitry Andric default: 12410b57cec5SDimitry Andric llvm_unreachable("No SEH Opcode for this instruction"); 12420b57cec5SDimitry Andric case AArch64::LDPDpost: 12430b57cec5SDimitry Andric Imm = -Imm; 1244bdd1243dSDimitry Andric [[fallthrough]]; 12450b57cec5SDimitry Andric case AArch64::STPDpre: { 12460b57cec5SDimitry Andric unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 12470b57cec5SDimitry Andric unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg()); 12480b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X)) 12490b57cec5SDimitry Andric .addImm(Reg0) 12500b57cec5SDimitry Andric .addImm(Reg1) 12510b57cec5SDimitry Andric .addImm(Imm * 8) 12520b57cec5SDimitry Andric .setMIFlag(Flag); 12530b57cec5SDimitry Andric break; 12540b57cec5SDimitry Andric } 12550b57cec5SDimitry Andric case AArch64::LDPXpost: 12560b57cec5SDimitry Andric Imm = -Imm; 1257bdd1243dSDimitry Andric [[fallthrough]]; 12580b57cec5SDimitry Andric case AArch64::STPXpre: { 12598bcb0991SDimitry Andric Register Reg0 = MBBI->getOperand(1).getReg(); 12608bcb0991SDimitry Andric Register Reg1 = MBBI->getOperand(2).getReg(); 12610b57cec5SDimitry Andric if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) 12620b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X)) 12630b57cec5SDimitry Andric .addImm(Imm * 8) 12640b57cec5SDimitry Andric .setMIFlag(Flag); 12650b57cec5SDimitry Andric else 12660b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X)) 12670b57cec5SDimitry Andric .addImm(RegInfo->getSEHRegNum(Reg0)) 12680b57cec5SDimitry Andric .addImm(RegInfo->getSEHRegNum(Reg1)) 12690b57cec5SDimitry Andric .addImm(Imm * 8) 12700b57cec5SDimitry Andric .setMIFlag(Flag); 12710b57cec5SDimitry Andric break; 12720b57cec5SDimitry Andric } 12730b57cec5SDimitry Andric case AArch64::LDRDpost: 12740b57cec5SDimitry Andric Imm = -Imm; 1275bdd1243dSDimitry Andric [[fallthrough]]; 12760b57cec5SDimitry Andric case AArch64::STRDpre: { 12770b57cec5SDimitry Andric unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 12780b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X)) 12790b57cec5SDimitry Andric .addImm(Reg) 12800b57cec5SDimitry Andric .addImm(Imm) 12810b57cec5SDimitry Andric .setMIFlag(Flag); 12820b57cec5SDimitry Andric break; 12830b57cec5SDimitry Andric } 12840b57cec5SDimitry Andric case AArch64::LDRXpost: 12850b57cec5SDimitry Andric Imm = -Imm; 1286bdd1243dSDimitry Andric [[fallthrough]]; 12870b57cec5SDimitry Andric case AArch64::STRXpre: { 12880b57cec5SDimitry Andric 
unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 12890b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X)) 12900b57cec5SDimitry Andric .addImm(Reg) 12910b57cec5SDimitry Andric .addImm(Imm) 12920b57cec5SDimitry Andric .setMIFlag(Flag); 12930b57cec5SDimitry Andric break; 12940b57cec5SDimitry Andric } 12950b57cec5SDimitry Andric case AArch64::STPDi: 12960b57cec5SDimitry Andric case AArch64::LDPDi: { 12970b57cec5SDimitry Andric unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); 12980b57cec5SDimitry Andric unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 12990b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP)) 13000b57cec5SDimitry Andric .addImm(Reg0) 13010b57cec5SDimitry Andric .addImm(Reg1) 13020b57cec5SDimitry Andric .addImm(Imm * 8) 13030b57cec5SDimitry Andric .setMIFlag(Flag); 13040b57cec5SDimitry Andric break; 13050b57cec5SDimitry Andric } 13060b57cec5SDimitry Andric case AArch64::STPXi: 13070b57cec5SDimitry Andric case AArch64::LDPXi: { 13088bcb0991SDimitry Andric Register Reg0 = MBBI->getOperand(0).getReg(); 13098bcb0991SDimitry Andric Register Reg1 = MBBI->getOperand(1).getReg(); 13100b57cec5SDimitry Andric if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) 13110b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR)) 13120b57cec5SDimitry Andric .addImm(Imm * 8) 13130b57cec5SDimitry Andric .setMIFlag(Flag); 13140b57cec5SDimitry Andric else 13150b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP)) 13160b57cec5SDimitry Andric .addImm(RegInfo->getSEHRegNum(Reg0)) 13170b57cec5SDimitry Andric .addImm(RegInfo->getSEHRegNum(Reg1)) 13180b57cec5SDimitry Andric .addImm(Imm * 8) 13190b57cec5SDimitry Andric .setMIFlag(Flag); 13200b57cec5SDimitry Andric break; 13210b57cec5SDimitry Andric } 13220b57cec5SDimitry Andric case AArch64::STRXui: 13230b57cec5SDimitry Andric case AArch64::LDRXui: { 13240b57cec5SDimitry Andric int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); 13250b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg)) 13260b57cec5SDimitry Andric .addImm(Reg) 13270b57cec5SDimitry Andric .addImm(Imm * 8) 13280b57cec5SDimitry Andric .setMIFlag(Flag); 13290b57cec5SDimitry Andric break; 13300b57cec5SDimitry Andric } 13310b57cec5SDimitry Andric case AArch64::STRDui: 13320b57cec5SDimitry Andric case AArch64::LDRDui: { 13330b57cec5SDimitry Andric unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); 13340b57cec5SDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg)) 13350b57cec5SDimitry Andric .addImm(Reg) 13360b57cec5SDimitry Andric .addImm(Imm * 8) 13370b57cec5SDimitry Andric .setMIFlag(Flag); 13380b57cec5SDimitry Andric break; 13390b57cec5SDimitry Andric } 13407a6dacacSDimitry Andric case AArch64::STPQi: 13417a6dacacSDimitry Andric case AArch64::LDPQi: { 13427a6dacacSDimitry Andric unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg()); 13437a6dacacSDimitry Andric unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 13447a6dacacSDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP)) 13457a6dacacSDimitry Andric .addImm(Reg0) 13467a6dacacSDimitry Andric .addImm(Reg1) 13477a6dacacSDimitry Andric .addImm(Imm * 16) 13487a6dacacSDimitry Andric .setMIFlag(Flag); 13497a6dacacSDimitry Andric break; 13507a6dacacSDimitry Andric } 13517a6dacacSDimitry Andric case AArch64::LDPQpost: 13527a6dacacSDimitry Andric Imm = -Imm; 13530fca6ea1SDimitry Andric 
[[fallthrough]]; 13547a6dacacSDimitry Andric case AArch64::STPQpre: { 13557a6dacacSDimitry Andric unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg()); 13567a6dacacSDimitry Andric unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg()); 13577a6dacacSDimitry Andric MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX)) 13587a6dacacSDimitry Andric .addImm(Reg0) 13597a6dacacSDimitry Andric .addImm(Reg1) 13607a6dacacSDimitry Andric .addImm(Imm * 16) 13617a6dacacSDimitry Andric .setMIFlag(Flag); 13627a6dacacSDimitry Andric break; 13637a6dacacSDimitry Andric } 13640b57cec5SDimitry Andric } 13650b57cec5SDimitry Andric auto I = MBB->insertAfter(MBBI, MIB); 13660b57cec5SDimitry Andric return I; 13670b57cec5SDimitry Andric } 13680b57cec5SDimitry Andric 13690b57cec5SDimitry Andric // Fix up the SEH opcode associated with the save/restore instruction. 13700b57cec5SDimitry Andric static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, 13710b57cec5SDimitry Andric unsigned LocalStackSize) { 13720b57cec5SDimitry Andric MachineOperand *ImmOpnd = nullptr; 13730b57cec5SDimitry Andric unsigned ImmIdx = MBBI->getNumOperands() - 1; 13740b57cec5SDimitry Andric switch (MBBI->getOpcode()) { 13750b57cec5SDimitry Andric default: 13760b57cec5SDimitry Andric llvm_unreachable("Fix the offset in the SEH instruction"); 13770b57cec5SDimitry Andric case AArch64::SEH_SaveFPLR: 13780b57cec5SDimitry Andric case AArch64::SEH_SaveRegP: 13790b57cec5SDimitry Andric case AArch64::SEH_SaveReg: 13800b57cec5SDimitry Andric case AArch64::SEH_SaveFRegP: 13810b57cec5SDimitry Andric case AArch64::SEH_SaveFReg: 13827a6dacacSDimitry Andric case AArch64::SEH_SaveAnyRegQP: 13837a6dacacSDimitry Andric case AArch64::SEH_SaveAnyRegQPX: 13840b57cec5SDimitry Andric ImmOpnd = &MBBI->getOperand(ImmIdx); 13850b57cec5SDimitry Andric break; 13860b57cec5SDimitry Andric } 13870b57cec5SDimitry Andric if (ImmOpnd) 13880b57cec5SDimitry Andric ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize); 13890b57cec5SDimitry Andric } 13900b57cec5SDimitry Andric 13910fca6ea1SDimitry Andric bool requiresGetVGCall(MachineFunction &MF) { 13920fca6ea1SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 13930fca6ea1SDimitry Andric return AFI->hasStreamingModeChanges() && 13940fca6ea1SDimitry Andric !MF.getSubtarget<AArch64Subtarget>().hasSVE(); 13950fca6ea1SDimitry Andric } 13960fca6ea1SDimitry Andric 1397*71ac745dSDimitry Andric static bool requiresSaveVG(MachineFunction &MF) { 1398*71ac745dSDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1399*71ac745dSDimitry Andric // For Darwin platforms we don't save VG for non-SVE functions, even if SME 1400*71ac745dSDimitry Andric // is enabled with streaming mode changes. 
1401*71ac745dSDimitry Andric if (!AFI->hasStreamingModeChanges()) 1402*71ac745dSDimitry Andric return false; 1403*71ac745dSDimitry Andric auto &ST = MF.getSubtarget<AArch64Subtarget>(); 1404*71ac745dSDimitry Andric if (ST.isTargetDarwin()) 1405*71ac745dSDimitry Andric return ST.hasSVE(); 1406*71ac745dSDimitry Andric return true; 1407*71ac745dSDimitry Andric } 1408*71ac745dSDimitry Andric 14090fca6ea1SDimitry Andric bool isVGInstruction(MachineBasicBlock::iterator MBBI) { 14100fca6ea1SDimitry Andric unsigned Opc = MBBI->getOpcode(); 14110fca6ea1SDimitry Andric if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI || 14120fca6ea1SDimitry Andric Opc == AArch64::UBFMXri) 14130fca6ea1SDimitry Andric return true; 14140fca6ea1SDimitry Andric 14150fca6ea1SDimitry Andric if (requiresGetVGCall(*MBBI->getMF())) { 14160fca6ea1SDimitry Andric if (Opc == AArch64::ORRXrr) 14170fca6ea1SDimitry Andric return true; 14180fca6ea1SDimitry Andric 14190fca6ea1SDimitry Andric if (Opc == AArch64::BL) { 14200fca6ea1SDimitry Andric auto Op1 = MBBI->getOperand(0); 14210fca6ea1SDimitry Andric return Op1.isSymbol() && 14220fca6ea1SDimitry Andric (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg"); 14230fca6ea1SDimitry Andric } 14240fca6ea1SDimitry Andric } 14250fca6ea1SDimitry Andric 14260fca6ea1SDimitry Andric return false; 14270fca6ea1SDimitry Andric } 14280fca6ea1SDimitry Andric 14290b57cec5SDimitry Andric // Convert callee-save register save/restore instruction to do stack pointer 14300b57cec5SDimitry Andric // decrement/increment to allocate/deallocate the callee-save stack area by 14310b57cec5SDimitry Andric // converting store/load to use pre/post increment version. 14320b57cec5SDimitry Andric static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( 14330b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 14340b57cec5SDimitry Andric const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, 143581ad6265SDimitry Andric bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI, 143681ad6265SDimitry Andric MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup, 143781ad6265SDimitry Andric int CFAOffset = 0) { 14380b57cec5SDimitry Andric unsigned NewOpc; 14390fca6ea1SDimitry Andric 14400fca6ea1SDimitry Andric // If the function contains streaming mode changes, we expect instructions 14410fca6ea1SDimitry Andric // to calculate the value of VG before spilling. For locally-streaming 14420fca6ea1SDimitry Andric // functions, we need to do this for both the streaming and non-streaming 14430fca6ea1SDimitry Andric // vector length. Move past these instructions if necessary. 
14440fca6ea1SDimitry Andric MachineFunction &MF = *MBB.getParent(); 1445*71ac745dSDimitry Andric if (requiresSaveVG(MF)) 14460fca6ea1SDimitry Andric while (isVGInstruction(MBBI)) 14470fca6ea1SDimitry Andric ++MBBI; 14480fca6ea1SDimitry Andric 14490b57cec5SDimitry Andric switch (MBBI->getOpcode()) { 14500b57cec5SDimitry Andric default: 14510b57cec5SDimitry Andric llvm_unreachable("Unexpected callee-save save/restore opcode!"); 14520b57cec5SDimitry Andric case AArch64::STPXi: 14530b57cec5SDimitry Andric NewOpc = AArch64::STPXpre; 14540b57cec5SDimitry Andric break; 14550b57cec5SDimitry Andric case AArch64::STPDi: 14560b57cec5SDimitry Andric NewOpc = AArch64::STPDpre; 14570b57cec5SDimitry Andric break; 14580b57cec5SDimitry Andric case AArch64::STPQi: 14590b57cec5SDimitry Andric NewOpc = AArch64::STPQpre; 14600b57cec5SDimitry Andric break; 14610b57cec5SDimitry Andric case AArch64::STRXui: 14620b57cec5SDimitry Andric NewOpc = AArch64::STRXpre; 14630b57cec5SDimitry Andric break; 14640b57cec5SDimitry Andric case AArch64::STRDui: 14650b57cec5SDimitry Andric NewOpc = AArch64::STRDpre; 14660b57cec5SDimitry Andric break; 14670b57cec5SDimitry Andric case AArch64::STRQui: 14680b57cec5SDimitry Andric NewOpc = AArch64::STRQpre; 14690b57cec5SDimitry Andric break; 14700b57cec5SDimitry Andric case AArch64::LDPXi: 14710b57cec5SDimitry Andric NewOpc = AArch64::LDPXpost; 14720b57cec5SDimitry Andric break; 14730b57cec5SDimitry Andric case AArch64::LDPDi: 14740b57cec5SDimitry Andric NewOpc = AArch64::LDPDpost; 14750b57cec5SDimitry Andric break; 14760b57cec5SDimitry Andric case AArch64::LDPQi: 14770b57cec5SDimitry Andric NewOpc = AArch64::LDPQpost; 14780b57cec5SDimitry Andric break; 14790b57cec5SDimitry Andric case AArch64::LDRXui: 14800b57cec5SDimitry Andric NewOpc = AArch64::LDRXpost; 14810b57cec5SDimitry Andric break; 14820b57cec5SDimitry Andric case AArch64::LDRDui: 14830b57cec5SDimitry Andric NewOpc = AArch64::LDRDpost; 14840b57cec5SDimitry Andric break; 14850b57cec5SDimitry Andric case AArch64::LDRQui: 14860b57cec5SDimitry Andric NewOpc = AArch64::LDRQpost; 14870b57cec5SDimitry Andric break; 14880b57cec5SDimitry Andric } 14890b57cec5SDimitry Andric // Get rid of the SEH code associated with the old instruction. 14900b57cec5SDimitry Andric if (NeedsWinCFI) { 14910b57cec5SDimitry Andric auto SEH = std::next(MBBI); 14920b57cec5SDimitry Andric if (AArch64InstrInfo::isSEHInstruction(*SEH)) 14930b57cec5SDimitry Andric SEH->eraseFromParent(); 14940b57cec5SDimitry Andric } 14950b57cec5SDimitry Andric 14965f757f3fSDimitry Andric TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0); 1497fe6060f1SDimitry Andric int64_t MinOffset, MaxOffset; 1498fe6060f1SDimitry Andric bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo( 1499fe6060f1SDimitry Andric NewOpc, Scale, Width, MinOffset, MaxOffset); 1500fe6060f1SDimitry Andric (void)Success; 1501fe6060f1SDimitry Andric assert(Success && "unknown load/store opcode"); 1502fe6060f1SDimitry Andric 1503fe6060f1SDimitry Andric // If the first store isn't right where we want SP then we can't fold the 1504fe6060f1SDimitry Andric // update in so create a normal arithmetic instruction instead. 1505fe6060f1SDimitry Andric if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 || 1506fe6060f1SDimitry Andric CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) { 15070fca6ea1SDimitry Andric // If we are destroying the frame, make sure we add the increment after the 15080fca6ea1SDimitry Andric // last frame operation. 
15090fca6ea1SDimitry Andric if (FrameFlag == MachineInstr::FrameDestroy) 15100fca6ea1SDimitry Andric ++MBBI; 1511fe6060f1SDimitry Andric emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, 151281ad6265SDimitry Andric StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag, 151381ad6265SDimitry Andric false, false, nullptr, EmitCFI, 151481ad6265SDimitry Andric StackOffset::getFixed(CFAOffset)); 151581ad6265SDimitry Andric 1516fe6060f1SDimitry Andric return std::prev(MBBI); 1517fe6060f1SDimitry Andric } 1518fe6060f1SDimitry Andric 15190b57cec5SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); 15200b57cec5SDimitry Andric MIB.addReg(AArch64::SP, RegState::Define); 15210b57cec5SDimitry Andric 15220b57cec5SDimitry Andric // Copy all operands other than the immediate offset. 15230b57cec5SDimitry Andric unsigned OpndIdx = 0; 15240b57cec5SDimitry Andric for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd; 15250b57cec5SDimitry Andric ++OpndIdx) 15260b57cec5SDimitry Andric MIB.add(MBBI->getOperand(OpndIdx)); 15270b57cec5SDimitry Andric 15280b57cec5SDimitry Andric assert(MBBI->getOperand(OpndIdx).getImm() == 0 && 15290b57cec5SDimitry Andric "Unexpected immediate offset in first/last callee-save save/restore " 15300b57cec5SDimitry Andric "instruction!"); 15310b57cec5SDimitry Andric assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP && 15320b57cec5SDimitry Andric "Unexpected base register in callee-save save/restore instruction!"); 15330b57cec5SDimitry Andric assert(CSStackSizeInc % Scale == 0); 1534fe6060f1SDimitry Andric MIB.addImm(CSStackSizeInc / (int)Scale); 15350b57cec5SDimitry Andric 15360b57cec5SDimitry Andric MIB.setMIFlags(MBBI->getFlags()); 15370b57cec5SDimitry Andric MIB.setMemRefs(MBBI->memoperands()); 15380b57cec5SDimitry Andric 15390b57cec5SDimitry Andric // Generate a new SEH code that corresponds to the new instruction. 15400b57cec5SDimitry Andric if (NeedsWinCFI) { 15410b57cec5SDimitry Andric *HasWinCFI = true; 154281ad6265SDimitry Andric InsertSEH(*MIB, *TII, FrameFlag); 154381ad6265SDimitry Andric } 154481ad6265SDimitry Andric 154581ad6265SDimitry Andric if (EmitCFI) { 154681ad6265SDimitry Andric unsigned CFIIndex = MF.addFrameInst( 154781ad6265SDimitry Andric MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset - CSStackSizeInc)); 154881ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 154981ad6265SDimitry Andric .addCFIIndex(CFIIndex) 155081ad6265SDimitry Andric .setMIFlags(FrameFlag); 15510b57cec5SDimitry Andric } 15520b57cec5SDimitry Andric 15530b57cec5SDimitry Andric return std::prev(MBB.erase(MBBI)); 15540b57cec5SDimitry Andric } 15550b57cec5SDimitry Andric 15560b57cec5SDimitry Andric // Fixup callee-save register save/restore instructions to take into account 15570b57cec5SDimitry Andric // combined SP bump by adding the local stack size to the stack offsets. 
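// For example (illustrative sizes): with a 48-byte local area folded into the
// initial SP decrement, a callee-save spill `stp x19, x20, [sp, #16]` becomes
// `stp x19, x20, [sp, #64]`, i.e. the scaled immediate grows by
// LocalStackSize / Scale.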
15580b57cec5SDimitry Andric static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, 1559480093f4SDimitry Andric uint64_t LocalStackSize, 15600b57cec5SDimitry Andric bool NeedsWinCFI, 15610b57cec5SDimitry Andric bool *HasWinCFI) { 15620b57cec5SDimitry Andric if (AArch64InstrInfo::isSEHInstruction(MI)) 15630b57cec5SDimitry Andric return; 15640b57cec5SDimitry Andric 15650b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 15660b57cec5SDimitry Andric unsigned Scale; 15670b57cec5SDimitry Andric switch (Opc) { 15680b57cec5SDimitry Andric case AArch64::STPXi: 15690b57cec5SDimitry Andric case AArch64::STRXui: 15700b57cec5SDimitry Andric case AArch64::STPDi: 15710b57cec5SDimitry Andric case AArch64::STRDui: 15720b57cec5SDimitry Andric case AArch64::LDPXi: 15730b57cec5SDimitry Andric case AArch64::LDRXui: 15740b57cec5SDimitry Andric case AArch64::LDPDi: 15750b57cec5SDimitry Andric case AArch64::LDRDui: 15760b57cec5SDimitry Andric Scale = 8; 15770b57cec5SDimitry Andric break; 15780b57cec5SDimitry Andric case AArch64::STPQi: 15790b57cec5SDimitry Andric case AArch64::STRQui: 15800b57cec5SDimitry Andric case AArch64::LDPQi: 15810b57cec5SDimitry Andric case AArch64::LDRQui: 15820b57cec5SDimitry Andric Scale = 16; 15830b57cec5SDimitry Andric break; 15840b57cec5SDimitry Andric default: 15850b57cec5SDimitry Andric llvm_unreachable("Unexpected callee-save save/restore opcode!"); 15860b57cec5SDimitry Andric } 15870b57cec5SDimitry Andric 15880b57cec5SDimitry Andric unsigned OffsetIdx = MI.getNumExplicitOperands() - 1; 15890b57cec5SDimitry Andric assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP && 15900b57cec5SDimitry Andric "Unexpected base register in callee-save save/restore instruction!"); 15910b57cec5SDimitry Andric // Last operand is immediate offset that needs fixing. 15920b57cec5SDimitry Andric MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx); 15930b57cec5SDimitry Andric // All generated opcodes have scaled offsets. 15940b57cec5SDimitry Andric assert(LocalStackSize % Scale == 0); 15950b57cec5SDimitry Andric OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale); 15960b57cec5SDimitry Andric 15970b57cec5SDimitry Andric if (NeedsWinCFI) { 15980b57cec5SDimitry Andric *HasWinCFI = true; 15990b57cec5SDimitry Andric auto MBBI = std::next(MachineBasicBlock::iterator(MI)); 16000b57cec5SDimitry Andric assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction"); 16010b57cec5SDimitry Andric assert(AArch64InstrInfo::isSEHInstruction(*MBBI) && 16020b57cec5SDimitry Andric "Expecting a SEH instruction"); 16030b57cec5SDimitry Andric fixupSEHOpcode(MBBI, LocalStackSize); 16040b57cec5SDimitry Andric } 16050b57cec5SDimitry Andric } 16060b57cec5SDimitry Andric 1607480093f4SDimitry Andric static bool isTargetWindows(const MachineFunction &MF) { 1608480093f4SDimitry Andric return MF.getSubtarget<AArch64Subtarget>().isTargetWindows(); 1609480093f4SDimitry Andric } 1610480093f4SDimitry Andric 1611480093f4SDimitry Andric // Convenience function to determine whether I is an SVE callee save. 
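// These opcodes only count as SVE callee saves when they also carry the
// FrameSetup/FrameDestroy flag, so ordinary SVE loads/stores in the function
// body are not mistaken for spills; the prologue/epilogue code uses this to
// step over (or locate) the SVE callee-save area.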
1612480093f4SDimitry Andric static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { 1613480093f4SDimitry Andric switch (I->getOpcode()) { 1614480093f4SDimitry Andric default: 1615480093f4SDimitry Andric return false; 16160fca6ea1SDimitry Andric case AArch64::PTRUE_C_B: 16170fca6ea1SDimitry Andric case AArch64::LD1B_2Z_IMM: 16180fca6ea1SDimitry Andric case AArch64::ST1B_2Z_IMM: 1619480093f4SDimitry Andric case AArch64::STR_ZXI: 1620480093f4SDimitry Andric case AArch64::STR_PXI: 1621480093f4SDimitry Andric case AArch64::LDR_ZXI: 1622480093f4SDimitry Andric case AArch64::LDR_PXI: 1623480093f4SDimitry Andric return I->getFlag(MachineInstr::FrameSetup) || 1624480093f4SDimitry Andric I->getFlag(MachineInstr::FrameDestroy); 1625480093f4SDimitry Andric } 1626480093f4SDimitry Andric } 1627480093f4SDimitry Andric 162881ad6265SDimitry Andric static void emitShadowCallStackPrologue(const TargetInstrInfo &TII, 162981ad6265SDimitry Andric MachineFunction &MF, 163081ad6265SDimitry Andric MachineBasicBlock &MBB, 163181ad6265SDimitry Andric MachineBasicBlock::iterator MBBI, 163281ad6265SDimitry Andric const DebugLoc &DL, bool NeedsWinCFI, 163381ad6265SDimitry Andric bool NeedsUnwindInfo) { 163481ad6265SDimitry Andric // Shadow call stack prolog: str x30, [x18], #8 163581ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost)) 163681ad6265SDimitry Andric .addReg(AArch64::X18, RegState::Define) 163781ad6265SDimitry Andric .addReg(AArch64::LR) 163881ad6265SDimitry Andric .addReg(AArch64::X18) 163981ad6265SDimitry Andric .addImm(8) 164081ad6265SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 164181ad6265SDimitry Andric 164281ad6265SDimitry Andric // This instruction also makes x18 live-in to the entry block. 164381ad6265SDimitry Andric MBB.addLiveIn(AArch64::X18); 164481ad6265SDimitry Andric 164581ad6265SDimitry Andric if (NeedsWinCFI) 164681ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop)) 164781ad6265SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 164881ad6265SDimitry Andric 164981ad6265SDimitry Andric if (NeedsUnwindInfo) { 165081ad6265SDimitry Andric // Emit a CFI instruction that causes 8 to be subtracted from the value of 165181ad6265SDimitry Andric // x18 when unwinding past this frame. 165281ad6265SDimitry Andric static const char CFIInst[] = { 165381ad6265SDimitry Andric dwarf::DW_CFA_val_expression, 165481ad6265SDimitry Andric 18, // register 165581ad6265SDimitry Andric 2, // length 165681ad6265SDimitry Andric static_cast<char>(unsigned(dwarf::DW_OP_breg18)), 165781ad6265SDimitry Andric static_cast<char>(-8) & 0x7f, // addend (sleb128) 165881ad6265SDimitry Andric }; 165981ad6265SDimitry Andric unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape( 166081ad6265SDimitry Andric nullptr, StringRef(CFIInst, sizeof(CFIInst)))); 166181ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::CFI_INSTRUCTION)) 166281ad6265SDimitry Andric .addCFIIndex(CFIIndex) 166381ad6265SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 166481ad6265SDimitry Andric } 166581ad6265SDimitry Andric } 166681ad6265SDimitry Andric 166781ad6265SDimitry Andric static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII, 166881ad6265SDimitry Andric MachineFunction &MF, 166981ad6265SDimitry Andric MachineBasicBlock &MBB, 167081ad6265SDimitry Andric MachineBasicBlock::iterator MBBI, 167181ad6265SDimitry Andric const DebugLoc &DL) { 167281ad6265SDimitry Andric // Shadow call stack epilog: ldr x30, [x18, #-8]! 
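// (x18 holds the shadow call stack pointer; this pre-indexed load pops the
// return address that the prologue pushed with `str x30, [x18], #8`.)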
167381ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::LDRXpre)) 167481ad6265SDimitry Andric .addReg(AArch64::X18, RegState::Define) 167581ad6265SDimitry Andric .addReg(AArch64::LR, RegState::Define) 167681ad6265SDimitry Andric .addReg(AArch64::X18) 167781ad6265SDimitry Andric .addImm(-8) 167881ad6265SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 167981ad6265SDimitry Andric 1680bdd1243dSDimitry Andric if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF)) { 168181ad6265SDimitry Andric unsigned CFIIndex = 168281ad6265SDimitry Andric MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18)); 168381ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 168481ad6265SDimitry Andric .addCFIIndex(CFIIndex) 168581ad6265SDimitry Andric .setMIFlags(MachineInstr::FrameDestroy); 168681ad6265SDimitry Andric } 168781ad6265SDimitry Andric } 168881ad6265SDimitry Andric 168906c3fb27SDimitry Andric // Define the current CFA rule to use the provided FP. 169006c3fb27SDimitry Andric static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB, 169106c3fb27SDimitry Andric MachineBasicBlock::iterator MBBI, 169206c3fb27SDimitry Andric const DebugLoc &DL, unsigned FixedObject) { 169306c3fb27SDimitry Andric const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); 169406c3fb27SDimitry Andric const AArch64RegisterInfo *TRI = STI.getRegisterInfo(); 169506c3fb27SDimitry Andric const TargetInstrInfo *TII = STI.getInstrInfo(); 169606c3fb27SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 169706c3fb27SDimitry Andric 169806c3fb27SDimitry Andric const int OffsetToFirstCalleeSaveFromFP = 169906c3fb27SDimitry Andric AFI->getCalleeSaveBaseToFrameRecordOffset() - 170006c3fb27SDimitry Andric AFI->getCalleeSavedStackSize(); 170106c3fb27SDimitry Andric Register FramePtr = TRI->getFrameRegister(MF); 170206c3fb27SDimitry Andric unsigned Reg = TRI->getDwarfRegNum(FramePtr, true); 170306c3fb27SDimitry Andric unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( 170406c3fb27SDimitry Andric nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP)); 170506c3fb27SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 170606c3fb27SDimitry Andric .addCFIIndex(CFIIndex) 170706c3fb27SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 170806c3fb27SDimitry Andric } 170906c3fb27SDimitry Andric 17105f757f3fSDimitry Andric #ifndef NDEBUG 17115f757f3fSDimitry Andric /// Collect live registers from the end of \p MI's parent up to (including) \p 17125f757f3fSDimitry Andric /// MI in \p LiveRegs. 
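/// Starting from the block's live-outs, this steps backwards over every
/// instruction down to (and including) \p MI. It is only used in asserts
/// builds to check that newly inserted prologue code does not clobber a
/// register that is live into the function body.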
17135f757f3fSDimitry Andric static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI, 17145f757f3fSDimitry Andric LivePhysRegs &LiveRegs) { 17155f757f3fSDimitry Andric 17165f757f3fSDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 17175f757f3fSDimitry Andric LiveRegs.addLiveOuts(MBB); 17185f757f3fSDimitry Andric for (const MachineInstr &MI : 17195f757f3fSDimitry Andric reverse(make_range(MI.getIterator(), MBB.instr_end()))) 17205f757f3fSDimitry Andric LiveRegs.stepBackward(MI); 17215f757f3fSDimitry Andric } 17225f757f3fSDimitry Andric #endif 17235f757f3fSDimitry Andric 17240b57cec5SDimitry Andric void AArch64FrameLowering::emitPrologue(MachineFunction &MF, 17250b57cec5SDimitry Andric MachineBasicBlock &MBB) const { 17260b57cec5SDimitry Andric MachineBasicBlock::iterator MBBI = MBB.begin(); 17270b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 17280b57cec5SDimitry Andric const Function &F = MF.getFunction(); 17290b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 17300b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 17310b57cec5SDimitry Andric const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 17325f757f3fSDimitry Andric 17330b57cec5SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1734bdd1243dSDimitry Andric bool EmitCFI = AFI->needsDwarfUnwindInfo(MF); 173506c3fb27SDimitry Andric bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF); 17360b57cec5SDimitry Andric bool HasFP = hasFP(MF); 17370b57cec5SDimitry Andric bool NeedsWinCFI = needsWinCFI(MF); 17380b57cec5SDimitry Andric bool HasWinCFI = false; 17390b57cec5SDimitry Andric auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); }); 17400b57cec5SDimitry Andric 17415f757f3fSDimitry Andric MachineBasicBlock::iterator End = MBB.end(); 17425f757f3fSDimitry Andric #ifndef NDEBUG 17435f757f3fSDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 17445f757f3fSDimitry Andric // Collect live register from the end of MBB up to the start of the existing 17455f757f3fSDimitry Andric // frame setup instructions. 17465f757f3fSDimitry Andric MachineBasicBlock::iterator NonFrameStart = MBB.begin(); 17475f757f3fSDimitry Andric while (NonFrameStart != End && 17485f757f3fSDimitry Andric NonFrameStart->getFlag(MachineInstr::FrameSetup)) 17495f757f3fSDimitry Andric ++NonFrameStart; 17505f757f3fSDimitry Andric 17515f757f3fSDimitry Andric LivePhysRegs LiveRegs(*TRI); 17525f757f3fSDimitry Andric if (NonFrameStart != MBB.end()) { 17535f757f3fSDimitry Andric getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs); 17545f757f3fSDimitry Andric // Ignore registers used for stack management for now. 17555f757f3fSDimitry Andric LiveRegs.removeReg(AArch64::SP); 17565f757f3fSDimitry Andric LiveRegs.removeReg(AArch64::X19); 17575f757f3fSDimitry Andric LiveRegs.removeReg(AArch64::FP); 17585f757f3fSDimitry Andric LiveRegs.removeReg(AArch64::LR); 17590fca6ea1SDimitry Andric 17600fca6ea1SDimitry Andric // X0 will be clobbered by a call to __arm_get_current_vg in the prologue. 17610fca6ea1SDimitry Andric // This is necessary to spill VG if required where SVE is unavailable, but 17620fca6ea1SDimitry Andric // X0 is preserved around this call. 
17630fca6ea1SDimitry Andric if (requiresGetVGCall(MF))
17640fca6ea1SDimitry Andric LiveRegs.removeReg(AArch64::X0);
17655f757f3fSDimitry Andric }
17665f757f3fSDimitry Andric
17675f757f3fSDimitry Andric auto VerifyClobberOnExit = make_scope_exit([&]() {
17685f757f3fSDimitry Andric if (NonFrameStart == MBB.end())
17695f757f3fSDimitry Andric return;
17705f757f3fSDimitry Andric // Check if any of the newly inserted instructions clobber any of the live registers.
17715f757f3fSDimitry Andric for (MachineInstr &MI :
17725f757f3fSDimitry Andric make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
17735f757f3fSDimitry Andric for (auto &Op : MI.operands())
17745f757f3fSDimitry Andric if (Op.isReg() && Op.isDef())
17755f757f3fSDimitry Andric assert(!LiveRegs.contains(Op.getReg()) &&
17765f757f3fSDimitry Andric "live register clobbered by inserted prologue instructions");
17775f757f3fSDimitry Andric }
17785f757f3fSDimitry Andric });
17795f757f3fSDimitry Andric #endif
17805f757f3fSDimitry Andric
17810b57cec5SDimitry Andric bool IsFunclet = MBB.isEHFuncletEntry();
17820b57cec5SDimitry Andric
17830b57cec5SDimitry Andric // At this point, we're going to decide whether or not the function uses a
17840b57cec5SDimitry Andric // redzone. In most cases, the function doesn't have a redzone so let's
17850b57cec5SDimitry Andric // assume that's false and set it to true in the case that there's a redzone.
17860b57cec5SDimitry Andric AFI->setHasRedZone(false);
17870b57cec5SDimitry Andric
17880b57cec5SDimitry Andric // Debug location must be unknown since the first debug location is used
17890b57cec5SDimitry Andric // to determine the end of the prologue.
17900b57cec5SDimitry Andric DebugLoc DL;
17910b57cec5SDimitry Andric
1792e8d8bef9SDimitry Andric const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
17935f757f3fSDimitry Andric if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
179481ad6265SDimitry Andric emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
1795bdd1243dSDimitry Andric MFnI.needsDwarfUnwindInfo(MF));
1796fe6060f1SDimitry Andric
1797bdd1243dSDimitry Andric if (MFnI.shouldSignReturnAddress(MF)) {
17985f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
17990b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup);
18005f757f3fSDimitry Andric if (NeedsWinCFI)
18015f757f3fSDimitry Andric HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
18020b57cec5SDimitry Andric }
18030b57cec5SDimitry Andric
180481ad6265SDimitry Andric if (EmitCFI && MFnI.isMTETagged()) {
180581ad6265SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
180681ad6265SDimitry Andric .setMIFlag(MachineInstr::FrameSetup);
180781ad6265SDimitry Andric }
18080b57cec5SDimitry Andric
1809fe6060f1SDimitry Andric // We signal the presence of a Swift extended frame to external tools by
1810fe6060f1SDimitry Andric // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
1811fe6060f1SDimitry Andric // ORR is sufficient; it is assumed a Swift kernel would initialize the TBI
1812fe6060f1SDimitry Andric // bits so that is still true.
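// (When the flag is set unconditionally this is done below with
// ORR x29, x29, #0x1000000000000000, whose logical-immediate encoding is the
// 0x1100 operand, i.e. it sets bit 60 of the frame pointer.)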
1813fe6060f1SDimitry Andric if (HasFP && AFI->hasSwiftAsyncContext()) { 1814349cc55cSDimitry Andric switch (MF.getTarget().Options.SwiftAsyncFramePointer) { 1815349cc55cSDimitry Andric case SwiftAsyncFramePointerMode::DeploymentBased: 1816349cc55cSDimitry Andric if (Subtarget.swiftAsyncContextIsDynamicallySet()) { 1817349cc55cSDimitry Andric // The special symbol below is absolute and has a *value* that can be 1818349cc55cSDimitry Andric // combined with the frame pointer to signal an extended frame. 1819349cc55cSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16) 1820349cc55cSDimitry Andric .addExternalSymbol("swift_async_extendedFramePointerFlags", 1821349cc55cSDimitry Andric AArch64II::MO_GOT); 18225f757f3fSDimitry Andric if (NeedsWinCFI) { 18235f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 18245f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 18255f757f3fSDimitry Andric HasWinCFI = true; 18265f757f3fSDimitry Andric } 1827349cc55cSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP) 1828349cc55cSDimitry Andric .addUse(AArch64::FP) 1829349cc55cSDimitry Andric .addUse(AArch64::X16) 1830349cc55cSDimitry Andric .addImm(Subtarget.isTargetILP32() ? 32 : 0); 18315f757f3fSDimitry Andric if (NeedsWinCFI) { 18325f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 18335f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 18345f757f3fSDimitry Andric HasWinCFI = true; 18355f757f3fSDimitry Andric } 1836349cc55cSDimitry Andric break; 1837349cc55cSDimitry Andric } 1838bdd1243dSDimitry Andric [[fallthrough]]; 1839349cc55cSDimitry Andric 1840349cc55cSDimitry Andric case SwiftAsyncFramePointerMode::Always: 1841fe6060f1SDimitry Andric // ORR x29, x29, #0x1000_0000_0000_0000 1842fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP) 1843fe6060f1SDimitry Andric .addUse(AArch64::FP) 1844fe6060f1SDimitry Andric .addImm(0x1100) 1845fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 18465f757f3fSDimitry Andric if (NeedsWinCFI) { 18475f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 18485f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 18495f757f3fSDimitry Andric HasWinCFI = true; 18505f757f3fSDimitry Andric } 1851349cc55cSDimitry Andric break; 1852349cc55cSDimitry Andric 1853349cc55cSDimitry Andric case SwiftAsyncFramePointerMode::Never: 1854349cc55cSDimitry Andric break; 1855349cc55cSDimitry Andric } 1856fe6060f1SDimitry Andric } 1857fe6060f1SDimitry Andric 18580b57cec5SDimitry Andric // All calls are tail calls in GHC calling conv, and functions have no 18590b57cec5SDimitry Andric // prologue/epilogue. 18600b57cec5SDimitry Andric if (MF.getFunction().getCallingConv() == CallingConv::GHC) 18610b57cec5SDimitry Andric return; 18620b57cec5SDimitry Andric 1863e8d8bef9SDimitry Andric // Set tagged base pointer to the requested stack slot. 18640b57cec5SDimitry Andric // Ideally it should match SP value after prologue. 
1865bdd1243dSDimitry Andric std::optional<int> TBPI = AFI->getTaggedBasePointerIndex(); 1866e8d8bef9SDimitry Andric if (TBPI) 1867e8d8bef9SDimitry Andric AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI)); 1868e8d8bef9SDimitry Andric else 18690b57cec5SDimitry Andric AFI->setTaggedBasePointerOffset(MFI.getStackSize()); 18700b57cec5SDimitry Andric 18718bcb0991SDimitry Andric const StackOffset &SVEStackSize = getSVEStackSize(MF); 18728bcb0991SDimitry Andric 18730b57cec5SDimitry Andric // getStackSize() includes all the locals in its size calculation. We don't 18740b57cec5SDimitry Andric // include these locals when computing the stack size of a funclet, as they 18750b57cec5SDimitry Andric // are allocated in the parent's stack frame and accessed via the frame 18760b57cec5SDimitry Andric // pointer from the funclet. We only save the callee saved registers in the 18770b57cec5SDimitry Andric // funclet, which are really the callee saved registers of the parent 18780b57cec5SDimitry Andric // function, including the funclet. 18790fca6ea1SDimitry Andric int64_t NumBytes = 18800fca6ea1SDimitry Andric IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize(); 18810b57cec5SDimitry Andric if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { 18820b57cec5SDimitry Andric assert(!HasFP && "unexpected function without stack frame but with FP"); 18838bcb0991SDimitry Andric assert(!SVEStackSize && 18848bcb0991SDimitry Andric "unexpected function without stack frame but with SVE objects"); 18850b57cec5SDimitry Andric // All of the stack allocation is for locals. 18860b57cec5SDimitry Andric AFI->setLocalStackSize(NumBytes); 18870b57cec5SDimitry Andric if (!NumBytes) 18880b57cec5SDimitry Andric return; 18890b57cec5SDimitry Andric // REDZONE: If the stack size is less than 128 bytes, we don't need 18900b57cec5SDimitry Andric // to actually allocate. 18910b57cec5SDimitry Andric if (canUseRedZone(MF)) { 18920b57cec5SDimitry Andric AFI->setHasRedZone(true); 18930b57cec5SDimitry Andric ++NumRedZoneFunctions; 18940b57cec5SDimitry Andric } else { 18958bcb0991SDimitry Andric emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, 1896e8d8bef9SDimitry Andric StackOffset::getFixed(-NumBytes), TII, 1897e8d8bef9SDimitry Andric MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); 189881ad6265SDimitry Andric if (EmitCFI) { 18990b57cec5SDimitry Andric // Label used to tie together the PROLOG_LABEL and the MachineMoves. 19000fca6ea1SDimitry Andric MCSymbol *FrameLabel = MF.getContext().createTempSymbol(); 19010b57cec5SDimitry Andric // Encode the stack size of the leaf function. 
19020b57cec5SDimitry Andric unsigned CFIIndex = MF.addFrameInst( 19035ffd83dbSDimitry Andric MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes)); 19040b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 19050b57cec5SDimitry Andric .addCFIIndex(CFIIndex) 19060b57cec5SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 19070b57cec5SDimitry Andric } 19080b57cec5SDimitry Andric } 19090b57cec5SDimitry Andric 19100b57cec5SDimitry Andric if (NeedsWinCFI) { 19110b57cec5SDimitry Andric HasWinCFI = true; 19120b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) 19130b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 19140b57cec5SDimitry Andric } 19150b57cec5SDimitry Andric 19160b57cec5SDimitry Andric return; 19170b57cec5SDimitry Andric } 19180b57cec5SDimitry Andric 19190fca6ea1SDimitry Andric bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()); 192062cfcf62SDimitry Andric unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); 19210b57cec5SDimitry Andric 19220b57cec5SDimitry Andric auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; 19230b57cec5SDimitry Andric // All of the remaining stack allocations are for locals. 19240b57cec5SDimitry Andric AFI->setLocalStackSize(NumBytes - PrologueSaveSize); 19250b57cec5SDimitry Andric bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); 1926fe6060f1SDimitry Andric bool HomPrologEpilog = homogeneousPrologEpilog(MF); 19270b57cec5SDimitry Andric if (CombineSPBump) { 19288bcb0991SDimitry Andric assert(!SVEStackSize && "Cannot combine SP bump with SVE"); 19298bcb0991SDimitry Andric emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, 1930e8d8bef9SDimitry Andric StackOffset::getFixed(-NumBytes), TII, 193181ad6265SDimitry Andric MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI, 193206c3fb27SDimitry Andric EmitAsyncCFI); 19330b57cec5SDimitry Andric NumBytes = 0; 1934fe6060f1SDimitry Andric } else if (HomPrologEpilog) { 1935fe6060f1SDimitry Andric // Stack has been already adjusted. 1936fe6060f1SDimitry Andric NumBytes -= PrologueSaveSize; 19370b57cec5SDimitry Andric } else if (PrologueSaveSize != 0) { 19380b57cec5SDimitry Andric MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( 193981ad6265SDimitry Andric MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI, 194006c3fb27SDimitry Andric EmitAsyncCFI); 19410b57cec5SDimitry Andric NumBytes -= PrologueSaveSize; 19420b57cec5SDimitry Andric } 19430b57cec5SDimitry Andric assert(NumBytes >= 0 && "Negative stack allocation size!?"); 19440b57cec5SDimitry Andric 19450b57cec5SDimitry Andric // Move past the saves of the callee-saved registers, fixing up the offsets 19460b57cec5SDimitry Andric // and pre-inc if we decided to combine the callee-save and local stack 19470b57cec5SDimitry Andric // pointer bump above. 1948480093f4SDimitry Andric while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && 1949480093f4SDimitry Andric !IsSVECalleeSave(MBBI)) { 1950*71ac745dSDimitry Andric if (CombineSPBump && 1951*71ac745dSDimitry Andric // Only fix-up frame-setup load/store instructions. 
1952*71ac745dSDimitry Andric (!requiresSaveVG(MF) || !isVGInstruction(MBBI))) 19530b57cec5SDimitry Andric fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), 19540b57cec5SDimitry Andric NeedsWinCFI, &HasWinCFI); 19550b57cec5SDimitry Andric ++MBBI; 19560b57cec5SDimitry Andric } 19570b57cec5SDimitry Andric 195862cfcf62SDimitry Andric // For funclets the FP belongs to the containing function. 195962cfcf62SDimitry Andric if (!IsFunclet && HasFP) { 19608bcb0991SDimitry Andric // Only set up FP if we actually need to. 1961e8d8bef9SDimitry Andric int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset(); 19628bcb0991SDimitry Andric 19630b57cec5SDimitry Andric if (CombineSPBump) 19640b57cec5SDimitry Andric FPOffset += AFI->getLocalStackSize(); 19650b57cec5SDimitry Andric 1966fe6060f1SDimitry Andric if (AFI->hasSwiftAsyncContext()) { 1967fe6060f1SDimitry Andric // Before we update the live FP we have to ensure there's a valid (or 1968fe6060f1SDimitry Andric // null) asynchronous context in its slot just before FP in the frame 1969fe6060f1SDimitry Andric // record, so store it now. 1970fe6060f1SDimitry Andric const auto &Attrs = MF.getFunction().getAttributes(); 1971fe6060f1SDimitry Andric bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync); 1972fe6060f1SDimitry Andric if (HaveInitialContext) 1973fe6060f1SDimitry Andric MBB.addLiveIn(AArch64::X22); 19745f757f3fSDimitry Andric Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR; 1975fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext)) 19765f757f3fSDimitry Andric .addUse(Reg) 1977fe6060f1SDimitry Andric .addUse(AArch64::SP) 1978fe6060f1SDimitry Andric .addImm(FPOffset - 8) 1979fe6060f1SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 19805f757f3fSDimitry Andric if (NeedsWinCFI) { 19815f757f3fSDimitry Andric // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded 19825f757f3fSDimitry Andric // to multiple instructions, should be mutually-exclusive. 19835f757f3fSDimitry Andric assert(Subtarget.getTargetTriple().getArchName() != "arm64e"); 19845f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 19855f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 19865f757f3fSDimitry Andric HasWinCFI = true; 19875f757f3fSDimitry Andric } 1988fe6060f1SDimitry Andric } 1989fe6060f1SDimitry Andric 1990fe6060f1SDimitry Andric if (HomPrologEpilog) { 1991fe6060f1SDimitry Andric auto Prolog = MBBI; 1992fe6060f1SDimitry Andric --Prolog; 1993fe6060f1SDimitry Andric assert(Prolog->getOpcode() == AArch64::HOM_Prolog); 1994fe6060f1SDimitry Andric Prolog->addOperand(MachineOperand::CreateImm(FPOffset)); 1995fe6060f1SDimitry Andric } else { 19960b57cec5SDimitry Andric // Issue sub fp, sp, FPOffset or 19970b57cec5SDimitry Andric // mov fp,sp when FPOffset is zero. 19980b57cec5SDimitry Andric // Note: All stores of callee-saved registers are marked as "FrameSetup". 19990b57cec5SDimitry Andric // This code marks the instruction(s) that set the FP also. 
20008bcb0991SDimitry Andric emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
2001e8d8bef9SDimitry Andric StackOffset::getFixed(FPOffset), TII,
2002e8d8bef9SDimitry Andric MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
2003bdd1243dSDimitry Andric if (NeedsWinCFI && HasWinCFI) {
2004bdd1243dSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
2005bdd1243dSDimitry Andric .setMIFlag(MachineInstr::FrameSetup);
2006bdd1243dSDimitry Andric // After setting up the FP, the rest of the prolog doesn't need to be
2007bdd1243dSDimitry Andric // included in the SEH unwind info.
2008bdd1243dSDimitry Andric NeedsWinCFI = false;
2009bdd1243dSDimitry Andric }
20100b57cec5SDimitry Andric }
201106c3fb27SDimitry Andric if (EmitAsyncCFI)
201206c3fb27SDimitry Andric emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
201381ad6265SDimitry Andric }
201481ad6265SDimitry Andric
201581ad6265SDimitry Andric // Now emit the moves for whatever callee saved regs we have (including FP,
201681ad6265SDimitry Andric // LR if those are saved). Frame instructions for SVE registers are emitted
201781ad6265SDimitry Andric // later, after the instructions which actually save the SVE regs.
201806c3fb27SDimitry Andric if (EmitAsyncCFI)
201981ad6265SDimitry Andric emitCalleeSavedGPRLocations(MBB, MBBI);
20200b57cec5SDimitry Andric
2021bdd1243dSDimitry Andric // Alignment is required for the parent frame, not the funclet.
2022bdd1243dSDimitry Andric const bool NeedsRealignment =
2023bdd1243dSDimitry Andric NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
20245f757f3fSDimitry Andric const int64_t RealignmentPadding =
2025bdd1243dSDimitry Andric (NeedsRealignment && MFI.getMaxAlign() > Align(16))
2026bdd1243dSDimitry Andric ? MFI.getMaxAlign().value() - 16
2027bdd1243dSDimitry Andric : 0;
2028bdd1243dSDimitry Andric
2029bdd1243dSDimitry Andric if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
2030bdd1243dSDimitry Andric uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
20310b57cec5SDimitry Andric if (NeedsWinCFI) {
20320b57cec5SDimitry Andric HasWinCFI = true;
20330b57cec5SDimitry Andric // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
20340b57cec5SDimitry Andric // exceed this amount. We need to move at most 2^24 - 1 into x15.
20350b57cec5SDimitry Andric // This is at most two instructions, MOVZ followed by MOVK.
20360b57cec5SDimitry Andric // TODO: Fix to use multiple stack alloc unwind codes for stacks
20370b57cec5SDimitry Andric // exceeding 256MB in size.
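// 1 << 28 bytes is 256 MiB; expressed in the 16-byte units that x15 carries
// below, that is 2^24 words, which is why a MOVZ/MOVK pair is always enough.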
20380b57cec5SDimitry Andric if (NumBytes >= (1 << 28)) 20390b57cec5SDimitry Andric report_fatal_error("Stack size cannot exceed 256MB for stack " 20400b57cec5SDimitry Andric "unwinding purposes"); 20410b57cec5SDimitry Andric 20420b57cec5SDimitry Andric uint32_t LowNumWords = NumWords & 0xFFFF; 20430b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15) 20440b57cec5SDimitry Andric .addImm(LowNumWords) 20450b57cec5SDimitry Andric .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 20460b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 20470b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 20480b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 20490b57cec5SDimitry Andric if ((NumWords & 0xFFFF0000) != 0) { 20500b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15) 20510b57cec5SDimitry Andric .addReg(AArch64::X15) 20520b57cec5SDimitry Andric .addImm((NumWords & 0xFFFF0000) >> 16) // High half 20530b57cec5SDimitry Andric .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16)) 20540b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 20550b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 20560b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 20570b57cec5SDimitry Andric } 20580b57cec5SDimitry Andric } else { 20590b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15) 20600b57cec5SDimitry Andric .addImm(NumWords) 20610b57cec5SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 20620b57cec5SDimitry Andric } 20630b57cec5SDimitry Andric 2064bdd1243dSDimitry Andric const char *ChkStk = Subtarget.getChkStkName(); 20650b57cec5SDimitry Andric switch (MF.getTarget().getCodeModel()) { 20660b57cec5SDimitry Andric case CodeModel::Tiny: 20670b57cec5SDimitry Andric case CodeModel::Small: 20680b57cec5SDimitry Andric case CodeModel::Medium: 20690b57cec5SDimitry Andric case CodeModel::Kernel: 20700b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 2071bdd1243dSDimitry Andric .addExternalSymbol(ChkStk) 20720b57cec5SDimitry Andric .addReg(AArch64::X15, RegState::Implicit) 20730b57cec5SDimitry Andric .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) 20740b57cec5SDimitry Andric .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) 20750b57cec5SDimitry Andric .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead) 20760b57cec5SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 20770b57cec5SDimitry Andric if (NeedsWinCFI) { 20780b57cec5SDimitry Andric HasWinCFI = true; 20790b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 20800b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 20810b57cec5SDimitry Andric } 20820b57cec5SDimitry Andric break; 20830b57cec5SDimitry Andric case CodeModel::Large: 20840b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT)) 20850b57cec5SDimitry Andric .addReg(AArch64::X16, RegState::Define) 2086bdd1243dSDimitry Andric .addExternalSymbol(ChkStk) 2087bdd1243dSDimitry Andric .addExternalSymbol(ChkStk) 20880b57cec5SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 20890b57cec5SDimitry Andric if (NeedsWinCFI) { 20900b57cec5SDimitry Andric HasWinCFI = true; 20910b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 20920b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 20930b57cec5SDimitry Andric } 20940b57cec5SDimitry 
Andric 20955ffd83dbSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF))) 20960b57cec5SDimitry Andric .addReg(AArch64::X16, RegState::Kill) 20970b57cec5SDimitry Andric .addReg(AArch64::X15, RegState::Implicit | RegState::Define) 20980b57cec5SDimitry Andric .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) 20990b57cec5SDimitry Andric .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) 21000b57cec5SDimitry Andric .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead) 21010b57cec5SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 21020b57cec5SDimitry Andric if (NeedsWinCFI) { 21030b57cec5SDimitry Andric HasWinCFI = true; 21040b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 21050b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 21060b57cec5SDimitry Andric } 21070b57cec5SDimitry Andric break; 21080b57cec5SDimitry Andric } 21090b57cec5SDimitry Andric 21100b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP) 21110b57cec5SDimitry Andric .addReg(AArch64::SP, RegState::Kill) 21120b57cec5SDimitry Andric .addReg(AArch64::X15, RegState::Kill) 21130b57cec5SDimitry Andric .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4)) 21140b57cec5SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 21150b57cec5SDimitry Andric if (NeedsWinCFI) { 21160b57cec5SDimitry Andric HasWinCFI = true; 21170b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) 21180b57cec5SDimitry Andric .addImm(NumBytes) 21190b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 21200b57cec5SDimitry Andric } 21210b57cec5SDimitry Andric NumBytes = 0; 2122bdd1243dSDimitry Andric 2123bdd1243dSDimitry Andric if (RealignmentPadding > 0) { 212406c3fb27SDimitry Andric if (RealignmentPadding >= 4096) { 212506c3fb27SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm)) 212606c3fb27SDimitry Andric .addReg(AArch64::X16, RegState::Define) 212706c3fb27SDimitry Andric .addImm(RealignmentPadding) 212806c3fb27SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 212906c3fb27SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15) 213006c3fb27SDimitry Andric .addReg(AArch64::SP) 213106c3fb27SDimitry Andric .addReg(AArch64::X16, RegState::Kill) 213206c3fb27SDimitry Andric .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0)) 213306c3fb27SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 213406c3fb27SDimitry Andric } else { 2135bdd1243dSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15) 2136bdd1243dSDimitry Andric .addReg(AArch64::SP) 2137bdd1243dSDimitry Andric .addImm(RealignmentPadding) 213806c3fb27SDimitry Andric .addImm(0) 213906c3fb27SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 214006c3fb27SDimitry Andric } 2141bdd1243dSDimitry Andric 2142bdd1243dSDimitry Andric uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1); 2143bdd1243dSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) 2144bdd1243dSDimitry Andric .addReg(AArch64::X15, RegState::Kill) 2145bdd1243dSDimitry Andric .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)); 2146bdd1243dSDimitry Andric AFI->setStackRealigned(true); 2147bdd1243dSDimitry Andric 2148bdd1243dSDimitry Andric // No need for SEH instructions here; if we're realigning the stack, 2149bdd1243dSDimitry Andric // we've set a frame pointer and already finished the SEH prologue. 
2150bdd1243dSDimitry Andric assert(!NeedsWinCFI); 2151bdd1243dSDimitry Andric } 21520b57cec5SDimitry Andric } 21530b57cec5SDimitry Andric 21545f757f3fSDimitry Andric StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize; 2155480093f4SDimitry Andric MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI; 2156480093f4SDimitry Andric 2157480093f4SDimitry Andric // Process the SVE callee-saves to determine what space needs to be 2158480093f4SDimitry Andric // allocated. 2159979e22ffSDimitry Andric if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { 21605f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize 21615f757f3fSDimitry Andric << "\n"); 2162480093f4SDimitry Andric // Find callee save instructions in frame. 2163480093f4SDimitry Andric CalleeSavesBegin = MBBI; 2164480093f4SDimitry Andric assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction"); 2165480093f4SDimitry Andric while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator()) 2166480093f4SDimitry Andric ++MBBI; 2167480093f4SDimitry Andric CalleeSavesEnd = MBBI; 2168480093f4SDimitry Andric 21695f757f3fSDimitry Andric SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize); 21705f757f3fSDimitry Andric SVELocalsSize = SVEStackSize - SVECalleeSavesSize; 2171480093f4SDimitry Andric } 2172480093f4SDimitry Andric 2173480093f4SDimitry Andric // Allocate space for the callee saves (if any). 21745f757f3fSDimitry Andric StackOffset CFAOffset = 21755f757f3fSDimitry Andric StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes); 21765f757f3fSDimitry Andric StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes); 21775f757f3fSDimitry Andric allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false, 21785f757f3fSDimitry Andric nullptr, EmitAsyncCFI && !HasFP, CFAOffset, 21795f757f3fSDimitry Andric MFI.hasVarSizedObjects() || LocalsSize); 21805f757f3fSDimitry Andric CFAOffset += SVECalleeSavesSize; 218181ad6265SDimitry Andric 218206c3fb27SDimitry Andric if (EmitAsyncCFI) 218381ad6265SDimitry Andric emitCalleeSavedSVELocations(MBB, CalleeSavesEnd); 2184480093f4SDimitry Andric 21855f757f3fSDimitry Andric // Allocate space for the rest of the frame including SVE locals. Align the 21865f757f3fSDimitry Andric // stack as necessary. 21875f757f3fSDimitry Andric assert(!(canUseRedZone(MF) && NeedsRealignment) && 21885f757f3fSDimitry Andric "Cannot use redzone with stack realignment"); 218981ad6265SDimitry Andric if (!canUseRedZone(MF)) { 21900b57cec5SDimitry Andric // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have 21910b57cec5SDimitry Andric // the correct value here, as NumBytes also includes padding bytes, 21920b57cec5SDimitry Andric // which shouldn't be counted here. 21935f757f3fSDimitry Andric allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding, 21945f757f3fSDimitry Andric SVELocalsSize + StackOffset::getFixed(NumBytes), 21955f757f3fSDimitry Andric NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, 21965f757f3fSDimitry Andric CFAOffset, MFI.hasVarSizedObjects()); 21970b57cec5SDimitry Andric } 21980b57cec5SDimitry Andric 21990b57cec5SDimitry Andric // If we need a base pointer, set it up here. It's whatever the value of the 22000b57cec5SDimitry Andric // stack pointer is at this point. Any variable size objects will be allocated 22010b57cec5SDimitry Andric // after this, so we can still use the base pointer to reference locals. 
22020b57cec5SDimitry Andric // 22030b57cec5SDimitry Andric // FIXME: Clarify FrameSetup flags here. 22040b57cec5SDimitry Andric // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is 22050b57cec5SDimitry Andric // needed. 220662cfcf62SDimitry Andric // For funclets the BP belongs to the containing function. 220762cfcf62SDimitry Andric if (!IsFunclet && RegInfo->hasBasePointer(MF)) { 22080b57cec5SDimitry Andric TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, 22090b57cec5SDimitry Andric false); 22100b57cec5SDimitry Andric if (NeedsWinCFI) { 22110b57cec5SDimitry Andric HasWinCFI = true; 22120b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 22130b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 22140b57cec5SDimitry Andric } 22150b57cec5SDimitry Andric } 22160b57cec5SDimitry Andric 22170b57cec5SDimitry Andric // The very last FrameSetup instruction indicates the end of prologue. Emit a 22180b57cec5SDimitry Andric // SEH opcode indicating the prologue end. 22190b57cec5SDimitry Andric if (NeedsWinCFI && HasWinCFI) { 22200b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) 22210b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 22220b57cec5SDimitry Andric } 22230b57cec5SDimitry Andric 222462cfcf62SDimitry Andric // SEH funclets are passed the frame pointer in X1. If the parent 222562cfcf62SDimitry Andric // function uses the base register, then the base register is used 222662cfcf62SDimitry Andric // directly, and is not retrieved from X1. 222762cfcf62SDimitry Andric if (IsFunclet && F.hasPersonalityFn()) { 222862cfcf62SDimitry Andric EHPersonality Per = classifyEHPersonality(F.getPersonalityFn()); 222962cfcf62SDimitry Andric if (isAsynchronousEHPersonality(Per)) { 223062cfcf62SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP) 223162cfcf62SDimitry Andric .addReg(AArch64::X1) 223262cfcf62SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 223362cfcf62SDimitry Andric MBB.addLiveIn(AArch64::X1); 223462cfcf62SDimitry Andric } 223562cfcf62SDimitry Andric } 223606c3fb27SDimitry Andric 223706c3fb27SDimitry Andric if (EmitCFI && !EmitAsyncCFI) { 223806c3fb27SDimitry Andric if (HasFP) { 223906c3fb27SDimitry Andric emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject); 224006c3fb27SDimitry Andric } else { 224106c3fb27SDimitry Andric StackOffset TotalSize = 224206c3fb27SDimitry Andric SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize()); 224306c3fb27SDimitry Andric unsigned CFIIndex = MF.addFrameInst(createDefCFA( 224406c3fb27SDimitry Andric *RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, TotalSize, 224506c3fb27SDimitry Andric /*LastAdjustmentWasScalable=*/false)); 224606c3fb27SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 224706c3fb27SDimitry Andric .addCFIIndex(CFIIndex) 224806c3fb27SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 224906c3fb27SDimitry Andric } 225006c3fb27SDimitry Andric emitCalleeSavedGPRLocations(MBB, MBBI); 225106c3fb27SDimitry Andric emitCalleeSavedSVELocations(MBB, MBBI); 225206c3fb27SDimitry Andric } 22530b57cec5SDimitry Andric } 22540b57cec5SDimitry Andric 22550b57cec5SDimitry Andric static bool isFuncletReturnInstr(const MachineInstr &MI) { 22560b57cec5SDimitry Andric switch (MI.getOpcode()) { 22570b57cec5SDimitry Andric default: 22580b57cec5SDimitry Andric return false; 22590b57cec5SDimitry Andric case AArch64::CATCHRET: 22600b57cec5SDimitry Andric case AArch64::CLEANUPRET: 
22610b57cec5SDimitry Andric return true; 22620b57cec5SDimitry Andric } 22630b57cec5SDimitry Andric } 22640b57cec5SDimitry Andric 22650b57cec5SDimitry Andric void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, 22660b57cec5SDimitry Andric MachineBasicBlock &MBB) const { 22670b57cec5SDimitry Andric MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 22680b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 22695f757f3fSDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 22700b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 22710b57cec5SDimitry Andric const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 22720b57cec5SDimitry Andric DebugLoc DL; 22730b57cec5SDimitry Andric bool NeedsWinCFI = needsWinCFI(MF); 22745f757f3fSDimitry Andric bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF); 22750b57cec5SDimitry Andric bool HasWinCFI = false; 22760b57cec5SDimitry Andric bool IsFunclet = false; 22770b57cec5SDimitry Andric 22780b57cec5SDimitry Andric if (MBB.end() != MBBI) { 22790b57cec5SDimitry Andric DL = MBBI->getDebugLoc(); 22800b57cec5SDimitry Andric IsFunclet = isFuncletReturnInstr(*MBBI); 22810b57cec5SDimitry Andric } 22820b57cec5SDimitry Andric 22835f757f3fSDimitry Andric MachineBasicBlock::iterator EpilogStartI = MBB.end(); 22845f757f3fSDimitry Andric 228581ad6265SDimitry Andric auto FinishingTouches = make_scope_exit([&]() { 22865f757f3fSDimitry Andric if (AFI->shouldSignReturnAddress(MF)) { 22875f757f3fSDimitry Andric BuildMI(MBB, MBB.getFirstTerminator(), DL, 22885f757f3fSDimitry Andric TII->get(AArch64::PAUTH_EPILOGUE)) 22895f757f3fSDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 22905f757f3fSDimitry Andric if (NeedsWinCFI) 22915f757f3fSDimitry Andric HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR 22925f757f3fSDimitry Andric } 22935f757f3fSDimitry Andric if (AFI->needsShadowCallStackPrologueEpilogue(MF)) 229481ad6265SDimitry Andric emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL); 229581ad6265SDimitry Andric if (EmitCFI) 229681ad6265SDimitry Andric emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator()); 22975f757f3fSDimitry Andric if (HasWinCFI) { 2298bdd1243dSDimitry Andric BuildMI(MBB, MBB.getFirstTerminator(), DL, 2299bdd1243dSDimitry Andric TII->get(AArch64::SEH_EpilogEnd)) 2300bdd1243dSDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 23015f757f3fSDimitry Andric if (!MF.hasWinCFI()) 23025f757f3fSDimitry Andric MF.setHasWinCFI(true); 23035f757f3fSDimitry Andric } 23045f757f3fSDimitry Andric if (NeedsWinCFI) { 23055f757f3fSDimitry Andric assert(EpilogStartI != MBB.end()); 23065f757f3fSDimitry Andric if (!HasWinCFI) 23075f757f3fSDimitry Andric MBB.erase(EpilogStartI); 23085f757f3fSDimitry Andric } 230981ad6265SDimitry Andric }); 231081ad6265SDimitry Andric 2311480093f4SDimitry Andric int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF) 23120b57cec5SDimitry Andric : MFI.getStackSize(); 23130b57cec5SDimitry Andric 23140b57cec5SDimitry Andric // All calls are tail calls in GHC calling conv, and functions have no 23150b57cec5SDimitry Andric // prologue/epilogue. 23160b57cec5SDimitry Andric if (MF.getFunction().getCallingConv() == CallingConv::GHC) 23170b57cec5SDimitry Andric return; 23180b57cec5SDimitry Andric 2319fe6060f1SDimitry Andric // How much of the stack used by incoming arguments this function is expected 2320fe6060f1SDimitry Andric // to restore in this particular epilogue. 
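// This is zero for most functions; it is only non-zero when the callee is
// expected to pop its own stack arguments (the fastcc case noted further
// down).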
2321fe6060f1SDimitry Andric int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB); 23220fca6ea1SDimitry Andric bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(), 23230fca6ea1SDimitry Andric MF.getFunction().isVarArg()); 232462cfcf62SDimitry Andric unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); 23250b57cec5SDimitry Andric 2326fe6060f1SDimitry Andric int64_t AfterCSRPopSize = ArgumentStackToRestore; 23270b57cec5SDimitry Andric auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; 23280b57cec5SDimitry Andric // We cannot rely on the local stack size set in emitPrologue if the function 23290b57cec5SDimitry Andric // has funclets, as funclets have different local stack size requirements, and 23300b57cec5SDimitry Andric // the current value set in emitPrologue may be that of the containing 23310b57cec5SDimitry Andric // function. 23320b57cec5SDimitry Andric if (MF.hasEHFunclets()) 23330b57cec5SDimitry Andric AFI->setLocalStackSize(NumBytes - PrologueSaveSize); 2334fe6060f1SDimitry Andric if (homogeneousPrologEpilog(MF, &MBB)) { 2335fe6060f1SDimitry Andric assert(!NeedsWinCFI); 2336fe6060f1SDimitry Andric auto LastPopI = MBB.getFirstTerminator(); 2337fe6060f1SDimitry Andric if (LastPopI != MBB.begin()) { 2338fe6060f1SDimitry Andric auto HomogeneousEpilog = std::prev(LastPopI); 2339fe6060f1SDimitry Andric if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog) 2340fe6060f1SDimitry Andric LastPopI = HomogeneousEpilog; 2341fe6060f1SDimitry Andric } 2342fe6060f1SDimitry Andric 2343fe6060f1SDimitry Andric // Adjust local stack 2344fe6060f1SDimitry Andric emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, 2345fe6060f1SDimitry Andric StackOffset::getFixed(AFI->getLocalStackSize()), TII, 23465f757f3fSDimitry Andric MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); 2347fe6060f1SDimitry Andric 2348fe6060f1SDimitry Andric // SP has been already adjusted while restoring callee save regs. 2349fe6060f1SDimitry Andric // We've bailed-out the case with adjusting SP for arguments. 2350fe6060f1SDimitry Andric assert(AfterCSRPopSize == 0); 2351fe6060f1SDimitry Andric return; 2352fe6060f1SDimitry Andric } 23535ffd83dbSDimitry Andric bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes); 23540b57cec5SDimitry Andric // Assume we can't combine the last pop with the sp restore. 23550b57cec5SDimitry Andric 235681ad6265SDimitry Andric bool CombineAfterCSRBump = false; 23570b57cec5SDimitry Andric if (!CombineSPBump && PrologueSaveSize != 0) { 23580b57cec5SDimitry Andric MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); 235981ad6265SDimitry Andric while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION || 236081ad6265SDimitry Andric AArch64InstrInfo::isSEHInstruction(*Pop)) 23610b57cec5SDimitry Andric Pop = std::prev(Pop); 23620b57cec5SDimitry Andric // Converting the last ldp to a post-index ldp is valid only if the last 23630b57cec5SDimitry Andric // ldp's offset is 0. 23640b57cec5SDimitry Andric const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1); 2365fe6060f1SDimitry Andric // If the offset is 0 and the AfterCSR pop is not actually trying to 2366fe6060f1SDimitry Andric // allocate more stack for arguments (in space that an untimely interrupt 2367fe6060f1SDimitry Andric // may clobber), convert it to a post-index ldp. 
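// For instance, a final "ldp x29, x30, [sp]" restore becomes
// "ldp x29, x30, [sp], #PrologueSaveSize", folding the SP restore into the
// last callee-save reload.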
236881ad6265SDimitry Andric if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
23690b57cec5SDimitry Andric convertCalleeSaveRestoreToSPPrePostIncDec(
237081ad6265SDimitry Andric MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
237181ad6265SDimitry Andric MachineInstr::FrameDestroy, PrologueSaveSize);
237281ad6265SDimitry Andric } else {
23730b57cec5SDimitry Andric // If not, make sure to emit an add after the last ldp.
23740b57cec5SDimitry Andric // We're doing this by transferring the size to be restored from the
23750b57cec5SDimitry Andric // adjustment *before* the CSR pops to the adjustment *after* the CSR
23760b57cec5SDimitry Andric // pops.
23770b57cec5SDimitry Andric AfterCSRPopSize += PrologueSaveSize;
237881ad6265SDimitry Andric CombineAfterCSRBump = true;
23790b57cec5SDimitry Andric }
23800b57cec5SDimitry Andric }
23810b57cec5SDimitry Andric
23820b57cec5SDimitry Andric // Move past the restores of the callee-saved registers.
23830b57cec5SDimitry Andric // If we plan on combining the sp bump of the local stack size and the callee
23840b57cec5SDimitry Andric // save stack size, we might need to adjust the CSR save and restore offsets.
23850b57cec5SDimitry Andric MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
23860b57cec5SDimitry Andric MachineBasicBlock::iterator Begin = MBB.begin();
23870b57cec5SDimitry Andric while (LastPopI != Begin) {
23880b57cec5SDimitry Andric --LastPopI;
2389480093f4SDimitry Andric if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
2390480093f4SDimitry Andric IsSVECalleeSave(LastPopI)) {
23910b57cec5SDimitry Andric ++LastPopI;
23920b57cec5SDimitry Andric break;
23930b57cec5SDimitry Andric } else if (CombineSPBump)
23940b57cec5SDimitry Andric fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
23950b57cec5SDimitry Andric NeedsWinCFI, &HasWinCFI);
23960b57cec5SDimitry Andric }
23970b57cec5SDimitry Andric
23985f757f3fSDimitry Andric if (NeedsWinCFI) {
23995f757f3fSDimitry Andric // Note that there are cases where we insert SEH opcodes in the
24005f757f3fSDimitry Andric // epilogue when we had no SEH opcodes in the prologue. For
24015f757f3fSDimitry Andric // example, when there is no stack frame but there are stack
24025f757f3fSDimitry Andric // arguments. Insert the SEH_EpilogStart and remove it later if
24035f757f3fSDimitry Andric // we didn't emit any SEH opcodes to avoid generating WinCFI for
24045f757f3fSDimitry Andric // functions that don't need it.
24050b57cec5SDimitry Andric BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
24060b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy);
24075f757f3fSDimitry Andric EpilogStartI = LastPopI;
24085f757f3fSDimitry Andric --EpilogStartI;
24090b57cec5SDimitry Andric }
24100b57cec5SDimitry Andric
2411fe6060f1SDimitry Andric if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
241281ad6265SDimitry Andric switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
241381ad6265SDimitry Andric case SwiftAsyncFramePointerMode::DeploymentBased:
241481ad6265SDimitry Andric // Avoid the reload as it is GOT relative, and instead fall back to the
241581ad6265SDimitry Andric // hardcoded value below. This allows a mismatch between the OS and
241681ad6265SDimitry Andric // application without immediately terminating on the difference.
2417bdd1243dSDimitry Andric [[fallthrough]];
241881ad6265SDimitry Andric case SwiftAsyncFramePointerMode::Always:
241981ad6265SDimitry Andric // We need to reset FP to its untagged state on return.
Bit 60 is 242081ad6265SDimitry Andric // currently used to show the presence of an extended frame. 2421fe6060f1SDimitry Andric 2422fe6060f1SDimitry Andric // BIC x29, x29, #0x1000_0000_0000_0000 2423fe6060f1SDimitry Andric BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri), 2424fe6060f1SDimitry Andric AArch64::FP) 2425fe6060f1SDimitry Andric .addUse(AArch64::FP) 2426fe6060f1SDimitry Andric .addImm(0x10fe) 2427fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 24285f757f3fSDimitry Andric if (NeedsWinCFI) { 24295f757f3fSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) 24305f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameDestroy); 24315f757f3fSDimitry Andric HasWinCFI = true; 24325f757f3fSDimitry Andric } 243381ad6265SDimitry Andric break; 243481ad6265SDimitry Andric 243581ad6265SDimitry Andric case SwiftAsyncFramePointerMode::Never: 243681ad6265SDimitry Andric break; 243781ad6265SDimitry Andric } 2438fe6060f1SDimitry Andric } 2439fe6060f1SDimitry Andric 24408bcb0991SDimitry Andric const StackOffset &SVEStackSize = getSVEStackSize(MF); 24418bcb0991SDimitry Andric 24420b57cec5SDimitry Andric // If there is a single SP update, insert it before the ret and we're done. 24430b57cec5SDimitry Andric if (CombineSPBump) { 24448bcb0991SDimitry Andric assert(!SVEStackSize && "Cannot combine SP bump with SVE"); 244581ad6265SDimitry Andric 244681ad6265SDimitry Andric // When we are about to restore the CSRs, the CFA register is SP again. 244781ad6265SDimitry Andric if (EmitCFI && hasFP(MF)) { 244881ad6265SDimitry Andric const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo(); 244981ad6265SDimitry Andric unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true); 245081ad6265SDimitry Andric unsigned CFIIndex = 245181ad6265SDimitry Andric MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes)); 245281ad6265SDimitry Andric BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 245381ad6265SDimitry Andric .addCFIIndex(CFIIndex) 245481ad6265SDimitry Andric .setMIFlags(MachineInstr::FrameDestroy); 245581ad6265SDimitry Andric } 245681ad6265SDimitry Andric 24570b57cec5SDimitry Andric emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, 2458e8d8bef9SDimitry Andric StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize), 2459e8d8bef9SDimitry Andric TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, 246081ad6265SDimitry Andric &HasWinCFI, EmitCFI, StackOffset::getFixed(NumBytes)); 24610b57cec5SDimitry Andric return; 24620b57cec5SDimitry Andric } 24630b57cec5SDimitry Andric 24640b57cec5SDimitry Andric NumBytes -= PrologueSaveSize; 24650b57cec5SDimitry Andric assert(NumBytes >= 0 && "Negative stack allocation size!?"); 24660b57cec5SDimitry Andric 2467480093f4SDimitry Andric // Process the SVE callee-saves to determine what space needs to be 2468480093f4SDimitry Andric // deallocated. 
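// DeallocateBefore ends up covering the SVE locals (freed before the SVE
// callee-save reloads); DeallocateAfter covers the SVE callee-save area
// itself (freed after the reloads).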
2469480093f4SDimitry Andric StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
2470480093f4SDimitry Andric MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
2471979e22ffSDimitry Andric if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
247216d6b3b3SDimitry Andric RestoreBegin = std::prev(RestoreEnd);
247316d6b3b3SDimitry Andric while (RestoreBegin != MBB.begin() &&
247416d6b3b3SDimitry Andric IsSVECalleeSave(std::prev(RestoreBegin)))
2475480093f4SDimitry Andric --RestoreBegin;
2476480093f4SDimitry Andric
2477480093f4SDimitry Andric assert(IsSVECalleeSave(RestoreBegin) &&
2478480093f4SDimitry Andric IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
2479480093f4SDimitry Andric
2480e8d8bef9SDimitry Andric StackOffset CalleeSavedSizeAsOffset =
2481e8d8bef9SDimitry Andric StackOffset::getScalable(CalleeSavedSize);
2482979e22ffSDimitry Andric DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
2483979e22ffSDimitry Andric DeallocateAfter = CalleeSavedSizeAsOffset;
2484480093f4SDimitry Andric }
2485480093f4SDimitry Andric
24868bcb0991SDimitry Andric // Deallocate the SVE area.
2487480093f4SDimitry Andric if (SVEStackSize) {
248881ad6265SDimitry Andric // If we have stack realignment or variable sized objects on the stack,
248981ad6265SDimitry Andric // restore the stack pointer from the frame pointer prior to SVE CSR
249081ad6265SDimitry Andric // restoration.
249181ad6265SDimitry Andric if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
249281ad6265SDimitry Andric if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
2493979e22ffSDimitry Andric // Set SP to start of SVE callee-save area from which they can
2494979e22ffSDimitry Andric // be reloaded. The code below will deallocate the stack space
2495480093f4SDimitry Andric // by moving FP -> SP.
2496480093f4SDimitry Andric emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
2497e8d8bef9SDimitry Andric StackOffset::getScalable(-CalleeSavedSize), TII,
2498979e22ffSDimitry Andric MachineInstr::FrameDestroy);
249981ad6265SDimitry Andric }
2500480093f4SDimitry Andric } else {
2501480093f4SDimitry Andric if (AFI->getSVECalleeSavedStackSize()) {
2502480093f4SDimitry Andric // Deallocate the non-SVE locals first before we can deallocate (and
2503480093f4SDimitry Andric // restore callee saves) from the SVE area.
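// This bump leaves SP at the base of the SVE area, so the scalable
// adjustments that follow can be expressed purely in VL-scaled units.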
250481ad6265SDimitry Andric emitFrameOffset( 250581ad6265SDimitry Andric MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, 250681ad6265SDimitry Andric StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy, 250781ad6265SDimitry Andric false, false, nullptr, EmitCFI && !hasFP(MF), 250881ad6265SDimitry Andric SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize)); 2509480093f4SDimitry Andric NumBytes = 0; 2510480093f4SDimitry Andric } 2511480093f4SDimitry Andric 2512480093f4SDimitry Andric emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP, 251381ad6265SDimitry Andric DeallocateBefore, TII, MachineInstr::FrameDestroy, false, 251481ad6265SDimitry Andric false, nullptr, EmitCFI && !hasFP(MF), 251581ad6265SDimitry Andric SVEStackSize + 251681ad6265SDimitry Andric StackOffset::getFixed(NumBytes + PrologueSaveSize)); 2517480093f4SDimitry Andric 2518480093f4SDimitry Andric emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, 251981ad6265SDimitry Andric DeallocateAfter, TII, MachineInstr::FrameDestroy, false, 252081ad6265SDimitry Andric false, nullptr, EmitCFI && !hasFP(MF), 252181ad6265SDimitry Andric DeallocateAfter + 252281ad6265SDimitry Andric StackOffset::getFixed(NumBytes + PrologueSaveSize)); 2523480093f4SDimitry Andric } 252481ad6265SDimitry Andric if (EmitCFI) 252581ad6265SDimitry Andric emitCalleeSavedSVERestores(MBB, RestoreEnd); 2526480093f4SDimitry Andric } 25278bcb0991SDimitry Andric 25280b57cec5SDimitry Andric if (!hasFP(MF)) { 25290b57cec5SDimitry Andric bool RedZone = canUseRedZone(MF); 25300b57cec5SDimitry Andric // If this was a redzone leaf function, we don't need to restore the 25310b57cec5SDimitry Andric // stack pointer (but we may need to pop stack args for fastcc). 25320b57cec5SDimitry Andric if (RedZone && AfterCSRPopSize == 0) 25330b57cec5SDimitry Andric return; 25340b57cec5SDimitry Andric 253581ad6265SDimitry Andric // Pop the local variables off the stack. If there are no callee-saved 253681ad6265SDimitry Andric // registers, it means we are actually positioned at the terminator and can 253781ad6265SDimitry Andric // combine stack increment for the locals and the stack increment for 253881ad6265SDimitry Andric // callee-popped arguments into (possibly) a single instruction and be done. 25390b57cec5SDimitry Andric bool NoCalleeSaveRestore = PrologueSaveSize == 0; 2540480093f4SDimitry Andric int64_t StackRestoreBytes = RedZone ? 0 : NumBytes; 25410b57cec5SDimitry Andric if (NoCalleeSaveRestore) 25420b57cec5SDimitry Andric StackRestoreBytes += AfterCSRPopSize; 25430b57cec5SDimitry Andric 254481ad6265SDimitry Andric emitFrameOffset( 254581ad6265SDimitry Andric MBB, LastPopI, DL, AArch64::SP, AArch64::SP, 254681ad6265SDimitry Andric StackOffset::getFixed(StackRestoreBytes), TII, 254781ad6265SDimitry Andric MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI, 254881ad6265SDimitry Andric StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize)); 254981ad6265SDimitry Andric 25500b57cec5SDimitry Andric // If we were able to combine the local stack pop with the argument pop, 25510b57cec5SDimitry Andric // then we're done. 255281ad6265SDimitry Andric if (NoCalleeSaveRestore || AfterCSRPopSize == 0) { 25530b57cec5SDimitry Andric return; 25540b57cec5SDimitry Andric } 25550b57cec5SDimitry Andric 25560b57cec5SDimitry Andric NumBytes = 0; 25570b57cec5SDimitry Andric } 25580b57cec5SDimitry Andric 25590b57cec5SDimitry Andric // Restore the original stack pointer. 
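// With a realigned stack or variable-sized objects the final SP cannot be
// recomputed from static offsets, so it is re-derived from FP below;
// otherwise a plain SP adjustment by NumBytes is enough.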
25600b57cec5SDimitry Andric // FIXME: Rather than doing the math here, we should instead just use 25610b57cec5SDimitry Andric // non-post-indexed loads for the restores if we aren't actually going to 25620b57cec5SDimitry Andric // be able to save any instructions. 25638bcb0991SDimitry Andric if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { 2564e8d8bef9SDimitry Andric emitFrameOffset( 2565e8d8bef9SDimitry Andric MBB, LastPopI, DL, AArch64::SP, AArch64::FP, 2566e8d8bef9SDimitry Andric StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()), 25675f757f3fSDimitry Andric TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); 25688bcb0991SDimitry Andric } else if (NumBytes) 25698bcb0991SDimitry Andric emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, 2570e8d8bef9SDimitry Andric StackOffset::getFixed(NumBytes), TII, 25715f757f3fSDimitry Andric MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); 25720b57cec5SDimitry Andric 257381ad6265SDimitry Andric // When we are about to restore the CSRs, the CFA register is SP again. 257481ad6265SDimitry Andric if (EmitCFI && hasFP(MF)) { 257581ad6265SDimitry Andric const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo(); 257681ad6265SDimitry Andric unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true); 257781ad6265SDimitry Andric unsigned CFIIndex = MF.addFrameInst( 257881ad6265SDimitry Andric MCCFIInstruction::cfiDefCfa(nullptr, Reg, PrologueSaveSize)); 257981ad6265SDimitry Andric BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 258081ad6265SDimitry Andric .addCFIIndex(CFIIndex) 258181ad6265SDimitry Andric .setMIFlags(MachineInstr::FrameDestroy); 258281ad6265SDimitry Andric } 258381ad6265SDimitry Andric 25840b57cec5SDimitry Andric // This must be placed after the callee-save restore code because that code 25850b57cec5SDimitry Andric // assumes the SP is at the same location as it was after the callee-save save 25860b57cec5SDimitry Andric // code in the prologue. 25870b57cec5SDimitry Andric if (AfterCSRPopSize) { 2588fe6060f1SDimitry Andric assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an " 2589fe6060f1SDimitry Andric "interrupt may have clobbered"); 25900b57cec5SDimitry Andric 259181ad6265SDimitry Andric emitFrameOffset( 259281ad6265SDimitry Andric MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, 259381ad6265SDimitry Andric StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy, 259481ad6265SDimitry Andric false, NeedsWinCFI, &HasWinCFI, EmitCFI, 259581ad6265SDimitry Andric StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0)); 25960b57cec5SDimitry Andric } 25970b57cec5SDimitry Andric } 25980b57cec5SDimitry Andric 259906c3fb27SDimitry Andric bool AArch64FrameLowering::enableCFIFixup(MachineFunction &MF) const { 260006c3fb27SDimitry Andric return TargetFrameLowering::enableCFIFixup(MF) && 260106c3fb27SDimitry Andric MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF); 260206c3fb27SDimitry Andric } 260306c3fb27SDimitry Andric 26040b57cec5SDimitry Andric /// getFrameIndexReference - Provide a base+offset reference to an FI slot for 26050b57cec5SDimitry Andric /// debug info. It's the same as what we use for resolving the code-gen 26060b57cec5SDimitry Andric /// references for now. FIXME: This can go wrong when references are 26070b57cec5SDimitry Andric /// SP-relative and simple call frames aren't used. 
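/// The returned offset is relative to the register reported back in FrameReg.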
2608e8d8bef9SDimitry Andric StackOffset 2609e8d8bef9SDimitry Andric AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, 26105ffd83dbSDimitry Andric Register &FrameReg) const { 26110b57cec5SDimitry Andric return resolveFrameIndexReference( 26120b57cec5SDimitry Andric MF, FI, FrameReg, 26130b57cec5SDimitry Andric /*PreferFP=*/ 26140fca6ea1SDimitry Andric MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress) || 26150fca6ea1SDimitry Andric MF.getFunction().hasFnAttribute(Attribute::SanitizeMemTag), 2616e8d8bef9SDimitry Andric /*ForSimm=*/false); 26170b57cec5SDimitry Andric } 26180b57cec5SDimitry Andric 2619e8d8bef9SDimitry Andric StackOffset 262052418fc2SDimitry Andric AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, 262152418fc2SDimitry Andric int FI) const { 262252418fc2SDimitry Andric // This function serves to provide a comparable offset from a single reference 262352418fc2SDimitry Andric // point (the value of SP at function entry) that can be used for analysis, 262452418fc2SDimitry Andric // e.g. the stack-frame-layout analysis pass. It is not guaranteed to be 262552418fc2SDimitry Andric // correct for all objects in the presence of VLA-area objects or dynamic 262652418fc2SDimitry Andric // stack re-alignment. 262752418fc2SDimitry Andric 262852418fc2SDimitry Andric const auto &MFI = MF.getFrameInfo(); 262952418fc2SDimitry Andric 263052418fc2SDimitry Andric int64_t ObjectOffset = MFI.getObjectOffset(FI); 263162987288SDimitry Andric StackOffset SVEStackSize = getSVEStackSize(MF); 263262987288SDimitry Andric 263362987288SDimitry Andric // For VLA-area objects, just emit an offset at the end of the stack frame. 263462987288SDimitry Andric // Whilst not quite correct, these objects do live at the end of the frame and 263562987288SDimitry Andric // so it is more useful for analysis for the offset to reflect this. 263662987288SDimitry Andric if (MFI.isVariableSizedObjectIndex(FI)) { 263762987288SDimitry Andric return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize; 263862987288SDimitry Andric } 263952418fc2SDimitry Andric 264052418fc2SDimitry Andric // This is correct in the absence of any SVE stack objects. 
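// With SVE present, objects below the SVE area are shifted down by the whole
// scalable region, fixed and CSR objects above it need no scalable
// adjustment, and scalable objects themselves are reported VL-scaled.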
264152418fc2SDimitry Andric if (!SVEStackSize) 264252418fc2SDimitry Andric return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea()); 264352418fc2SDimitry Andric 264452418fc2SDimitry Andric const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); 264552418fc2SDimitry Andric if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { 264652418fc2SDimitry Andric return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()), 264752418fc2SDimitry Andric ObjectOffset); 264852418fc2SDimitry Andric } 264952418fc2SDimitry Andric 265052418fc2SDimitry Andric bool IsFixed = MFI.isFixedObjectIndex(FI); 265152418fc2SDimitry Andric bool IsCSR = 265252418fc2SDimitry Andric !IsFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); 265352418fc2SDimitry Andric 265452418fc2SDimitry Andric StackOffset ScalableOffset = {}; 265552418fc2SDimitry Andric if (!IsFixed && !IsCSR) 265652418fc2SDimitry Andric ScalableOffset = -SVEStackSize; 265752418fc2SDimitry Andric 265852418fc2SDimitry Andric return StackOffset::getFixed(ObjectOffset) + ScalableOffset; 265952418fc2SDimitry Andric } 266052418fc2SDimitry Andric 266152418fc2SDimitry Andric StackOffset 2662e8d8bef9SDimitry Andric AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF, 2663e8d8bef9SDimitry Andric int FI) const { 2664e8d8bef9SDimitry Andric return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI)); 26650b57cec5SDimitry Andric } 26660b57cec5SDimitry Andric 2667e8d8bef9SDimitry Andric static StackOffset getFPOffset(const MachineFunction &MF, 2668e8d8bef9SDimitry Andric int64_t ObjectOffset) { 26690b57cec5SDimitry Andric const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); 26700b57cec5SDimitry Andric const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 26710fca6ea1SDimitry Andric const Function &F = MF.getFunction(); 26720fca6ea1SDimitry Andric bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()); 267362cfcf62SDimitry Andric unsigned FixedObject = 267462cfcf62SDimitry Andric getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); 2675e8d8bef9SDimitry Andric int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo()); 2676e8d8bef9SDimitry Andric int64_t FPAdjust = 2677e8d8bef9SDimitry Andric CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset(); 2678e8d8bef9SDimitry Andric return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust); 26790b57cec5SDimitry Andric } 26800b57cec5SDimitry Andric 2681e8d8bef9SDimitry Andric static StackOffset getStackOffset(const MachineFunction &MF, 2682e8d8bef9SDimitry Andric int64_t ObjectOffset) { 26830b57cec5SDimitry Andric const auto &MFI = MF.getFrameInfo(); 2684e8d8bef9SDimitry Andric return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize()); 26850b57cec5SDimitry Andric } 26860b57cec5SDimitry Andric 2687e8d8bef9SDimitry Andric // TODO: This function currently does not work for scalable vectors. 26880b57cec5SDimitry Andric int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF, 26890b57cec5SDimitry Andric int FI) const { 26900b57cec5SDimitry Andric const auto *RegInfo = static_cast<const AArch64RegisterInfo *>( 26910b57cec5SDimitry Andric MF.getSubtarget().getRegisterInfo()); 26920b57cec5SDimitry Andric int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI); 26930b57cec5SDimitry Andric return RegInfo->getLocalAddressRegister(MF) == AArch64::FP 2694e8d8bef9SDimitry Andric ? 
getFPOffset(MF, ObjectOffset).getFixed() 2695e8d8bef9SDimitry Andric : getStackOffset(MF, ObjectOffset).getFixed(); 26960b57cec5SDimitry Andric } 26970b57cec5SDimitry Andric 26988bcb0991SDimitry Andric StackOffset AArch64FrameLowering::resolveFrameIndexReference( 26995ffd83dbSDimitry Andric const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP, 27000b57cec5SDimitry Andric bool ForSimm) const { 27010b57cec5SDimitry Andric const auto &MFI = MF.getFrameInfo(); 2702480093f4SDimitry Andric int64_t ObjectOffset = MFI.getObjectOffset(FI); 27030b57cec5SDimitry Andric bool isFixed = MFI.isFixedObjectIndex(FI); 2704e8d8bef9SDimitry Andric bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector; 27058bcb0991SDimitry Andric return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg, 27060b57cec5SDimitry Andric PreferFP, ForSimm); 27070b57cec5SDimitry Andric } 27080b57cec5SDimitry Andric 27098bcb0991SDimitry Andric StackOffset AArch64FrameLowering::resolveFrameOffsetReference( 2710480093f4SDimitry Andric const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE, 27115ffd83dbSDimitry Andric Register &FrameReg, bool PreferFP, bool ForSimm) const { 27120b57cec5SDimitry Andric const auto &MFI = MF.getFrameInfo(); 27130b57cec5SDimitry Andric const auto *RegInfo = static_cast<const AArch64RegisterInfo *>( 27140b57cec5SDimitry Andric MF.getSubtarget().getRegisterInfo()); 27150b57cec5SDimitry Andric const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); 27160b57cec5SDimitry Andric const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 27170b57cec5SDimitry Andric 2718e8d8bef9SDimitry Andric int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed(); 2719e8d8bef9SDimitry Andric int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed(); 27200b57cec5SDimitry Andric bool isCSR = 2721480093f4SDimitry Andric !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI)); 27220b57cec5SDimitry Andric 27238bcb0991SDimitry Andric const StackOffset &SVEStackSize = getSVEStackSize(MF); 27248bcb0991SDimitry Andric 27250b57cec5SDimitry Andric // Use frame pointer to reference fixed objects. Use it for locals if 27260b57cec5SDimitry Andric // there are VLAs or a dynamically realigned SP (and thus the SP isn't 27270b57cec5SDimitry Andric // reliable as a base). Make sure useFPForScavengingIndex() does the 27280b57cec5SDimitry Andric // right thing for the emergency spill slot. 27290b57cec5SDimitry Andric bool UseFP = false; 27308bcb0991SDimitry Andric if (AFI->hasStackFrame() && !isSVE) { 273181ad6265SDimitry Andric // We shouldn't prefer using the FP to access fixed-sized stack objects when 273281ad6265SDimitry Andric // there are scalable (SVE) objects in between the FP and the fixed-sized 273381ad6265SDimitry Andric // objects. 27348bcb0991SDimitry Andric PreferFP &= !SVEStackSize; 27358bcb0991SDimitry Andric 27360b57cec5SDimitry Andric // Note: Keeping the following as multiple 'if' statements rather than 27370b57cec5SDimitry Andric // merging to a single expression for readability. 27380b57cec5SDimitry Andric // 27390b57cec5SDimitry Andric // Argument access should always use the FP. 
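// (Fixed objects, chiefly the incoming stack arguments, keep a constant
// distance from FP regardless of any later SP adjustments.)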
27400b57cec5SDimitry Andric if (isFixed) { 27410b57cec5SDimitry Andric UseFP = hasFP(MF); 2742fe6060f1SDimitry Andric } else if (isCSR && RegInfo->hasStackRealignment(MF)) { 27430b57cec5SDimitry Andric // References to the CSR area must use FP if we're re-aligning the stack 27440b57cec5SDimitry Andric // since the dynamically-sized alignment padding is between the SP/BP and 27450b57cec5SDimitry Andric // the CSR area. 27460b57cec5SDimitry Andric assert(hasFP(MF) && "Re-aligned stack must have frame pointer"); 27470b57cec5SDimitry Andric UseFP = true; 2748fe6060f1SDimitry Andric } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) { 27490b57cec5SDimitry Andric // If the FPOffset is negative and we're producing a signed immediate, we 27500b57cec5SDimitry Andric // have to keep in mind that the available offset range for negative 27510b57cec5SDimitry Andric // offsets is smaller than for positive ones. If an offset is available 27520b57cec5SDimitry Andric // via the FP and the SP, use whichever is closest. 27530b57cec5SDimitry Andric bool FPOffsetFits = !ForSimm || FPOffset >= -256; 275481ad6265SDimitry Andric PreferFP |= Offset > -FPOffset && !SVEStackSize; 27550b57cec5SDimitry Andric 27560b57cec5SDimitry Andric if (MFI.hasVarSizedObjects()) { 27570b57cec5SDimitry Andric // If we have variable sized objects, we can use either FP or BP, as the 27580b57cec5SDimitry Andric // SP offset is unknown. We can use the base pointer if we have one and 27590b57cec5SDimitry Andric // FP is not preferred. If not, we're stuck with using FP. 27600b57cec5SDimitry Andric bool CanUseBP = RegInfo->hasBasePointer(MF); 27610b57cec5SDimitry Andric if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best. 27620b57cec5SDimitry Andric UseFP = PreferFP; 27635ffd83dbSDimitry Andric else if (!CanUseBP) // Can't use BP. Forced to use FP. 27640b57cec5SDimitry Andric UseFP = true; 27650b57cec5SDimitry Andric // else we can use BP and FP, but the offset from FP won't fit. 27660b57cec5SDimitry Andric // That will make us scavenge registers which we can probably avoid by 27670b57cec5SDimitry Andric // using BP. If it won't fit for BP either, we'll scavenge anyway. 27680b57cec5SDimitry Andric } else if (FPOffset >= 0) { 27690b57cec5SDimitry Andric // Use SP or FP, whichever gives us the best chance of the offset 27700b57cec5SDimitry Andric // being in range for direct access. If the FPOffset is positive, 27710b57cec5SDimitry Andric // that'll always be best, as the SP will be even further away. 27720b57cec5SDimitry Andric UseFP = true; 27730b57cec5SDimitry Andric } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) { 27740b57cec5SDimitry Andric // Funclets access the locals contained in the parent's stack frame 27750b57cec5SDimitry Andric // via the frame pointer, so we have to use the FP in the parent 27760b57cec5SDimitry Andric // function. 27770b57cec5SDimitry Andric (void) Subtarget; 27780fca6ea1SDimitry Andric assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(), 27790fca6ea1SDimitry Andric MF.getFunction().isVarArg()) && 27800b57cec5SDimitry Andric "Funclets should only be present on Win64"); 27810b57cec5SDimitry Andric UseFP = true; 27820b57cec5SDimitry Andric } else { 27830b57cec5SDimitry Andric // We have the choice between FP and (SP or BP). 27840b57cec5SDimitry Andric if (FPOffsetFits && PreferFP) // If FP is the best fit, use it. 
27850b57cec5SDimitry Andric UseFP = true; 27860b57cec5SDimitry Andric } 27870b57cec5SDimitry Andric } 27880b57cec5SDimitry Andric } 27890b57cec5SDimitry Andric 2790fe6060f1SDimitry Andric assert( 2791fe6060f1SDimitry Andric ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) && 27920b57cec5SDimitry Andric "In the presence of dynamic stack pointer realignment, " 27930b57cec5SDimitry Andric "non-argument/CSR objects cannot be accessed through the frame pointer"); 27940b57cec5SDimitry Andric 27958bcb0991SDimitry Andric if (isSVE) { 2796e8d8bef9SDimitry Andric StackOffset FPOffset = 2797e8d8bef9SDimitry Andric StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset); 2798e8d8bef9SDimitry Andric StackOffset SPOffset = 2799e8d8bef9SDimitry Andric SVEStackSize + 2800e8d8bef9SDimitry Andric StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(), 2801e8d8bef9SDimitry Andric ObjectOffset); 28028bcb0991SDimitry Andric // Always use the FP for SVE spills if available and beneficial. 2803fe6060f1SDimitry Andric if (hasFP(MF) && (SPOffset.getFixed() || 2804e8d8bef9SDimitry Andric FPOffset.getScalable() < SPOffset.getScalable() || 2805fe6060f1SDimitry Andric RegInfo->hasStackRealignment(MF))) { 28060b57cec5SDimitry Andric FrameReg = RegInfo->getFrameRegister(MF); 28070b57cec5SDimitry Andric return FPOffset; 28080b57cec5SDimitry Andric } 28090b57cec5SDimitry Andric 28108bcb0991SDimitry Andric FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() 28118bcb0991SDimitry Andric : (unsigned)AArch64::SP; 28128bcb0991SDimitry Andric return SPOffset; 28138bcb0991SDimitry Andric } 28148bcb0991SDimitry Andric 28158bcb0991SDimitry Andric StackOffset ScalableOffset = {}; 28168bcb0991SDimitry Andric if (UseFP && !(isFixed || isCSR)) 28178bcb0991SDimitry Andric ScalableOffset = -SVEStackSize; 28188bcb0991SDimitry Andric if (!UseFP && (isFixed || isCSR)) 28198bcb0991SDimitry Andric ScalableOffset = SVEStackSize; 28208bcb0991SDimitry Andric 28218bcb0991SDimitry Andric if (UseFP) { 28228bcb0991SDimitry Andric FrameReg = RegInfo->getFrameRegister(MF); 2823e8d8bef9SDimitry Andric return StackOffset::getFixed(FPOffset) + ScalableOffset; 28248bcb0991SDimitry Andric } 28258bcb0991SDimitry Andric 28260b57cec5SDimitry Andric // Use the base pointer if we have one. 28270b57cec5SDimitry Andric if (RegInfo->hasBasePointer(MF)) 28280b57cec5SDimitry Andric FrameReg = RegInfo->getBaseRegister(); 28290b57cec5SDimitry Andric else { 28300b57cec5SDimitry Andric assert(!MFI.hasVarSizedObjects() && 28310b57cec5SDimitry Andric "Can't use SP when we have var sized objects."); 28320b57cec5SDimitry Andric FrameReg = AArch64::SP; 28330b57cec5SDimitry Andric // If we're using the red zone for this function, the SP won't actually 28340b57cec5SDimitry Andric // be adjusted, so the offsets will be negative. They're also all 28350b57cec5SDimitry Andric // within range of the signed 9-bit immediate instructions. 28360b57cec5SDimitry Andric if (canUseRedZone(MF)) 28370b57cec5SDimitry Andric Offset -= AFI->getLocalStackSize(); 28380b57cec5SDimitry Andric } 28390b57cec5SDimitry Andric 2840e8d8bef9SDimitry Andric return StackOffset::getFixed(Offset) + ScalableOffset; 28410b57cec5SDimitry Andric } 28420b57cec5SDimitry Andric 28430b57cec5SDimitry Andric static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { 28440b57cec5SDimitry Andric // Do not set a kill flag on values that are also marked as live-in. 
This 28450b57cec5SDimitry Andric // happens with the @llvm-returnaddress intrinsic and with arguments passed in 28460b57cec5SDimitry Andric // callee saved registers. 28470b57cec5SDimitry Andric // Omitting the kill flags is conservatively correct even if the live-in 28480b57cec5SDimitry Andric // is not used after all. 28490b57cec5SDimitry Andric bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg); 28500b57cec5SDimitry Andric return getKillRegState(!IsLiveIn); 28510b57cec5SDimitry Andric } 28520b57cec5SDimitry Andric 28530b57cec5SDimitry Andric static bool produceCompactUnwindFrame(MachineFunction &MF) { 28540b57cec5SDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 28550b57cec5SDimitry Andric AttributeList Attrs = MF.getFunction().getAttributes(); 28560b57cec5SDimitry Andric return Subtarget.isTargetMachO() && 28570b57cec5SDimitry Andric !(Subtarget.getTargetLowering()->supportSwiftError() && 2858fe6060f1SDimitry Andric Attrs.hasAttrSomewhere(Attribute::SwiftError)) && 2859*71ac745dSDimitry Andric MF.getFunction().getCallingConv() != CallingConv::SwiftTail && 2860*71ac745dSDimitry Andric !requiresSaveVG(MF); 28610b57cec5SDimitry Andric } 28620b57cec5SDimitry Andric 28630b57cec5SDimitry Andric static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, 2864bdd1243dSDimitry Andric bool NeedsWinCFI, bool IsFirst, 2865bdd1243dSDimitry Andric const TargetRegisterInfo *TRI) { 28660b57cec5SDimitry Andric // If we are generating register pairs for a Windows function that requires 28670b57cec5SDimitry Andric // EH support, then pair consecutive registers only. There are no unwind 28680b57cec5SDimitry Andric // opcodes for saves/restores of non-consectuve register pairs. 2869e8d8bef9SDimitry Andric // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x, 2870e8d8bef9SDimitry Andric // save_lrpair. 28710b57cec5SDimitry Andric // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling 28720b57cec5SDimitry Andric 2873480093f4SDimitry Andric if (Reg2 == AArch64::FP) 2874480093f4SDimitry Andric return true; 28750b57cec5SDimitry Andric if (!NeedsWinCFI) 28760b57cec5SDimitry Andric return false; 2877bdd1243dSDimitry Andric if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1) 28780b57cec5SDimitry Andric return false; 2879e8d8bef9SDimitry Andric // If pairing a GPR with LR, the pair can be described by the save_lrpair 2880e8d8bef9SDimitry Andric // opcode. If this is the first register pair, it would end up with a 2881e8d8bef9SDimitry Andric // predecrement, but there's no save_lrpair_x opcode, so we can only do this 2882e8d8bef9SDimitry Andric // if LR is paired with something else than the first register. 2883e8d8bef9SDimitry Andric // The save_lrpair opcode requires the first register to be an odd one. 2884e8d8bef9SDimitry Andric if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 && 2885e8d8bef9SDimitry Andric (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst) 2886e8d8bef9SDimitry Andric return false; 28870b57cec5SDimitry Andric return true; 28880b57cec5SDimitry Andric } 28890b57cec5SDimitry Andric 28908bcb0991SDimitry Andric /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction. 28918bcb0991SDimitry Andric /// WindowsCFI requires that only consecutive registers can be paired. 28928bcb0991SDimitry Andric /// LR and FP need to be allocated together when the frame needs to save 28938bcb0991SDimitry Andric /// the frame-record. 
This means any other register pairing with LR is invalid. 28948bcb0991SDimitry Andric static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, 2895e8d8bef9SDimitry Andric bool UsesWinAAPCS, bool NeedsWinCFI, 2896bdd1243dSDimitry Andric bool NeedsFrameRecord, bool IsFirst, 2897bdd1243dSDimitry Andric const TargetRegisterInfo *TRI) { 2898480093f4SDimitry Andric if (UsesWinAAPCS) 2899bdd1243dSDimitry Andric return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst, 2900bdd1243dSDimitry Andric TRI); 29018bcb0991SDimitry Andric 29028bcb0991SDimitry Andric // If we need to store the frame record, don't pair any register 29038bcb0991SDimitry Andric // with LR other than FP. 29048bcb0991SDimitry Andric if (NeedsFrameRecord) 29058bcb0991SDimitry Andric return Reg2 == AArch64::LR; 29068bcb0991SDimitry Andric 29078bcb0991SDimitry Andric return false; 29088bcb0991SDimitry Andric } 29098bcb0991SDimitry Andric 29100b57cec5SDimitry Andric namespace { 29110b57cec5SDimitry Andric 29120b57cec5SDimitry Andric struct RegPairInfo { 29130b57cec5SDimitry Andric unsigned Reg1 = AArch64::NoRegister; 29140b57cec5SDimitry Andric unsigned Reg2 = AArch64::NoRegister; 29150b57cec5SDimitry Andric int FrameIdx; 29160b57cec5SDimitry Andric int Offset; 29170fca6ea1SDimitry Andric enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type; 29180b57cec5SDimitry Andric 29190b57cec5SDimitry Andric RegPairInfo() = default; 29200b57cec5SDimitry Andric 29210b57cec5SDimitry Andric bool isPaired() const { return Reg2 != AArch64::NoRegister; } 2922480093f4SDimitry Andric 2923480093f4SDimitry Andric unsigned getScale() const { 2924480093f4SDimitry Andric switch (Type) { 2925480093f4SDimitry Andric case PPR: 2926480093f4SDimitry Andric return 2; 2927480093f4SDimitry Andric case GPR: 2928480093f4SDimitry Andric case FPR64: 29290fca6ea1SDimitry Andric case VG: 2930480093f4SDimitry Andric return 8; 2931480093f4SDimitry Andric case ZPR: 2932480093f4SDimitry Andric case FPR128: 2933480093f4SDimitry Andric return 16; 2934480093f4SDimitry Andric } 2935480093f4SDimitry Andric llvm_unreachable("Unsupported type"); 2936480093f4SDimitry Andric } 2937480093f4SDimitry Andric 2938480093f4SDimitry Andric bool isScalable() const { return Type == PPR || Type == ZPR; } 29390b57cec5SDimitry Andric }; 29400b57cec5SDimitry Andric 29410b57cec5SDimitry Andric } // end anonymous namespace 29420b57cec5SDimitry Andric 29430b57cec5SDimitry Andric static void computeCalleeSaveRegisterPairs( 29445ffd83dbSDimitry Andric MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI, 29450b57cec5SDimitry Andric const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs, 294681ad6265SDimitry Andric bool NeedsFrameRecord) { 29470b57cec5SDimitry Andric 29480b57cec5SDimitry Andric if (CSI.empty()) 29490b57cec5SDimitry Andric return; 29500b57cec5SDimitry Andric 2951480093f4SDimitry Andric bool IsWindows = isTargetWindows(MF); 29520b57cec5SDimitry Andric bool NeedsWinCFI = needsWinCFI(MF); 29530b57cec5SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 29540b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 29550b57cec5SDimitry Andric CallingConv::ID CC = MF.getFunction().getCallingConv(); 29560b57cec5SDimitry Andric unsigned Count = CSI.size(); 29570b57cec5SDimitry Andric (void)CC; 29580b57cec5SDimitry Andric // MachO's compact unwind format relies on all registers being stored in 29590b57cec5SDimitry Andric // pairs. 
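// Illustrative example: on MachO, a function that only clobbers x19, x20
// and x21 will have had x22 added to SavedRegs as well (see
// determineCalleeSaves below), so every callee-save can be described to
// compact unwind as an stp of two adjacent registers.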
2960bdd1243dSDimitry Andric assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost || 296106c3fb27SDimitry Andric CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS || 296206c3fb27SDimitry Andric CC == CallingConv::Win64 || (Count & 1) == 0) && 29630b57cec5SDimitry Andric "Odd number of callee-saved regs to spill!"); 2964480093f4SDimitry Andric int ByteOffset = AFI->getCalleeSavedStackSize(); 2965e8d8bef9SDimitry Andric int StackFillDir = -1; 2966e8d8bef9SDimitry Andric int RegInc = 1; 2967e8d8bef9SDimitry Andric unsigned FirstReg = 0; 2968e8d8bef9SDimitry Andric if (NeedsWinCFI) { 2969e8d8bef9SDimitry Andric // For WinCFI, fill the stack from the bottom up. 2970e8d8bef9SDimitry Andric ByteOffset = 0; 2971e8d8bef9SDimitry Andric StackFillDir = 1; 2972e8d8bef9SDimitry Andric // As the CSI array is reversed to match PrologEpilogInserter, iterate 2973e8d8bef9SDimitry Andric // backwards, to pair up registers starting from lower numbered registers. 2974e8d8bef9SDimitry Andric RegInc = -1; 2975e8d8bef9SDimitry Andric FirstReg = Count - 1; 2976e8d8bef9SDimitry Andric } 2977480093f4SDimitry Andric int ScalableByteOffset = AFI->getSVECalleeSavedStackSize(); 2978fe6060f1SDimitry Andric bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace(); 29790fca6ea1SDimitry Andric Register LastReg = 0; 298075b4d546SDimitry Andric 2981e8d8bef9SDimitry Andric // When iterating backwards, the loop condition relies on unsigned wraparound. 2982e8d8bef9SDimitry Andric for (unsigned i = FirstReg; i < Count; i += RegInc) { 29830b57cec5SDimitry Andric RegPairInfo RPI; 29840b57cec5SDimitry Andric RPI.Reg1 = CSI[i].getReg(); 29850b57cec5SDimitry Andric 29860b57cec5SDimitry Andric if (AArch64::GPR64RegClass.contains(RPI.Reg1)) 29870b57cec5SDimitry Andric RPI.Type = RegPairInfo::GPR; 29880b57cec5SDimitry Andric else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) 29890b57cec5SDimitry Andric RPI.Type = RegPairInfo::FPR64; 29900b57cec5SDimitry Andric else if (AArch64::FPR128RegClass.contains(RPI.Reg1)) 29910b57cec5SDimitry Andric RPI.Type = RegPairInfo::FPR128; 2992480093f4SDimitry Andric else if (AArch64::ZPRRegClass.contains(RPI.Reg1)) 2993480093f4SDimitry Andric RPI.Type = RegPairInfo::ZPR; 2994480093f4SDimitry Andric else if (AArch64::PPRRegClass.contains(RPI.Reg1)) 2995480093f4SDimitry Andric RPI.Type = RegPairInfo::PPR; 29960fca6ea1SDimitry Andric else if (RPI.Reg1 == AArch64::VG) 29970fca6ea1SDimitry Andric RPI.Type = RegPairInfo::VG; 29980b57cec5SDimitry Andric else 29990b57cec5SDimitry Andric llvm_unreachable("Unsupported register class."); 30000b57cec5SDimitry Andric 30010fca6ea1SDimitry Andric // Add the stack hazard size as we transition from GPR->FPR CSRs. 30020fca6ea1SDimitry Andric if (AFI->hasStackHazardSlotIndex() && 30030fca6ea1SDimitry Andric (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) && 30040fca6ea1SDimitry Andric AArch64InstrInfo::isFpOrNEON(RPI.Reg1)) 30050fca6ea1SDimitry Andric ByteOffset += StackFillDir * StackHazardSize; 30060fca6ea1SDimitry Andric LastReg = RPI.Reg1; 30070fca6ea1SDimitry Andric 30080b57cec5SDimitry Andric // Add the next reg to the pair if it is in the same register class. 
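// Rough summary of the switch below: x-registers may pair with the next
// GPR (subject to the WinCFI/frame-record checks), d- and q-registers with
// the next FPR of the same width, and z-registers only when a predicate
// register is reserved for multi-vector spills; p-registers and VG never
// pair, and nothing pairs once a stack hazard slot separates GPR and FPR
// saves (the hasStackHazardSlotIndex() test in the condition below).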
30090fca6ea1SDimitry Andric if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) { 301004eeddc0SDimitry Andric Register NextReg = CSI[i + RegInc].getReg(); 3011e8d8bef9SDimitry Andric bool IsFirst = i == FirstReg; 30120b57cec5SDimitry Andric switch (RPI.Type) { 30130b57cec5SDimitry Andric case RegPairInfo::GPR: 30140b57cec5SDimitry Andric if (AArch64::GPR64RegClass.contains(NextReg) && 3015e8d8bef9SDimitry Andric !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, 3016bdd1243dSDimitry Andric NeedsWinCFI, NeedsFrameRecord, IsFirst, 3017bdd1243dSDimitry Andric TRI)) 30180b57cec5SDimitry Andric RPI.Reg2 = NextReg; 30190b57cec5SDimitry Andric break; 30200b57cec5SDimitry Andric case RegPairInfo::FPR64: 30210b57cec5SDimitry Andric if (AArch64::FPR64RegClass.contains(NextReg) && 3022e8d8bef9SDimitry Andric !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, 3023bdd1243dSDimitry Andric IsFirst, TRI)) 30240b57cec5SDimitry Andric RPI.Reg2 = NextReg; 30250b57cec5SDimitry Andric break; 30260b57cec5SDimitry Andric case RegPairInfo::FPR128: 30270b57cec5SDimitry Andric if (AArch64::FPR128RegClass.contains(NextReg)) 30280b57cec5SDimitry Andric RPI.Reg2 = NextReg; 30290b57cec5SDimitry Andric break; 3030480093f4SDimitry Andric case RegPairInfo::PPR: 30310fca6ea1SDimitry Andric break; 3032480093f4SDimitry Andric case RegPairInfo::ZPR: 30330fca6ea1SDimitry Andric if (AFI->getPredicateRegForFillSpill() != 0) 30340fca6ea1SDimitry Andric if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) 30350fca6ea1SDimitry Andric RPI.Reg2 = NextReg; 30360fca6ea1SDimitry Andric break; 30370fca6ea1SDimitry Andric case RegPairInfo::VG: 3038480093f4SDimitry Andric break; 30390b57cec5SDimitry Andric } 30400b57cec5SDimitry Andric } 30410b57cec5SDimitry Andric 30420b57cec5SDimitry Andric // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI 30430b57cec5SDimitry Andric // list to come in sorted by frame index so that we can issue the store 30440b57cec5SDimitry Andric // pair instructions directly. Assert if we see anything otherwise. 30450b57cec5SDimitry Andric // 30460b57cec5SDimitry Andric // The order of the registers in the list is controlled by 30470b57cec5SDimitry Andric // getCalleeSavedRegs(), so they will always be in-order, as well. 30480b57cec5SDimitry Andric assert((!RPI.isPaired() || 3049e8d8bef9SDimitry Andric (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) && 30500b57cec5SDimitry Andric "Out of order callee saved regs!"); 30510b57cec5SDimitry Andric 30528bcb0991SDimitry Andric assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP || 30538bcb0991SDimitry Andric RPI.Reg1 == AArch64::LR) && 30548bcb0991SDimitry Andric "FrameRecord must be allocated together with LR"); 30558bcb0991SDimitry Andric 3056480093f4SDimitry Andric // Windows AAPCS has FP and LR reversed. 3057480093f4SDimitry Andric assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP || 3058480093f4SDimitry Andric RPI.Reg2 == AArch64::LR) && 3059480093f4SDimitry Andric "FrameRecord must be allocated together with LR"); 3060480093f4SDimitry Andric 30610b57cec5SDimitry Andric // MachO's compact unwind format relies on all registers being stored in 30620b57cec5SDimitry Andric // adjacent register pairs. 
3063bdd1243dSDimitry Andric assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost || 306406c3fb27SDimitry Andric CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS || 306506c3fb27SDimitry Andric CC == CallingConv::Win64 || 30660b57cec5SDimitry Andric (RPI.isPaired() && 30670b57cec5SDimitry Andric ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || 30680b57cec5SDimitry Andric RPI.Reg1 + 1 == RPI.Reg2))) && 30690b57cec5SDimitry Andric "Callee-save registers not saved as adjacent register pair!"); 30700b57cec5SDimitry Andric 30710b57cec5SDimitry Andric RPI.FrameIdx = CSI[i].getFrameIdx(); 3072e8d8bef9SDimitry Andric if (NeedsWinCFI && 3073e8d8bef9SDimitry Andric RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair 3074e8d8bef9SDimitry Andric RPI.FrameIdx = CSI[i + RegInc].getFrameIdx(); 3075480093f4SDimitry Andric int Scale = RPI.getScale(); 3076e8d8bef9SDimitry Andric 3077e8d8bef9SDimitry Andric int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset; 3078e8d8bef9SDimitry Andric assert(OffsetPre % Scale == 0); 3079e8d8bef9SDimitry Andric 3080480093f4SDimitry Andric if (RPI.isScalable()) 30810fca6ea1SDimitry Andric ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale); 3082480093f4SDimitry Andric else 3083e8d8bef9SDimitry Andric ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale); 3084480093f4SDimitry Andric 3085fe6060f1SDimitry Andric // Swift's async context is directly before FP, so allocate an extra 3086fe6060f1SDimitry Andric // 8 bytes for it. 3087fe6060f1SDimitry Andric if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() && 30885f757f3fSDimitry Andric ((!IsWindows && RPI.Reg2 == AArch64::FP) || 30895f757f3fSDimitry Andric (IsWindows && RPI.Reg2 == AArch64::LR))) 3090fe6060f1SDimitry Andric ByteOffset += StackFillDir * 8; 3091fe6060f1SDimitry Andric 30920b57cec5SDimitry Andric // Round up size of non-pair to pair size if we need to pad the 30930b57cec5SDimitry Andric // callee-save area to ensure 16-byte alignment. 30940fca6ea1SDimitry Andric if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() && 30950fca6ea1SDimitry Andric RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() && 30960fca6ea1SDimitry Andric ByteOffset % 16 != 0) { 3097e8d8bef9SDimitry Andric ByteOffset += 8 * StackFillDir; 30985ffd83dbSDimitry Andric assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16)); 3099e8d8bef9SDimitry Andric // A stack frame with a gap looks like this, bottom up: 3100e8d8bef9SDimitry Andric // d9, d8. x21, gap, x20, x19. 3101fe6060f1SDimitry Andric // Set extra alignment on the x21 object to create the gap above it. 31025ffd83dbSDimitry Andric MFI.setObjectAlignment(RPI.FrameIdx, Align(16)); 3103fe6060f1SDimitry Andric NeedGapToAlignStack = false; 31040b57cec5SDimitry Andric } 31050b57cec5SDimitry Andric 3106e8d8bef9SDimitry Andric int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset; 3107e8d8bef9SDimitry Andric assert(OffsetPost % Scale == 0); 3108e8d8bef9SDimitry Andric // If filling top down (default), we want the offset after incrementing it. 31095f757f3fSDimitry Andric // If filling bottom up (WinCFI) we need the original offset. 3110e8d8bef9SDimitry Andric int Offset = NeedsWinCFI ? OffsetPre : OffsetPost; 3111fe6060f1SDimitry Andric 3112fe6060f1SDimitry Andric // The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the 3113fe6060f1SDimitry Andric // Swift context can directly precede FP. 
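// Rough picture of that 24-byte slot, lower addresses first (illustrative):
//   [ async context ][ saved fp ][ saved lr ]
// so x29 still points at the saved fp and the context lives at fp-8.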
3114fe6060f1SDimitry Andric if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() && 31155f757f3fSDimitry Andric ((!IsWindows && RPI.Reg2 == AArch64::FP) || 31165f757f3fSDimitry Andric (IsWindows && RPI.Reg2 == AArch64::LR))) 3117fe6060f1SDimitry Andric Offset += 8; 31180b57cec5SDimitry Andric RPI.Offset = Offset / Scale; 3119480093f4SDimitry Andric 31200fca6ea1SDimitry Andric assert((!RPI.isPaired() || 31210fca6ea1SDimitry Andric (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) || 3122480093f4SDimitry Andric (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) && 31230b57cec5SDimitry Andric "Offset out of bounds for LDP/STP immediate"); 31240b57cec5SDimitry Andric 3125e8d8bef9SDimitry Andric // Save the offset to frame record so that the FP register can point to the 3126e8d8bef9SDimitry Andric // innermost frame record (spilled FP and LR registers). 31270fca6ea1SDimitry Andric if (NeedsFrameRecord && 31280fca6ea1SDimitry Andric ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || 31290fca6ea1SDimitry Andric (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR))) 3130e8d8bef9SDimitry Andric AFI->setCalleeSaveBaseToFrameRecordOffset(Offset); 3131e8d8bef9SDimitry Andric 31320b57cec5SDimitry Andric RegPairs.push_back(RPI); 31330b57cec5SDimitry Andric if (RPI.isPaired()) 3134e8d8bef9SDimitry Andric i += RegInc; 3135e8d8bef9SDimitry Andric } 3136e8d8bef9SDimitry Andric if (NeedsWinCFI) { 3137e8d8bef9SDimitry Andric // If we need an alignment gap in the stack, align the topmost stack 3138e8d8bef9SDimitry Andric // object. A stack frame with a gap looks like this, bottom up: 3139e8d8bef9SDimitry Andric // x19, d8. d9, gap. 3140e8d8bef9SDimitry Andric // Set extra alignment on the topmost stack object (the first element in 3141e8d8bef9SDimitry Andric // CSI, which goes top down), to create the gap above it. 3142e8d8bef9SDimitry Andric if (AFI->hasCalleeSaveStackFreeSpace()) 3143e8d8bef9SDimitry Andric MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16)); 3144e8d8bef9SDimitry Andric // We iterated bottom up over the registers; flip RegPairs back to top 3145e8d8bef9SDimitry Andric // down order. 3146e8d8bef9SDimitry Andric std::reverse(RegPairs.begin(), RegPairs.end()); 31470b57cec5SDimitry Andric } 31480b57cec5SDimitry Andric } 31490b57cec5SDimitry Andric 31500b57cec5SDimitry Andric bool AArch64FrameLowering::spillCalleeSavedRegisters( 31510b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 31525ffd83dbSDimitry Andric ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 31530b57cec5SDimitry Andric MachineFunction &MF = *MBB.getParent(); 31540b57cec5SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 31550fca6ea1SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 31560b57cec5SDimitry Andric bool NeedsWinCFI = needsWinCFI(MF); 31570b57cec5SDimitry Andric DebugLoc DL; 31580b57cec5SDimitry Andric SmallVector<RegPairInfo, 8> RegPairs; 31590b57cec5SDimitry Andric 316081ad6265SDimitry Andric computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); 316181ad6265SDimitry Andric 31620fca6ea1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 31630fca6ea1SDimitry Andric // Refresh the reserved regs in case there are any potential changes since the 31640fca6ea1SDimitry Andric // last freeze. 
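// (For example, a register newly reserved since the last freeze, such as
// the base pointer, must be visible to the MRI.isReserved() checks used
// below when deciding which registers to add as block live-ins.)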
31650fca6ea1SDimitry Andric MRI.freezeReservedRegs(); 31660fca6ea1SDimitry Andric 3167fe6060f1SDimitry Andric if (homogeneousPrologEpilog(MF)) { 3168fe6060f1SDimitry Andric auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog)) 3169fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 3170fe6060f1SDimitry Andric 3171fe6060f1SDimitry Andric for (auto &RPI : RegPairs) { 3172fe6060f1SDimitry Andric MIB.addReg(RPI.Reg1); 3173fe6060f1SDimitry Andric MIB.addReg(RPI.Reg2); 3174fe6060f1SDimitry Andric 3175fe6060f1SDimitry Andric // Update register live in. 3176fe6060f1SDimitry Andric if (!MRI.isReserved(RPI.Reg1)) 3177fe6060f1SDimitry Andric MBB.addLiveIn(RPI.Reg1); 31785f757f3fSDimitry Andric if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2)) 3179fe6060f1SDimitry Andric MBB.addLiveIn(RPI.Reg2); 3180fe6060f1SDimitry Andric } 3181fe6060f1SDimitry Andric return true; 3182fe6060f1SDimitry Andric } 31830fca6ea1SDimitry Andric bool PTrueCreated = false; 3184349cc55cSDimitry Andric for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) { 31850b57cec5SDimitry Andric unsigned Reg1 = RPI.Reg1; 31860b57cec5SDimitry Andric unsigned Reg2 = RPI.Reg2; 31870b57cec5SDimitry Andric unsigned StrOpc; 31880b57cec5SDimitry Andric 31890b57cec5SDimitry Andric // Issue sequence of spills for cs regs. The first spill may be converted 31900b57cec5SDimitry Andric // to a pre-decrement store later by emitPrologue if the callee-save stack 31910b57cec5SDimitry Andric // area allocation can't be combined with the local stack area allocation. 31920b57cec5SDimitry Andric // For example: 31930b57cec5SDimitry Andric // stp x22, x21, [sp, #0] // addImm(+0) 31940b57cec5SDimitry Andric // stp x20, x19, [sp, #16] // addImm(+2) 31950b57cec5SDimitry Andric // stp fp, lr, [sp, #32] // addImm(+4) 31960b57cec5SDimitry Andric // Rationale: This sequence saves uop updates compared to a sequence of 31970b57cec5SDimitry Andric // pre-increment spills like stp xi,xj,[sp,#-16]! 31980b57cec5SDimitry Andric // Note: Similar rationale and sequence for restores in epilog. 31995ffd83dbSDimitry Andric unsigned Size; 32005ffd83dbSDimitry Andric Align Alignment; 32010b57cec5SDimitry Andric switch (RPI.Type) { 32020b57cec5SDimitry Andric case RegPairInfo::GPR: 32030b57cec5SDimitry Andric StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; 32040b57cec5SDimitry Andric Size = 8; 32055ffd83dbSDimitry Andric Alignment = Align(8); 32060b57cec5SDimitry Andric break; 32070b57cec5SDimitry Andric case RegPairInfo::FPR64: 32080b57cec5SDimitry Andric StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; 32090b57cec5SDimitry Andric Size = 8; 32105ffd83dbSDimitry Andric Alignment = Align(8); 32110b57cec5SDimitry Andric break; 32120b57cec5SDimitry Andric case RegPairInfo::FPR128: 32130b57cec5SDimitry Andric StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui; 32140b57cec5SDimitry Andric Size = 16; 32155ffd83dbSDimitry Andric Alignment = Align(16); 32160b57cec5SDimitry Andric break; 3217480093f4SDimitry Andric case RegPairInfo::ZPR: 32180fca6ea1SDimitry Andric StrOpc = RPI.isPaired() ? 
AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI; 3219480093f4SDimitry Andric Size = 16; 32205ffd83dbSDimitry Andric Alignment = Align(16); 3221480093f4SDimitry Andric break; 3222480093f4SDimitry Andric case RegPairInfo::PPR: 3223480093f4SDimitry Andric StrOpc = AArch64::STR_PXI; 3224480093f4SDimitry Andric Size = 2; 32255ffd83dbSDimitry Andric Alignment = Align(2); 3226480093f4SDimitry Andric break; 32270fca6ea1SDimitry Andric case RegPairInfo::VG: 32280fca6ea1SDimitry Andric StrOpc = AArch64::STRXui; 32290fca6ea1SDimitry Andric Size = 8; 32300fca6ea1SDimitry Andric Alignment = Align(8); 32310fca6ea1SDimitry Andric break; 32320b57cec5SDimitry Andric } 32330fca6ea1SDimitry Andric 32340fca6ea1SDimitry Andric unsigned X0Scratch = AArch64::NoRegister; 32350fca6ea1SDimitry Andric if (Reg1 == AArch64::VG) { 32360fca6ea1SDimitry Andric // Find an available register to store value of VG to. 32370fca6ea1SDimitry Andric Reg1 = findScratchNonCalleeSaveRegister(&MBB); 32380fca6ea1SDimitry Andric assert(Reg1 != AArch64::NoRegister); 32390fca6ea1SDimitry Andric SMEAttrs Attrs(MF.getFunction()); 32400fca6ea1SDimitry Andric 32410fca6ea1SDimitry Andric if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() && 32420fca6ea1SDimitry Andric AFI->getStreamingVGIdx() == std::numeric_limits<int>::max()) { 32430fca6ea1SDimitry Andric // For locally-streaming functions, we need to store both the streaming 32440fca6ea1SDimitry Andric // & non-streaming VG. Spill the streaming value first. 32450fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1) 32460fca6ea1SDimitry Andric .addImm(1) 32470fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32480fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1) 32490fca6ea1SDimitry Andric .addReg(Reg1) 32500fca6ea1SDimitry Andric .addImm(3) 32510fca6ea1SDimitry Andric .addImm(63) 32520fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32530fca6ea1SDimitry Andric 32540fca6ea1SDimitry Andric AFI->setStreamingVGIdx(RPI.FrameIdx); 32550fca6ea1SDimitry Andric } else if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) { 32560fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1) 32570fca6ea1SDimitry Andric .addImm(31) 32580fca6ea1SDimitry Andric .addImm(1) 32590fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32600fca6ea1SDimitry Andric AFI->setVGIdx(RPI.FrameIdx); 32610fca6ea1SDimitry Andric } else { 32620fca6ea1SDimitry Andric const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); 32630fca6ea1SDimitry Andric if (llvm::any_of( 32640fca6ea1SDimitry Andric MBB.liveins(), 32650fca6ea1SDimitry Andric [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { 32660fca6ea1SDimitry Andric return STI.getRegisterInfo()->isSuperOrSubRegisterEq( 32670fca6ea1SDimitry Andric AArch64::X0, LiveIn.PhysReg); 32680fca6ea1SDimitry Andric })) 32690fca6ea1SDimitry Andric X0Scratch = Reg1; 32700fca6ea1SDimitry Andric 32710fca6ea1SDimitry Andric if (X0Scratch != AArch64::NoRegister) 32720fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), Reg1) 32730fca6ea1SDimitry Andric .addReg(AArch64::XZR) 32740fca6ea1SDimitry Andric .addReg(AArch64::X0, RegState::Undef) 32750fca6ea1SDimitry Andric .addReg(AArch64::X0, RegState::Implicit) 32760fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32770fca6ea1SDimitry Andric 32780fca6ea1SDimitry Andric const uint32_t *RegMask = TRI->getCallPreservedMask( 32790fca6ea1SDimitry Andric MF, 32800fca6ea1SDimitry Andric 
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1); 32810fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::BL)) 32820fca6ea1SDimitry Andric .addExternalSymbol("__arm_get_current_vg") 32830fca6ea1SDimitry Andric .addRegMask(RegMask) 32840fca6ea1SDimitry Andric .addReg(AArch64::X0, RegState::ImplicitDefine) 32850fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 32860fca6ea1SDimitry Andric Reg1 = AArch64::X0; 32870fca6ea1SDimitry Andric AFI->setVGIdx(RPI.FrameIdx); 32880fca6ea1SDimitry Andric } 32890fca6ea1SDimitry Andric } 32900fca6ea1SDimitry Andric 32910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI); 32920b57cec5SDimitry Andric if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI); 32930b57cec5SDimitry Andric dbgs() << ") -> fi#(" << RPI.FrameIdx; 32940b57cec5SDimitry Andric if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; 32950b57cec5SDimitry Andric dbgs() << ")\n"); 32960b57cec5SDimitry Andric 32970b57cec5SDimitry Andric assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) && 32980b57cec5SDimitry Andric "Windows unwdinding requires a consecutive (FP,LR) pair"); 32990b57cec5SDimitry Andric // Windows unwind codes require consecutive registers if registers are 33000b57cec5SDimitry Andric // paired. Make the switch here, so that the code below will save (x,x+1) 33010b57cec5SDimitry Andric // and not (x+1,x). 33020b57cec5SDimitry Andric unsigned FrameIdxReg1 = RPI.FrameIdx; 33030b57cec5SDimitry Andric unsigned FrameIdxReg2 = RPI.FrameIdx + 1; 33040b57cec5SDimitry Andric if (NeedsWinCFI && RPI.isPaired()) { 33050b57cec5SDimitry Andric std::swap(Reg1, Reg2); 33060b57cec5SDimitry Andric std::swap(FrameIdxReg1, FrameIdxReg2); 33070b57cec5SDimitry Andric } 33080fca6ea1SDimitry Andric 33090fca6ea1SDimitry Andric if (RPI.isPaired() && RPI.isScalable()) { 33100fca6ea1SDimitry Andric [[maybe_unused]] const AArch64Subtarget &Subtarget = 33110fca6ea1SDimitry Andric MF.getSubtarget<AArch64Subtarget>(); 33120fca6ea1SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 33130fca6ea1SDimitry Andric unsigned PnReg = AFI->getPredicateRegForFillSpill(); 33140fca6ea1SDimitry Andric assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) && 33150fca6ea1SDimitry Andric "Expects SVE2.1 or SME2 target and a predicate register"); 33160fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS 33170fca6ea1SDimitry Andric auto IsPPR = [](const RegPairInfo &c) { 33180fca6ea1SDimitry Andric return c.Reg1 == RegPairInfo::PPR; 33190fca6ea1SDimitry Andric }; 33200fca6ea1SDimitry Andric auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR); 33210fca6ea1SDimitry Andric auto IsZPR = [](const RegPairInfo &c) { 33220fca6ea1SDimitry Andric return c.Type == RegPairInfo::ZPR; 33230fca6ea1SDimitry Andric }; 33240fca6ea1SDimitry Andric auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR); 33250fca6ea1SDimitry Andric assert(!(PPRBegin < ZPRBegin) && 33260fca6ea1SDimitry Andric "Expected callee save predicate to be handled first"); 33270fca6ea1SDimitry Andric #endif 33280fca6ea1SDimitry Andric if (!PTrueCreated) { 33290fca6ea1SDimitry Andric PTrueCreated = true; 33300fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::PTRUE_C_B), PnReg) 33310fca6ea1SDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 33320fca6ea1SDimitry Andric } 33330fca6ea1SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); 33340fca6ea1SDimitry Andric if 
(!MRI.isReserved(Reg1)) 33350fca6ea1SDimitry Andric MBB.addLiveIn(Reg1); 33360fca6ea1SDimitry Andric if (!MRI.isReserved(Reg2)) 33370fca6ea1SDimitry Andric MBB.addLiveIn(Reg2); 33380fca6ea1SDimitry Andric MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0)); 33390fca6ea1SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 33400fca6ea1SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), 33410fca6ea1SDimitry Andric MachineMemOperand::MOStore, Size, Alignment)); 33420fca6ea1SDimitry Andric MIB.addReg(PnReg); 33430fca6ea1SDimitry Andric MIB.addReg(AArch64::SP) 33440fca6ea1SDimitry Andric .addImm(RPI.Offset) // [sp, #offset*scale], 33450fca6ea1SDimitry Andric // where factor*scale is implicit 33460fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 33470fca6ea1SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 33480fca6ea1SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg1), 33490fca6ea1SDimitry Andric MachineMemOperand::MOStore, Size, Alignment)); 33500fca6ea1SDimitry Andric if (NeedsWinCFI) 33510fca6ea1SDimitry Andric InsertSEH(MIB, TII, MachineInstr::FrameSetup); 33520fca6ea1SDimitry Andric } else { // The code when the pair of ZReg is not present 33530b57cec5SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); 33540b57cec5SDimitry Andric if (!MRI.isReserved(Reg1)) 33550b57cec5SDimitry Andric MBB.addLiveIn(Reg1); 33560b57cec5SDimitry Andric if (RPI.isPaired()) { 33570b57cec5SDimitry Andric if (!MRI.isReserved(Reg2)) 33580b57cec5SDimitry Andric MBB.addLiveIn(Reg2); 33590b57cec5SDimitry Andric MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); 33600b57cec5SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 33610b57cec5SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), 33625ffd83dbSDimitry Andric MachineMemOperand::MOStore, Size, Alignment)); 33630b57cec5SDimitry Andric } 33640b57cec5SDimitry Andric MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) 33650b57cec5SDimitry Andric .addReg(AArch64::SP) 33660b57cec5SDimitry Andric .addImm(RPI.Offset) // [sp, #offset*scale], 33670b57cec5SDimitry Andric // where factor*scale is implicit 33680b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 33690b57cec5SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 33700b57cec5SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg1), 33715ffd83dbSDimitry Andric MachineMemOperand::MOStore, Size, Alignment)); 33720b57cec5SDimitry Andric if (NeedsWinCFI) 33730b57cec5SDimitry Andric InsertSEH(MIB, TII, MachineInstr::FrameSetup); 33740fca6ea1SDimitry Andric } 3375480093f4SDimitry Andric // Update the StackIDs of the SVE stack slots. 
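// ZPR/PPR spill slots have vector-length-dependent sizes, so tagging them
// as TargetStackID::ScalableVector is what makes later offset calculations
// treat them as scalable (addvl/addpl style addressing) instead of as
// fixed-size stack objects.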
3376480093f4SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 33770fca6ea1SDimitry Andric if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) { 33780fca6ea1SDimitry Andric MFI.setStackID(FrameIdxReg1, TargetStackID::ScalableVector); 33790fca6ea1SDimitry Andric if (RPI.isPaired()) 33800fca6ea1SDimitry Andric MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector); 33810fca6ea1SDimitry Andric } 3382480093f4SDimitry Andric 33830fca6ea1SDimitry Andric if (X0Scratch != AArch64::NoRegister) 33840fca6ea1SDimitry Andric BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0) 33850fca6ea1SDimitry Andric .addReg(AArch64::XZR) 33860fca6ea1SDimitry Andric .addReg(X0Scratch, RegState::Undef) 33870fca6ea1SDimitry Andric .addReg(X0Scratch, RegState::Implicit) 33880fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 33890b57cec5SDimitry Andric } 33900b57cec5SDimitry Andric return true; 33910b57cec5SDimitry Andric } 33920b57cec5SDimitry Andric 33930b57cec5SDimitry Andric bool AArch64FrameLowering::restoreCalleeSavedRegisters( 339481ad6265SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 33955ffd83dbSDimitry Andric MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 33960b57cec5SDimitry Andric MachineFunction &MF = *MBB.getParent(); 33970b57cec5SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 33980b57cec5SDimitry Andric DebugLoc DL; 33990b57cec5SDimitry Andric SmallVector<RegPairInfo, 8> RegPairs; 34000b57cec5SDimitry Andric bool NeedsWinCFI = needsWinCFI(MF); 34010b57cec5SDimitry Andric 340281ad6265SDimitry Andric if (MBBI != MBB.end()) 340381ad6265SDimitry Andric DL = MBBI->getDebugLoc(); 34040b57cec5SDimitry Andric 340581ad6265SDimitry Andric computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); 34060fca6ea1SDimitry Andric if (homogeneousPrologEpilog(MF, &MBB)) { 34070fca6ea1SDimitry Andric auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog)) 34080fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 34090fca6ea1SDimitry Andric for (auto &RPI : RegPairs) { 34100fca6ea1SDimitry Andric MIB.addReg(RPI.Reg1, RegState::Define); 34110fca6ea1SDimitry Andric MIB.addReg(RPI.Reg2, RegState::Define); 34120fca6ea1SDimitry Andric } 34130fca6ea1SDimitry Andric return true; 34140fca6ea1SDimitry Andric } 34150b57cec5SDimitry Andric 34160fca6ea1SDimitry Andric // For performance reasons restore SVE register in increasing order 34170fca6ea1SDimitry Andric auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; }; 34180fca6ea1SDimitry Andric auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR); 34190fca6ea1SDimitry Andric auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR); 34200fca6ea1SDimitry Andric std::reverse(PPRBegin, PPREnd); 34210fca6ea1SDimitry Andric auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; }; 34220fca6ea1SDimitry Andric auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR); 34230fca6ea1SDimitry Andric auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR); 34240fca6ea1SDimitry Andric std::reverse(ZPRBegin, ZPREnd); 34250fca6ea1SDimitry Andric 34260fca6ea1SDimitry Andric bool PTrueCreated = false; 34270fca6ea1SDimitry Andric for (const RegPairInfo &RPI : RegPairs) { 34280b57cec5SDimitry Andric unsigned Reg1 = RPI.Reg1; 34290b57cec5SDimitry Andric unsigned Reg2 = RPI.Reg2; 34300b57cec5SDimitry Andric 34310b57cec5SDimitry Andric // Issue sequence of restores 
for cs regs. The last restore may be converted 34320b57cec5SDimitry Andric // to a post-increment load later by emitEpilogue if the callee-save stack 34330b57cec5SDimitry Andric // area allocation can't be combined with the local stack area allocation. 34340b57cec5SDimitry Andric // For example: 34350b57cec5SDimitry Andric // ldp fp, lr, [sp, #32] // addImm(+4) 34360b57cec5SDimitry Andric // ldp x20, x19, [sp, #16] // addImm(+2) 34370b57cec5SDimitry Andric // ldp x22, x21, [sp, #0] // addImm(+0) 34380b57cec5SDimitry Andric // Note: see comment in spillCalleeSavedRegisters() 34390b57cec5SDimitry Andric unsigned LdrOpc; 34405ffd83dbSDimitry Andric unsigned Size; 34415ffd83dbSDimitry Andric Align Alignment; 34420b57cec5SDimitry Andric switch (RPI.Type) { 34430b57cec5SDimitry Andric case RegPairInfo::GPR: 34440b57cec5SDimitry Andric LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; 34450b57cec5SDimitry Andric Size = 8; 34465ffd83dbSDimitry Andric Alignment = Align(8); 34470b57cec5SDimitry Andric break; 34480b57cec5SDimitry Andric case RegPairInfo::FPR64: 34490b57cec5SDimitry Andric LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; 34500b57cec5SDimitry Andric Size = 8; 34515ffd83dbSDimitry Andric Alignment = Align(8); 34520b57cec5SDimitry Andric break; 34530b57cec5SDimitry Andric case RegPairInfo::FPR128: 34540b57cec5SDimitry Andric LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui; 34550b57cec5SDimitry Andric Size = 16; 34565ffd83dbSDimitry Andric Alignment = Align(16); 34570b57cec5SDimitry Andric break; 3458480093f4SDimitry Andric case RegPairInfo::ZPR: 34590fca6ea1SDimitry Andric LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI; 3460480093f4SDimitry Andric Size = 16; 34615ffd83dbSDimitry Andric Alignment = Align(16); 3462480093f4SDimitry Andric break; 3463480093f4SDimitry Andric case RegPairInfo::PPR: 3464480093f4SDimitry Andric LdrOpc = AArch64::LDR_PXI; 3465480093f4SDimitry Andric Size = 2; 34665ffd83dbSDimitry Andric Alignment = Align(2); 3467480093f4SDimitry Andric break; 34680fca6ea1SDimitry Andric case RegPairInfo::VG: 34690fca6ea1SDimitry Andric continue; 34700b57cec5SDimitry Andric } 34710b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI); 34720b57cec5SDimitry Andric if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI); 34730b57cec5SDimitry Andric dbgs() << ") -> fi#(" << RPI.FrameIdx; 34740b57cec5SDimitry Andric if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; 34750b57cec5SDimitry Andric dbgs() << ")\n"); 34760b57cec5SDimitry Andric 34770b57cec5SDimitry Andric // Windows unwind codes require consecutive registers if registers are 34780b57cec5SDimitry Andric // paired. Make the switch here, so that the code below will save (x,x+1) 34790b57cec5SDimitry Andric // and not (x+1,x). 
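// Illustrative: a pair recorded as (x20, x19) is reloaded for WinCFI as
//   ldp x19, x20, [sp, #off]
// so the unwind opcode can name the lower-numbered register first.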
34800b57cec5SDimitry Andric unsigned FrameIdxReg1 = RPI.FrameIdx; 34810b57cec5SDimitry Andric unsigned FrameIdxReg2 = RPI.FrameIdx + 1; 34820b57cec5SDimitry Andric if (NeedsWinCFI && RPI.isPaired()) { 34830b57cec5SDimitry Andric std::swap(Reg1, Reg2); 34840b57cec5SDimitry Andric std::swap(FrameIdxReg1, FrameIdxReg2); 34850b57cec5SDimitry Andric } 34860fca6ea1SDimitry Andric 34870fca6ea1SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 34880fca6ea1SDimitry Andric if (RPI.isPaired() && RPI.isScalable()) { 34890fca6ea1SDimitry Andric [[maybe_unused]] const AArch64Subtarget &Subtarget = 34900fca6ea1SDimitry Andric MF.getSubtarget<AArch64Subtarget>(); 34910fca6ea1SDimitry Andric unsigned PnReg = AFI->getPredicateRegForFillSpill(); 34920fca6ea1SDimitry Andric assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) && 34930fca6ea1SDimitry Andric "Expects SVE2.1 or SME2 target and a predicate register"); 34940fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS 34950fca6ea1SDimitry Andric assert(!(PPRBegin < ZPRBegin) && 34960fca6ea1SDimitry Andric "Expected callee save predicate to be handled first"); 34970fca6ea1SDimitry Andric #endif 34980fca6ea1SDimitry Andric if (!PTrueCreated) { 34990fca6ea1SDimitry Andric PTrueCreated = true; 35000fca6ea1SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::PTRUE_C_B), PnReg) 35010fca6ea1SDimitry Andric .setMIFlags(MachineInstr::FrameDestroy); 35020fca6ea1SDimitry Andric } 350381ad6265SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc)); 35040fca6ea1SDimitry Andric MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0), 35050fca6ea1SDimitry Andric getDefRegState(true)); 35060b57cec5SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 35070b57cec5SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), 35085ffd83dbSDimitry Andric MachineMemOperand::MOLoad, Size, Alignment)); 35090fca6ea1SDimitry Andric MIB.addReg(PnReg); 35100fca6ea1SDimitry Andric MIB.addReg(AArch64::SP) 35110b57cec5SDimitry Andric .addImm(RPI.Offset) // [sp, #offset*scale] 35120b57cec5SDimitry Andric // where factor*scale is implicit 35130b57cec5SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 35140b57cec5SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 35150b57cec5SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg1), 35165ffd83dbSDimitry Andric MachineMemOperand::MOLoad, Size, Alignment)); 35170b57cec5SDimitry Andric if (NeedsWinCFI) 35180b57cec5SDimitry Andric InsertSEH(MIB, TII, MachineInstr::FrameDestroy); 351981ad6265SDimitry Andric } else { 35200fca6ea1SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc)); 35210fca6ea1SDimitry Andric if (RPI.isPaired()) { 35220fca6ea1SDimitry Andric MIB.addReg(Reg2, getDefRegState(true)); 35230fca6ea1SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 35240fca6ea1SDimitry Andric MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), 35250fca6ea1SDimitry Andric MachineMemOperand::MOLoad, Size, Alignment)); 35260fca6ea1SDimitry Andric } 35270fca6ea1SDimitry Andric MIB.addReg(Reg1, getDefRegState(true)); 35280fca6ea1SDimitry Andric MIB.addReg(AArch64::SP) 35290fca6ea1SDimitry Andric .addImm(RPI.Offset) // [sp, #offset*scale] 35300fca6ea1SDimitry Andric // where factor*scale is implicit 35310fca6ea1SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 35320fca6ea1SDimitry Andric MIB.addMemOperand(MF.getMachineMemOperand( 35330fca6ea1SDimitry Andric MachinePointerInfo::getFixedStack(MF, 
FrameIdxReg1), 35340fca6ea1SDimitry Andric MachineMemOperand::MOLoad, Size, Alignment)); 35350fca6ea1SDimitry Andric if (NeedsWinCFI) 35360fca6ea1SDimitry Andric InsertSEH(MIB, TII, MachineInstr::FrameDestroy); 35370fca6ea1SDimitry Andric } 35380fca6ea1SDimitry Andric } 35390fca6ea1SDimitry Andric return true; 35400fca6ea1SDimitry Andric } 35410fca6ea1SDimitry Andric 354262987288SDimitry Andric // Return the FrameID for a MMO. 354362987288SDimitry Andric static std::optional<int> getMMOFrameID(MachineMemOperand *MMO, 35440fca6ea1SDimitry Andric const MachineFrameInfo &MFI) { 35450fca6ea1SDimitry Andric auto *PSV = 35460fca6ea1SDimitry Andric dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue()); 35470fca6ea1SDimitry Andric if (PSV) 35480fca6ea1SDimitry Andric return std::optional<int>(PSV->getFrameIndex()); 35490fca6ea1SDimitry Andric 35500fca6ea1SDimitry Andric if (MMO->getValue()) { 35510fca6ea1SDimitry Andric if (auto *Al = dyn_cast<AllocaInst>(getUnderlyingObject(MMO->getValue()))) { 35520fca6ea1SDimitry Andric for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); 35530fca6ea1SDimitry Andric FI++) 35540fca6ea1SDimitry Andric if (MFI.getObjectAllocation(FI) == Al) 35550fca6ea1SDimitry Andric return FI; 355681ad6265SDimitry Andric } 35570b57cec5SDimitry Andric } 35580b57cec5SDimitry Andric 35590fca6ea1SDimitry Andric return std::nullopt; 35600fca6ea1SDimitry Andric } 35610fca6ea1SDimitry Andric 356262987288SDimitry Andric // Return the FrameID for a Load/Store instruction by looking at the first MMO. 356362987288SDimitry Andric static std::optional<int> getLdStFrameID(const MachineInstr &MI, 356462987288SDimitry Andric const MachineFrameInfo &MFI) { 356562987288SDimitry Andric if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) 356662987288SDimitry Andric return std::nullopt; 356762987288SDimitry Andric 356862987288SDimitry Andric return getMMOFrameID(*MI.memoperands_begin(), MFI); 356962987288SDimitry Andric } 357062987288SDimitry Andric 35710fca6ea1SDimitry Andric // Check if a Hazard slot is needed for the current function, and if so create 35720fca6ea1SDimitry Andric // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex, 35730fca6ea1SDimitry Andric // which can be used to determine if any hazard padding is needed. 35740fca6ea1SDimitry Andric void AArch64FrameLowering::determineStackHazardSlot( 35750fca6ea1SDimitry Andric MachineFunction &MF, BitVector &SavedRegs) const { 35760fca6ea1SDimitry Andric if (StackHazardSize == 0 || StackHazardSize % 16 != 0 || 35770fca6ea1SDimitry Andric MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex()) 35780fca6ea1SDimitry Andric return; 35790fca6ea1SDimitry Andric 35800fca6ea1SDimitry Andric // Stack hazards are only needed in streaming functions. 35810fca6ea1SDimitry Andric SMEAttrs Attrs(MF.getFunction()); 35820fca6ea1SDimitry Andric if (!StackHazardInNonStreaming && Attrs.hasNonStreamingInterfaceAndBody()) 35830fca6ea1SDimitry Andric return; 35840fca6ea1SDimitry Andric 35850fca6ea1SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 35860fca6ea1SDimitry Andric 35870fca6ea1SDimitry Andric // Add a hazard slot if there are any CSR FPR registers, or are any fp-only 35880fca6ea1SDimitry Andric // stack objects. 
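// Sketch of the scan below: every load/store is visited once, each frame
// index is marked as GPR-accessed (bit 0) and/or FPR/SVE-accessed (bit 1),
// and only objects touched exclusively by FP/NEON/SVE instructions count
// as fp-only for the purpose of creating the hazard slot.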
35890fca6ea1SDimitry Andric bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) { 35900fca6ea1SDimitry Andric return AArch64::FPR64RegClass.contains(Reg) || 35910fca6ea1SDimitry Andric AArch64::FPR128RegClass.contains(Reg) || 35920fca6ea1SDimitry Andric AArch64::ZPRRegClass.contains(Reg) || 35930fca6ea1SDimitry Andric AArch64::PPRRegClass.contains(Reg); 35940fca6ea1SDimitry Andric }); 35950fca6ea1SDimitry Andric bool HasFPRStackObjects = false; 35960fca6ea1SDimitry Andric if (!HasFPRCSRs) { 35970fca6ea1SDimitry Andric std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd()); 35980fca6ea1SDimitry Andric for (auto &MBB : MF) { 35990fca6ea1SDimitry Andric for (auto &MI : MBB) { 36000fca6ea1SDimitry Andric std::optional<int> FI = getLdStFrameID(MI, MFI); 36010fca6ea1SDimitry Andric if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) { 36020fca6ea1SDimitry Andric if (MFI.getStackID(*FI) == TargetStackID::ScalableVector || 36030fca6ea1SDimitry Andric AArch64InstrInfo::isFpOrNEON(MI)) 36040fca6ea1SDimitry Andric FrameObjects[*FI] |= 2; 36050fca6ea1SDimitry Andric else 36060fca6ea1SDimitry Andric FrameObjects[*FI] |= 1; 36070fca6ea1SDimitry Andric } 36080fca6ea1SDimitry Andric } 36090fca6ea1SDimitry Andric } 36100fca6ea1SDimitry Andric HasFPRStackObjects = 36110fca6ea1SDimitry Andric any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; }); 36120fca6ea1SDimitry Andric } 36130fca6ea1SDimitry Andric 36140fca6ea1SDimitry Andric if (HasFPRCSRs || HasFPRStackObjects) { 36150fca6ea1SDimitry Andric int ID = MFI.CreateStackObject(StackHazardSize, Align(16), false); 36160fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Created Hazard slot at " << ID << " size " 36170fca6ea1SDimitry Andric << StackHazardSize << "\n"); 36180fca6ea1SDimitry Andric MF.getInfo<AArch64FunctionInfo>()->setStackHazardSlotIndex(ID); 36190fca6ea1SDimitry Andric } 36200b57cec5SDimitry Andric } 36210b57cec5SDimitry Andric 36220b57cec5SDimitry Andric void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, 36230b57cec5SDimitry Andric BitVector &SavedRegs, 36240b57cec5SDimitry Andric RegScavenger *RS) const { 36250b57cec5SDimitry Andric // All calls are tail calls in GHC calling conv, and functions have no 36260b57cec5SDimitry Andric // prologue/epilogue. 36270b57cec5SDimitry Andric if (MF.getFunction().getCallingConv() == CallingConv::GHC) 36280b57cec5SDimitry Andric return; 36290b57cec5SDimitry Andric 36300b57cec5SDimitry Andric TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 36310b57cec5SDimitry Andric const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( 36320b57cec5SDimitry Andric MF.getSubtarget().getRegisterInfo()); 36335ffd83dbSDimitry Andric const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); 36340b57cec5SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 36350b57cec5SDimitry Andric unsigned UnspilledCSGPR = AArch64::NoRegister; 36360b57cec5SDimitry Andric unsigned UnspilledCSGPRPaired = AArch64::NoRegister; 36370b57cec5SDimitry Andric 36380b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 36390b57cec5SDimitry Andric const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); 36400b57cec5SDimitry Andric 36410b57cec5SDimitry Andric unsigned BasePointerReg = RegInfo->hasBasePointer(MF) 36420b57cec5SDimitry Andric ? 
RegInfo->getBaseRegister() 36430b57cec5SDimitry Andric : (unsigned)AArch64::NoRegister; 36440b57cec5SDimitry Andric 36450b57cec5SDimitry Andric unsigned ExtraCSSpill = 0; 36465f757f3fSDimitry Andric bool HasUnpairedGPR64 = false; 36470b57cec5SDimitry Andric // Figure out which callee-saved registers to save/restore. 36480b57cec5SDimitry Andric for (unsigned i = 0; CSRegs[i]; ++i) { 36490b57cec5SDimitry Andric const unsigned Reg = CSRegs[i]; 36500b57cec5SDimitry Andric 36510b57cec5SDimitry Andric // Add the base pointer register to SavedRegs if it is callee-save. 36520b57cec5SDimitry Andric if (Reg == BasePointerReg) 36530b57cec5SDimitry Andric SavedRegs.set(Reg); 36540b57cec5SDimitry Andric 36550b57cec5SDimitry Andric bool RegUsed = SavedRegs.test(Reg); 3656480093f4SDimitry Andric unsigned PairedReg = AArch64::NoRegister; 36575f757f3fSDimitry Andric const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg); 36585f757f3fSDimitry Andric if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) || 36595f757f3fSDimitry Andric AArch64::FPR128RegClass.contains(Reg)) { 36605f757f3fSDimitry Andric // Compensate for odd numbers of GP CSRs. 36615f757f3fSDimitry Andric // For now, all the known cases of odd number of CSRs are of GPRs. 36625f757f3fSDimitry Andric if (HasUnpairedGPR64) 36635f757f3fSDimitry Andric PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1]; 36645f757f3fSDimitry Andric else 3665480093f4SDimitry Andric PairedReg = CSRegs[i ^ 1]; 36665f757f3fSDimitry Andric } 36675f757f3fSDimitry Andric 36685f757f3fSDimitry Andric // If the function requires all the GP registers to save (SavedRegs), 36695f757f3fSDimitry Andric // and there are an odd number of GP CSRs at the same time (CSRegs), 36705f757f3fSDimitry Andric // PairedReg could be in a different register class from Reg, which would 36715f757f3fSDimitry Andric // lead to a FPR (usually D8) accidentally being marked saved. 36725f757f3fSDimitry Andric if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) { 36735f757f3fSDimitry Andric PairedReg = AArch64::NoRegister; 36745f757f3fSDimitry Andric HasUnpairedGPR64 = true; 36755f757f3fSDimitry Andric } 36765f757f3fSDimitry Andric assert(PairedReg == AArch64::NoRegister || 36775f757f3fSDimitry Andric AArch64::GPR64RegClass.contains(Reg, PairedReg) || 36785f757f3fSDimitry Andric AArch64::FPR64RegClass.contains(Reg, PairedReg) || 36795f757f3fSDimitry Andric AArch64::FPR128RegClass.contains(Reg, PairedReg)); 3680480093f4SDimitry Andric 36810b57cec5SDimitry Andric if (!RegUsed) { 36820b57cec5SDimitry Andric if (AArch64::GPR64RegClass.contains(Reg) && 36830b57cec5SDimitry Andric !RegInfo->isReservedReg(MF, Reg)) { 36840b57cec5SDimitry Andric UnspilledCSGPR = Reg; 36850b57cec5SDimitry Andric UnspilledCSGPRPaired = PairedReg; 36860b57cec5SDimitry Andric } 36870b57cec5SDimitry Andric continue; 36880b57cec5SDimitry Andric } 36890b57cec5SDimitry Andric 36900b57cec5SDimitry Andric // MachO's compact unwind format relies on all registers being stored in 36910b57cec5SDimitry Andric // pairs. 36920b57cec5SDimitry Andric // FIXME: the usual format is actually better if unwinding isn't needed. 
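// Illustrative consequence: if only x23 is used by the function on a MachO
// target, x24 is still added to SavedRegs purely so the spill can be
// emitted as a single stp and encoded by compact unwind.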
3693fe6060f1SDimitry Andric if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister && 36940b57cec5SDimitry Andric !SavedRegs.test(PairedReg)) { 36950b57cec5SDimitry Andric SavedRegs.set(PairedReg); 36960b57cec5SDimitry Andric if (AArch64::GPR64RegClass.contains(PairedReg) && 36970b57cec5SDimitry Andric !RegInfo->isReservedReg(MF, PairedReg)) 36980b57cec5SDimitry Andric ExtraCSSpill = PairedReg; 36990b57cec5SDimitry Andric } 37000b57cec5SDimitry Andric } 37010b57cec5SDimitry Andric 37025ffd83dbSDimitry Andric if (MF.getFunction().getCallingConv() == CallingConv::Win64 && 37035ffd83dbSDimitry Andric !Subtarget.isTargetWindows()) { 37045ffd83dbSDimitry Andric // For Windows calling convention on a non-windows OS, where X18 is treated 37055ffd83dbSDimitry Andric // as reserved, back up X18 when entering non-windows code (marked with the 37065ffd83dbSDimitry Andric // Windows calling convention) and restore when returning regardless of 37075ffd83dbSDimitry Andric // whether the individual function uses it - it might call other functions 37085ffd83dbSDimitry Andric // that clobber it. 37095ffd83dbSDimitry Andric SavedRegs.set(AArch64::X18); 37105ffd83dbSDimitry Andric } 37115ffd83dbSDimitry Andric 37120b57cec5SDimitry Andric // Calculates the callee saved stack size. 37130b57cec5SDimitry Andric unsigned CSStackSize = 0; 3714480093f4SDimitry Andric unsigned SVECSStackSize = 0; 37150b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 37160b57cec5SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo(); 3717480093f4SDimitry Andric for (unsigned Reg : SavedRegs.set_bits()) { 3718480093f4SDimitry Andric auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8; 3719480093f4SDimitry Andric if (AArch64::PPRRegClass.contains(Reg) || 3720480093f4SDimitry Andric AArch64::ZPRRegClass.contains(Reg)) 3721480093f4SDimitry Andric SVECSStackSize += RegSize; 3722480093f4SDimitry Andric else 3723480093f4SDimitry Andric CSStackSize += RegSize; 3724480093f4SDimitry Andric } 37250b57cec5SDimitry Andric 37260fca6ea1SDimitry Andric // Increase the callee-saved stack size if the function has streaming mode 37270fca6ea1SDimitry Andric // changes, as we will need to spill the value of the VG register. 37280fca6ea1SDimitry Andric // For locally streaming functions, we spill both the streaming and 37290fca6ea1SDimitry Andric // non-streaming VG value. 37300fca6ea1SDimitry Andric const Function &F = MF.getFunction(); 37310fca6ea1SDimitry Andric SMEAttrs Attrs(F); 3732*71ac745dSDimitry Andric if (requiresSaveVG(MF)) { 37330fca6ea1SDimitry Andric if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface()) 37340fca6ea1SDimitry Andric CSStackSize += 16; 37350fca6ea1SDimitry Andric else 37360fca6ea1SDimitry Andric CSStackSize += 8; 37370fca6ea1SDimitry Andric } 37380fca6ea1SDimitry Andric 37390fca6ea1SDimitry Andric // Determine if a Hazard slot should be used, and increase the CSStackSize by 37400fca6ea1SDimitry Andric // StackHazardSize if so. 37410fca6ea1SDimitry Andric determineStackHazardSlot(MF, SavedRegs); 37420fca6ea1SDimitry Andric if (AFI->hasStackHazardSlotIndex()) 37430fca6ea1SDimitry Andric CSStackSize += StackHazardSize; 37440fca6ea1SDimitry Andric 37450b57cec5SDimitry Andric // Save number of saved regs, so we can easily update CSStackSize later. 
37460b57cec5SDimitry Andric unsigned NumSavedRegs = SavedRegs.count(); 37470b57cec5SDimitry Andric 37480b57cec5SDimitry Andric // The frame record needs to be created by saving the appropriate registers 3749480093f4SDimitry Andric uint64_t EstimatedStackSize = MFI.estimateStackSize(MF); 37500b57cec5SDimitry Andric if (hasFP(MF) || 37510b57cec5SDimitry Andric windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) { 37520b57cec5SDimitry Andric SavedRegs.set(AArch64::FP); 37530b57cec5SDimitry Andric SavedRegs.set(AArch64::LR); 37540b57cec5SDimitry Andric } 37550b57cec5SDimitry Andric 37560fca6ea1SDimitry Andric LLVM_DEBUG({ 37570fca6ea1SDimitry Andric dbgs() << "*** determineCalleeSaves\nSaved CSRs:"; 37580fca6ea1SDimitry Andric for (unsigned Reg : SavedRegs.set_bits()) 37590fca6ea1SDimitry Andric dbgs() << ' ' << printReg(Reg, RegInfo); 37600fca6ea1SDimitry Andric dbgs() << "\n"; 37610fca6ea1SDimitry Andric }); 37620b57cec5SDimitry Andric 37630b57cec5SDimitry Andric // If any callee-saved registers are used, the frame cannot be eliminated. 37648bcb0991SDimitry Andric int64_t SVEStackSize = 3765480093f4SDimitry Andric alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16); 37668bcb0991SDimitry Andric bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize; 37670b57cec5SDimitry Andric 37680b57cec5SDimitry Andric // The CSR spill slots have not been allocated yet, so estimateStackSize 37690b57cec5SDimitry Andric // won't include them. 37700b57cec5SDimitry Andric unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); 37718bcb0991SDimitry Andric 377206c3fb27SDimitry Andric // We may address some of the stack above the canonical frame address, either 377306c3fb27SDimitry Andric // for our own arguments or during a call. Include that in calculating whether 377406c3fb27SDimitry Andric // we have complicated addressing concerns. 377506c3fb27SDimitry Andric int64_t CalleeStackUsed = 0; 377606c3fb27SDimitry Andric for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) { 377706c3fb27SDimitry Andric int64_t FixedOff = MFI.getObjectOffset(I); 37780fca6ea1SDimitry Andric if (FixedOff > CalleeStackUsed) 37790fca6ea1SDimitry Andric CalleeStackUsed = FixedOff; 378006c3fb27SDimitry Andric } 378106c3fb27SDimitry Andric 37828bcb0991SDimitry Andric // Conservatively always assume BigStack when there are SVE spills. 378306c3fb27SDimitry Andric bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize + 378406c3fb27SDimitry Andric CalleeStackUsed) > EstimatedStackSizeLimit; 37850b57cec5SDimitry Andric if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) 37860b57cec5SDimitry Andric AFI->setHasStackFrame(true); 37870b57cec5SDimitry Andric 37880b57cec5SDimitry Andric // Estimate if we might need to scavenge a register at some point in order 37890b57cec5SDimitry Andric // to materialize a stack offset. If so, either spill one additional 37900b57cec5SDimitry Andric // callee-saved register or reserve a special spill slot to facilitate 37910b57cec5SDimitry Andric // register scavenging. If we already spilled an extra callee-saved register 37920b57cec5SDimitry Andric // above to keep the number of spills even, we don't need to do anything else 37930b57cec5SDimitry Andric // here. 
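  // For example, once locals grow beyond the offset range reachable by the
  // load/store immediates used to access the frame, offsets must first be
  // materialized into a register, so an extra callee-saved GPR is spilled (or
  // an emergency spill slot reserved) to give the register scavenger something
  // to work with.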
37940b57cec5SDimitry Andric if (BigStack) { 37950b57cec5SDimitry Andric if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { 37960b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo) 37970b57cec5SDimitry Andric << " to get a scratch register.\n"); 37980b57cec5SDimitry Andric SavedRegs.set(UnspilledCSGPR); 37995f757f3fSDimitry Andric ExtraCSSpill = UnspilledCSGPR; 38005f757f3fSDimitry Andric 38010b57cec5SDimitry Andric // MachO's compact unwind format relies on all registers being stored in 38020b57cec5SDimitry Andric // pairs, so if we need to spill one extra for BigStack, then we need to 38030b57cec5SDimitry Andric // store the pair. 38045f757f3fSDimitry Andric if (producePairRegisters(MF)) { 38055f757f3fSDimitry Andric if (UnspilledCSGPRPaired == AArch64::NoRegister) { 38065f757f3fSDimitry Andric // Failed to make a pair for compact unwind format, revert spilling. 38075f757f3fSDimitry Andric if (produceCompactUnwindFrame(MF)) { 38085f757f3fSDimitry Andric SavedRegs.reset(UnspilledCSGPR); 38095f757f3fSDimitry Andric ExtraCSSpill = AArch64::NoRegister; 38105f757f3fSDimitry Andric } 38115f757f3fSDimitry Andric } else 38120b57cec5SDimitry Andric SavedRegs.set(UnspilledCSGPRPaired); 38135f757f3fSDimitry Andric } 38140b57cec5SDimitry Andric } 38150b57cec5SDimitry Andric 38160b57cec5SDimitry Andric // If we didn't find an extra callee-saved register to spill, create 38170b57cec5SDimitry Andric // an emergency spill slot. 38180b57cec5SDimitry Andric if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { 38190b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 38200b57cec5SDimitry Andric const TargetRegisterClass &RC = AArch64::GPR64RegClass; 38210b57cec5SDimitry Andric unsigned Size = TRI->getSpillSize(RC); 38225ffd83dbSDimitry Andric Align Alignment = TRI->getSpillAlign(RC); 38235ffd83dbSDimitry Andric int FI = MFI.CreateStackObject(Size, Alignment, false); 38240b57cec5SDimitry Andric RS->addScavengingFrameIndex(FI); 38250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI 38260b57cec5SDimitry Andric << " as the emergency spill slot.\n"); 38270b57cec5SDimitry Andric } 38280b57cec5SDimitry Andric } 38290b57cec5SDimitry Andric 38300b57cec5SDimitry Andric // Adding the size of additional 64bit GPR saves. 38310b57cec5SDimitry Andric CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs); 3832fe6060f1SDimitry Andric 3833fe6060f1SDimitry Andric // A Swift asynchronous context extends the frame record with a pointer 3834fe6060f1SDimitry Andric // directly before FP. 3835fe6060f1SDimitry Andric if (hasFP(MF) && AFI->hasSwiftAsyncContext()) 3836fe6060f1SDimitry Andric CSStackSize += 8; 3837fe6060f1SDimitry Andric 3838480093f4SDimitry Andric uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16); 38390b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Estimated stack frame size: " 38400fca6ea1SDimitry Andric << EstimatedStackSize + AlignedCSStackSize << " bytes.\n"); 38410b57cec5SDimitry Andric 3842480093f4SDimitry Andric assert((!MFI.isCalleeSavedInfoValid() || 3843480093f4SDimitry Andric AFI->getCalleeSavedStackSize() == AlignedCSStackSize) && 3844480093f4SDimitry Andric "Should not invalidate callee saved info"); 3845480093f4SDimitry Andric 38460b57cec5SDimitry Andric // Round up to register pair alignment to avoid additional SP adjustment 38470b57cec5SDimitry Andric // instructions. 
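  // E.g. a hypothetical set of saves X19, X20, FP, LR plus D8 comes to
  // 5 * 8 = 40 bytes, which rounds up to AlignedCSStackSize = 48; the spare
  // 8 bytes are what setCalleeSaveStackHasFreeSpace records below.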
38480b57cec5SDimitry Andric AFI->setCalleeSavedStackSize(AlignedCSStackSize); 38490b57cec5SDimitry Andric AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize); 3850480093f4SDimitry Andric AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16)); 38510b57cec5SDimitry Andric } 38520b57cec5SDimitry Andric 3853e8d8bef9SDimitry Andric bool AArch64FrameLowering::assignCalleeSavedSpillSlots( 3854fe6060f1SDimitry Andric MachineFunction &MF, const TargetRegisterInfo *RegInfo, 3855fe6060f1SDimitry Andric std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex, 3856fe6060f1SDimitry Andric unsigned &MaxCSFrameIndex) const { 3857e8d8bef9SDimitry Andric bool NeedsWinCFI = needsWinCFI(MF); 3858e8d8bef9SDimitry Andric // To match the canonical windows frame layout, reverse the list of 3859e8d8bef9SDimitry Andric // callee saved registers to get them laid out by PrologEpilogInserter 3860e8d8bef9SDimitry Andric // in the right order. (PrologEpilogInserter allocates stack objects top 3861e8d8bef9SDimitry Andric // down. Windows canonical prologs store higher numbered registers at 3862e8d8bef9SDimitry Andric // the top, thus have the CSI array start from the highest registers.) 3863e8d8bef9SDimitry Andric if (NeedsWinCFI) 3864e8d8bef9SDimitry Andric std::reverse(CSI.begin(), CSI.end()); 3865fe6060f1SDimitry Andric 3866fe6060f1SDimitry Andric if (CSI.empty()) 3867fe6060f1SDimitry Andric return true; // Early exit if no callee saved registers are modified! 3868fe6060f1SDimitry Andric 3869fe6060f1SDimitry Andric // Now that we know which registers need to be saved and restored, allocate 3870fe6060f1SDimitry Andric // stack slots for them. 3871fe6060f1SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 3872fe6060f1SDimitry Andric auto *AFI = MF.getInfo<AArch64FunctionInfo>(); 387381ad6265SDimitry Andric 387481ad6265SDimitry Andric bool UsesWinAAPCS = isTargetWindows(MF); 387581ad6265SDimitry Andric if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) { 387681ad6265SDimitry Andric int FrameIdx = MFI.CreateStackObject(8, Align(16), true); 387781ad6265SDimitry Andric AFI->setSwiftAsyncContextFrameIdx(FrameIdx); 38780fca6ea1SDimitry Andric if ((unsigned)FrameIdx < MinCSFrameIndex) 38790fca6ea1SDimitry Andric MinCSFrameIndex = FrameIdx; 38800fca6ea1SDimitry Andric if ((unsigned)FrameIdx > MaxCSFrameIndex) 38810fca6ea1SDimitry Andric MaxCSFrameIndex = FrameIdx; 388281ad6265SDimitry Andric } 388381ad6265SDimitry Andric 38840fca6ea1SDimitry Andric // Insert VG into the list of CSRs, immediately before LR if saved. 3885*71ac745dSDimitry Andric if (requiresSaveVG(MF)) { 38860fca6ea1SDimitry Andric std::vector<CalleeSavedInfo> VGSaves; 38870fca6ea1SDimitry Andric SMEAttrs Attrs(MF.getFunction()); 38880fca6ea1SDimitry Andric 38890fca6ea1SDimitry Andric auto VGInfo = CalleeSavedInfo(AArch64::VG); 38900fca6ea1SDimitry Andric VGInfo.setRestored(false); 38910fca6ea1SDimitry Andric VGSaves.push_back(VGInfo); 38920fca6ea1SDimitry Andric 38930fca6ea1SDimitry Andric // Add VG again if the function is locally-streaming, as we will spill two 38940fca6ea1SDimitry Andric // values. 
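    // Illustrative result, assuming a CSI list of { X19, LR, FP }: a function
    // with streaming-mode changes ends up with { X19, VG, LR, FP }, and a
    // locally-streaming function with { X19, VG, VG, LR, FP }.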
38950fca6ea1SDimitry Andric if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface()) 38960fca6ea1SDimitry Andric VGSaves.push_back(VGInfo); 38970fca6ea1SDimitry Andric 38980fca6ea1SDimitry Andric bool InsertBeforeLR = false; 38990fca6ea1SDimitry Andric 39000fca6ea1SDimitry Andric for (unsigned I = 0; I < CSI.size(); I++) 39010fca6ea1SDimitry Andric if (CSI[I].getReg() == AArch64::LR) { 39020fca6ea1SDimitry Andric InsertBeforeLR = true; 39030fca6ea1SDimitry Andric CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end()); 39040fca6ea1SDimitry Andric break; 39050fca6ea1SDimitry Andric } 39060fca6ea1SDimitry Andric 39070fca6ea1SDimitry Andric if (!InsertBeforeLR) 39080fca6ea1SDimitry Andric CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end()); 39090fca6ea1SDimitry Andric } 39100fca6ea1SDimitry Andric 39110fca6ea1SDimitry Andric Register LastReg = 0; 39120fca6ea1SDimitry Andric int HazardSlotIndex = std::numeric_limits<int>::max(); 3913fe6060f1SDimitry Andric for (auto &CS : CSI) { 3914fe6060f1SDimitry Andric Register Reg = CS.getReg(); 3915fe6060f1SDimitry Andric const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); 3916fe6060f1SDimitry Andric 39170fca6ea1SDimitry Andric // Create a hazard slot as we switch between GPR and FPR CSRs. 39180fca6ea1SDimitry Andric if (AFI->hasStackHazardSlotIndex() && 39190fca6ea1SDimitry Andric (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) && 39200fca6ea1SDimitry Andric AArch64InstrInfo::isFpOrNEON(Reg)) { 39210fca6ea1SDimitry Andric assert(HazardSlotIndex == std::numeric_limits<int>::max() && 39220fca6ea1SDimitry Andric "Unexpected register order for hazard slot"); 39230fca6ea1SDimitry Andric HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true); 39240fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex 39250fca6ea1SDimitry Andric << "\n"); 39260fca6ea1SDimitry Andric AFI->setStackHazardCSRSlotIndex(HazardSlotIndex); 39270fca6ea1SDimitry Andric if ((unsigned)HazardSlotIndex < MinCSFrameIndex) 39280fca6ea1SDimitry Andric MinCSFrameIndex = HazardSlotIndex; 39290fca6ea1SDimitry Andric if ((unsigned)HazardSlotIndex > MaxCSFrameIndex) 39300fca6ea1SDimitry Andric MaxCSFrameIndex = HazardSlotIndex; 39310fca6ea1SDimitry Andric } 39320fca6ea1SDimitry Andric 3933fe6060f1SDimitry Andric unsigned Size = RegInfo->getSpillSize(*RC); 3934fe6060f1SDimitry Andric Align Alignment(RegInfo->getSpillAlign(*RC)); 3935fe6060f1SDimitry Andric int FrameIdx = MFI.CreateStackObject(Size, Alignment, true); 3936fe6060f1SDimitry Andric CS.setFrameIdx(FrameIdx); 3937fe6060f1SDimitry Andric 39380fca6ea1SDimitry Andric if ((unsigned)FrameIdx < MinCSFrameIndex) 39390fca6ea1SDimitry Andric MinCSFrameIndex = FrameIdx; 39400fca6ea1SDimitry Andric if ((unsigned)FrameIdx > MaxCSFrameIndex) 39410fca6ea1SDimitry Andric MaxCSFrameIndex = FrameIdx; 3942fe6060f1SDimitry Andric 3943fe6060f1SDimitry Andric // Grab 8 bytes below FP for the extended asynchronous frame info. 
394481ad6265SDimitry Andric if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS && 394581ad6265SDimitry Andric Reg == AArch64::FP) { 3946fe6060f1SDimitry Andric FrameIdx = MFI.CreateStackObject(8, Alignment, true); 3947fe6060f1SDimitry Andric AFI->setSwiftAsyncContextFrameIdx(FrameIdx); 39480fca6ea1SDimitry Andric if ((unsigned)FrameIdx < MinCSFrameIndex) 39490fca6ea1SDimitry Andric MinCSFrameIndex = FrameIdx; 39500fca6ea1SDimitry Andric if ((unsigned)FrameIdx > MaxCSFrameIndex) 39510fca6ea1SDimitry Andric MaxCSFrameIndex = FrameIdx; 3952fe6060f1SDimitry Andric } 39530fca6ea1SDimitry Andric LastReg = Reg; 3954fe6060f1SDimitry Andric } 39550fca6ea1SDimitry Andric 39560fca6ea1SDimitry Andric // Add hazard slot in the case where no FPR CSRs are present. 39570fca6ea1SDimitry Andric if (AFI->hasStackHazardSlotIndex() && 39580fca6ea1SDimitry Andric HazardSlotIndex == std::numeric_limits<int>::max()) { 39590fca6ea1SDimitry Andric HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true); 39600fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex 39610fca6ea1SDimitry Andric << "\n"); 39620fca6ea1SDimitry Andric AFI->setStackHazardCSRSlotIndex(HazardSlotIndex); 39630fca6ea1SDimitry Andric if ((unsigned)HazardSlotIndex < MinCSFrameIndex) 39640fca6ea1SDimitry Andric MinCSFrameIndex = HazardSlotIndex; 39650fca6ea1SDimitry Andric if ((unsigned)HazardSlotIndex > MaxCSFrameIndex) 39660fca6ea1SDimitry Andric MaxCSFrameIndex = HazardSlotIndex; 39670fca6ea1SDimitry Andric } 39680fca6ea1SDimitry Andric 3969fe6060f1SDimitry Andric return true; 3970e8d8bef9SDimitry Andric } 3971e8d8bef9SDimitry Andric 39720b57cec5SDimitry Andric bool AArch64FrameLowering::enableStackSlotScavenging( 39730b57cec5SDimitry Andric const MachineFunction &MF) const { 39740b57cec5SDimitry Andric const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 39755f757f3fSDimitry Andric // If the function has streaming-mode changes, don't scavenge a 39765f757f3fSDimitry Andric // spillslot in the callee-save area, as that might require an 39775f757f3fSDimitry Andric // 'addvl' in the streaming-mode-changing call-sequence when the 39785f757f3fSDimitry Andric // function doesn't use a FP. 39795f757f3fSDimitry Andric if (AFI->hasStreamingModeChanges() && !hasFP(MF)) 39805f757f3fSDimitry Andric return false; 39810fca6ea1SDimitry Andric // Don't allow register salvaging with hazard slots, in case it moves objects 39820fca6ea1SDimitry Andric // into the wrong place. 39830fca6ea1SDimitry Andric if (AFI->hasStackHazardSlotIndex()) 39840fca6ea1SDimitry Andric return false; 39850b57cec5SDimitry Andric return AFI->hasCalleeSaveStackFreeSpace(); 39860b57cec5SDimitry Andric } 39870b57cec5SDimitry Andric 3988480093f4SDimitry Andric /// returns true if there are any SVE callee saves. 
3989480093f4SDimitry Andric static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, 3990480093f4SDimitry Andric int &Min, int &Max) { 3991480093f4SDimitry Andric Min = std::numeric_limits<int>::max(); 3992480093f4SDimitry Andric Max = std::numeric_limits<int>::min(); 3993480093f4SDimitry Andric 3994480093f4SDimitry Andric if (!MFI.isCalleeSavedInfoValid()) 3995480093f4SDimitry Andric return false; 3996480093f4SDimitry Andric 3997480093f4SDimitry Andric const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 3998480093f4SDimitry Andric for (auto &CS : CSI) { 3999480093f4SDimitry Andric if (AArch64::ZPRRegClass.contains(CS.getReg()) || 4000480093f4SDimitry Andric AArch64::PPRRegClass.contains(CS.getReg())) { 4001480093f4SDimitry Andric assert((Max == std::numeric_limits<int>::min() || 4002480093f4SDimitry Andric Max + 1 == CS.getFrameIdx()) && 4003480093f4SDimitry Andric "SVE CalleeSaves are not consecutive"); 4004480093f4SDimitry Andric 4005480093f4SDimitry Andric Min = std::min(Min, CS.getFrameIdx()); 4006480093f4SDimitry Andric Max = std::max(Max, CS.getFrameIdx()); 4007480093f4SDimitry Andric } 4008480093f4SDimitry Andric } 4009480093f4SDimitry Andric return Min != std::numeric_limits<int>::max(); 4010480093f4SDimitry Andric } 4011480093f4SDimitry Andric 4012480093f4SDimitry Andric // Process all the SVE stack objects and determine offsets for each 4013480093f4SDimitry Andric // object. If AssignOffsets is true, the offsets get assigned. 4014480093f4SDimitry Andric // Fills in the first and last callee-saved frame indices into 4015480093f4SDimitry Andric // Min/MaxCSFrameIndex, respectively. 4016480093f4SDimitry Andric // Returns the size of the stack. 4017480093f4SDimitry Andric static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, 4018480093f4SDimitry Andric int &MinCSFrameIndex, 4019480093f4SDimitry Andric int &MaxCSFrameIndex, 4020480093f4SDimitry Andric bool AssignOffsets) { 4021979e22ffSDimitry Andric #ifndef NDEBUG 4022480093f4SDimitry Andric // First process all fixed stack objects. 40238bcb0991SDimitry Andric for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) 4024e8d8bef9SDimitry Andric assert(MFI.getStackID(I) != TargetStackID::ScalableVector && 4025979e22ffSDimitry Andric "SVE vectors should never be passed on the stack by value, only by " 4026979e22ffSDimitry Andric "reference."); 4027979e22ffSDimitry Andric #endif 40288bcb0991SDimitry Andric 4029480093f4SDimitry Andric auto Assign = [&MFI](int FI, int64_t Offset) { 4030480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n"); 4031480093f4SDimitry Andric MFI.setObjectOffset(FI, Offset); 4032480093f4SDimitry Andric }; 4033480093f4SDimitry Andric 4034979e22ffSDimitry Andric int64_t Offset = 0; 4035979e22ffSDimitry Andric 4036480093f4SDimitry Andric // Then process all callee saved slots. 4037480093f4SDimitry Andric if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) { 4038480093f4SDimitry Andric // Assign offsets to the callee save slots. 4039480093f4SDimitry Andric for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) { 4040480093f4SDimitry Andric Offset += MFI.getObjectSize(I); 40415ffd83dbSDimitry Andric Offset = alignTo(Offset, MFI.getObjectAlign(I)); 4042480093f4SDimitry Andric if (AssignOffsets) 4043480093f4SDimitry Andric Assign(I, -Offset); 4044480093f4SDimitry Andric } 4045480093f4SDimitry Andric } 4046480093f4SDimitry Andric 4047979e22ffSDimitry Andric // Ensure that the Callee-save area is aligned to 16bytes. 
4048979e22ffSDimitry Andric Offset = alignTo(Offset, Align(16U)); 4049979e22ffSDimitry Andric 4050480093f4SDimitry Andric // Create a buffer of SVE objects to allocate and sort it. 4051480093f4SDimitry Andric SmallVector<int, 8> ObjectsToAllocate; 40520eae32dcSDimitry Andric // If we have a stack protector, and we've previously decided that we have SVE 40530eae32dcSDimitry Andric // objects on the stack and thus need it to go in the SVE stack area, then it 40540eae32dcSDimitry Andric // needs to go first. 40550eae32dcSDimitry Andric int StackProtectorFI = -1; 40560eae32dcSDimitry Andric if (MFI.hasStackProtectorIndex()) { 40570eae32dcSDimitry Andric StackProtectorFI = MFI.getStackProtectorIndex(); 40580eae32dcSDimitry Andric if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector) 40590eae32dcSDimitry Andric ObjectsToAllocate.push_back(StackProtectorFI); 40600eae32dcSDimitry Andric } 4061480093f4SDimitry Andric for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { 4062480093f4SDimitry Andric unsigned StackID = MFI.getStackID(I); 4063e8d8bef9SDimitry Andric if (StackID != TargetStackID::ScalableVector) 4064480093f4SDimitry Andric continue; 40650eae32dcSDimitry Andric if (I == StackProtectorFI) 40660eae32dcSDimitry Andric continue; 4067480093f4SDimitry Andric if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex) 4068480093f4SDimitry Andric continue; 4069480093f4SDimitry Andric if (MFI.isDeadObjectIndex(I)) 4070480093f4SDimitry Andric continue; 4071480093f4SDimitry Andric 4072480093f4SDimitry Andric ObjectsToAllocate.push_back(I); 4073480093f4SDimitry Andric } 4074480093f4SDimitry Andric 4075480093f4SDimitry Andric // Allocate all SVE locals and spills 4076480093f4SDimitry Andric for (unsigned FI : ObjectsToAllocate) { 40775ffd83dbSDimitry Andric Align Alignment = MFI.getObjectAlign(FI); 4078480093f4SDimitry Andric // FIXME: Given that the length of SVE vectors is not necessarily a power of 4079480093f4SDimitry Andric // two, we'd need to align every object dynamically at runtime if the 4080480093f4SDimitry Andric // alignment is larger than 16. This is not yet supported. 
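    // (E.g. with a 48-byte vector length, which base SVE permits, an offset of
    // one vector register is 48 bytes: 16-byte aligned but not 32-byte
    // aligned, so anything stricter would need dynamic realignment.)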
40815ffd83dbSDimitry Andric if (Alignment > Align(16)) 4082480093f4SDimitry Andric report_fatal_error( 4083480093f4SDimitry Andric "Alignment of scalable vectors > 16 bytes is not yet supported"); 4084480093f4SDimitry Andric 40855ffd83dbSDimitry Andric Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment); 4086480093f4SDimitry Andric if (AssignOffsets) 4087480093f4SDimitry Andric Assign(FI, -Offset); 4088480093f4SDimitry Andric } 4089480093f4SDimitry Andric 40908bcb0991SDimitry Andric return Offset; 40918bcb0991SDimitry Andric } 40928bcb0991SDimitry Andric 4093480093f4SDimitry Andric int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets( 4094480093f4SDimitry Andric MachineFrameInfo &MFI) const { 4095480093f4SDimitry Andric int MinCSFrameIndex, MaxCSFrameIndex; 4096480093f4SDimitry Andric return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false); 4097480093f4SDimitry Andric } 4098480093f4SDimitry Andric 4099480093f4SDimitry Andric int64_t AArch64FrameLowering::assignSVEStackObjectOffsets( 4100480093f4SDimitry Andric MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const { 4101480093f4SDimitry Andric return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, 4102480093f4SDimitry Andric true); 4103480093f4SDimitry Andric } 4104480093f4SDimitry Andric 41050b57cec5SDimitry Andric void AArch64FrameLowering::processFunctionBeforeFrameFinalized( 41060b57cec5SDimitry Andric MachineFunction &MF, RegScavenger *RS) const { 41078bcb0991SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 41088bcb0991SDimitry Andric 41098bcb0991SDimitry Andric assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown && 41108bcb0991SDimitry Andric "Upwards growing stack unsupported"); 41118bcb0991SDimitry Andric 4112480093f4SDimitry Andric int MinCSFrameIndex, MaxCSFrameIndex; 4113480093f4SDimitry Andric int64_t SVEStackSize = 4114480093f4SDimitry Andric assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex); 41158bcb0991SDimitry Andric 41168bcb0991SDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 4117480093f4SDimitry Andric AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U)); 4118480093f4SDimitry Andric AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex); 41198bcb0991SDimitry Andric 41200b57cec5SDimitry Andric // If this function isn't doing Win64-style C++ EH, we don't need to do 41210b57cec5SDimitry Andric // anything. 41220b57cec5SDimitry Andric if (!MF.hasEHFunclets()) 41230b57cec5SDimitry Andric return; 41240b57cec5SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 41250b57cec5SDimitry Andric WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); 41260b57cec5SDimitry Andric 41270b57cec5SDimitry Andric MachineBasicBlock &MBB = MF.front(); 41280b57cec5SDimitry Andric auto MBBI = MBB.begin(); 41290b57cec5SDimitry Andric while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) 41300b57cec5SDimitry Andric ++MBBI; 41310b57cec5SDimitry Andric 41320b57cec5SDimitry Andric // Create an UnwindHelp object. 
413362cfcf62SDimitry Andric // The UnwindHelp object is allocated at the start of the fixed object area 413462cfcf62SDimitry Andric int64_t FixedObject = 413562cfcf62SDimitry Andric getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false); 413662cfcf62SDimitry Andric int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8, 413762cfcf62SDimitry Andric /*SPOffset*/ -FixedObject, 413862cfcf62SDimitry Andric /*IsImmutable=*/false); 41390b57cec5SDimitry Andric EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; 414062cfcf62SDimitry Andric 41410b57cec5SDimitry Andric // We need to store -2 into the UnwindHelp object at the start of the 41420b57cec5SDimitry Andric // function. 41430b57cec5SDimitry Andric DebugLoc DL; 41440b57cec5SDimitry Andric RS->enterBasicBlockEnd(MBB); 41455f757f3fSDimitry Andric RS->backward(MBBI); 414604eeddc0SDimitry Andric Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass); 41470b57cec5SDimitry Andric assert(DstReg && "There must be a free register after frame setup"); 41480b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2); 41490b57cec5SDimitry Andric BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi)) 41500b57cec5SDimitry Andric .addReg(DstReg, getKillRegState(true)) 41510b57cec5SDimitry Andric .addFrameIndex(UnwindHelpFI) 41520b57cec5SDimitry Andric .addImm(0); 41530b57cec5SDimitry Andric } 41540b57cec5SDimitry Andric 41555ffd83dbSDimitry Andric namespace { 41565ffd83dbSDimitry Andric struct TagStoreInstr { 41575ffd83dbSDimitry Andric MachineInstr *MI; 41585ffd83dbSDimitry Andric int64_t Offset, Size; 41595ffd83dbSDimitry Andric explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size) 41605ffd83dbSDimitry Andric : MI(MI), Offset(Offset), Size(Size) {} 41615ffd83dbSDimitry Andric }; 41625ffd83dbSDimitry Andric 41635ffd83dbSDimitry Andric class TagStoreEdit { 41645ffd83dbSDimitry Andric MachineFunction *MF; 41655ffd83dbSDimitry Andric MachineBasicBlock *MBB; 41665ffd83dbSDimitry Andric MachineRegisterInfo *MRI; 41675ffd83dbSDimitry Andric // Tag store instructions that are being replaced. 41685ffd83dbSDimitry Andric SmallVector<TagStoreInstr, 8> TagStores; 41695ffd83dbSDimitry Andric // Combined memref arguments of the above instructions. 41705ffd83dbSDimitry Andric SmallVector<MachineMemOperand *, 8> CombinedMemRefs; 41715ffd83dbSDimitry Andric 41725ffd83dbSDimitry Andric // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg + 41735ffd83dbSDimitry Andric // FrameRegOffset + Size) with the address tag of SP. 41745ffd83dbSDimitry Andric Register FrameReg; 41755ffd83dbSDimitry Andric StackOffset FrameRegOffset; 41765ffd83dbSDimitry Andric int64_t Size; 417706c3fb27SDimitry Andric // If not std::nullopt, move FrameReg to (FrameReg + FrameRegUpdate) at the 417806c3fb27SDimitry Andric // end. 4179bdd1243dSDimitry Andric std::optional<int64_t> FrameRegUpdate; 41805ffd83dbSDimitry Andric // MIFlags for any FrameReg updating instructions. 41815ffd83dbSDimitry Andric unsigned FrameRegUpdateFlags; 41825ffd83dbSDimitry Andric 41835ffd83dbSDimitry Andric // Use zeroing instruction variants. 
41845ffd83dbSDimitry Andric bool ZeroData; 41855ffd83dbSDimitry Andric DebugLoc DL; 41865ffd83dbSDimitry Andric 41875ffd83dbSDimitry Andric void emitUnrolled(MachineBasicBlock::iterator InsertI); 41885ffd83dbSDimitry Andric void emitLoop(MachineBasicBlock::iterator InsertI); 41895ffd83dbSDimitry Andric 41905ffd83dbSDimitry Andric public: 41915ffd83dbSDimitry Andric TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData) 41925ffd83dbSDimitry Andric : MBB(MBB), ZeroData(ZeroData) { 41935ffd83dbSDimitry Andric MF = MBB->getParent(); 41945ffd83dbSDimitry Andric MRI = &MF->getRegInfo(); 41955ffd83dbSDimitry Andric } 41965ffd83dbSDimitry Andric // Add an instruction to be replaced. Instructions must be added in the 41975ffd83dbSDimitry Andric // ascending order of Offset, and have to be adjacent. 41985ffd83dbSDimitry Andric void addInstruction(TagStoreInstr I) { 41995ffd83dbSDimitry Andric assert((TagStores.empty() || 42005ffd83dbSDimitry Andric TagStores.back().Offset + TagStores.back().Size == I.Offset) && 42015ffd83dbSDimitry Andric "Non-adjacent tag store instructions."); 42025ffd83dbSDimitry Andric TagStores.push_back(I); 42035ffd83dbSDimitry Andric } 42045ffd83dbSDimitry Andric void clear() { TagStores.clear(); } 42055ffd83dbSDimitry Andric // Emit equivalent code at the given location, and erase the current set of 42065ffd83dbSDimitry Andric // instructions. May skip if the replacement is not profitable. May invalidate 42075ffd83dbSDimitry Andric // the input iterator and replace it with a valid one. 42085ffd83dbSDimitry Andric void emitCode(MachineBasicBlock::iterator &InsertI, 420981ad6265SDimitry Andric const AArch64FrameLowering *TFI, bool TryMergeSPUpdate); 42105ffd83dbSDimitry Andric }; 42115ffd83dbSDimitry Andric 42125ffd83dbSDimitry Andric void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) { 42135ffd83dbSDimitry Andric const AArch64InstrInfo *TII = 42145ffd83dbSDimitry Andric MF->getSubtarget<AArch64Subtarget>().getInstrInfo(); 42155ffd83dbSDimitry Andric 42165ffd83dbSDimitry Andric const int64_t kMinOffset = -256 * 16; 42175ffd83dbSDimitry Andric const int64_t kMaxOffset = 255 * 16; 42185ffd83dbSDimitry Andric 42195ffd83dbSDimitry Andric Register BaseReg = FrameReg; 4220e8d8bef9SDimitry Andric int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed(); 42215ffd83dbSDimitry Andric if (BaseRegOffsetBytes < kMinOffset || 422206c3fb27SDimitry Andric BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset || 422306c3fb27SDimitry Andric // BaseReg can be FP, which is not necessarily aligned to 16-bytes. In 422406c3fb27SDimitry Andric // that case, BaseRegOffsetBytes will not be aligned to 16 bytes, which 422506c3fb27SDimitry Andric // is required for the offset of ST2G. 422606c3fb27SDimitry Andric BaseRegOffsetBytes % 16 != 0) { 42275ffd83dbSDimitry Andric Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass); 42285ffd83dbSDimitry Andric emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg, 4229e8d8bef9SDimitry Andric StackOffset::getFixed(BaseRegOffsetBytes), TII); 42305ffd83dbSDimitry Andric BaseReg = ScratchReg; 42315ffd83dbSDimitry Andric BaseRegOffsetBytes = 0; 42325ffd83dbSDimitry Andric } 42335ffd83dbSDimitry Andric 42345ffd83dbSDimitry Andric MachineInstr *LastI = nullptr; 42355ffd83dbSDimitry Andric while (Size) { 42365ffd83dbSDimitry Andric int64_t InstrSize = (Size > 16) ? 32 : 16; 42375ffd83dbSDimitry Andric unsigned Opcode = 42385ffd83dbSDimitry Andric InstrSize == 16 423906c3fb27SDimitry Andric ? (ZeroData ? 
AArch64::STZGi : AArch64::STGi) 424006c3fb27SDimitry Andric : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi); 424106c3fb27SDimitry Andric assert(BaseRegOffsetBytes % 16 == 0); 42425ffd83dbSDimitry Andric MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode)) 42435ffd83dbSDimitry Andric .addReg(AArch64::SP) 42445ffd83dbSDimitry Andric .addReg(BaseReg) 42455ffd83dbSDimitry Andric .addImm(BaseRegOffsetBytes / 16) 42465ffd83dbSDimitry Andric .setMemRefs(CombinedMemRefs); 42475ffd83dbSDimitry Andric // A store to [BaseReg, #0] should go last for an opportunity to fold the 42485ffd83dbSDimitry Andric // final SP adjustment in the epilogue. 42495ffd83dbSDimitry Andric if (BaseRegOffsetBytes == 0) 42505ffd83dbSDimitry Andric LastI = I; 42515ffd83dbSDimitry Andric BaseRegOffsetBytes += InstrSize; 42525ffd83dbSDimitry Andric Size -= InstrSize; 42535ffd83dbSDimitry Andric } 42545ffd83dbSDimitry Andric 42555ffd83dbSDimitry Andric if (LastI) 42565ffd83dbSDimitry Andric MBB->splice(InsertI, MBB, LastI); 42575ffd83dbSDimitry Andric } 42585ffd83dbSDimitry Andric 42595ffd83dbSDimitry Andric void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) { 42605ffd83dbSDimitry Andric const AArch64InstrInfo *TII = 42615ffd83dbSDimitry Andric MF->getSubtarget<AArch64Subtarget>().getInstrInfo(); 42625ffd83dbSDimitry Andric 42635ffd83dbSDimitry Andric Register BaseReg = FrameRegUpdate 42645ffd83dbSDimitry Andric ? FrameReg 42655ffd83dbSDimitry Andric : MRI->createVirtualRegister(&AArch64::GPR64RegClass); 42665ffd83dbSDimitry Andric Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass); 42675ffd83dbSDimitry Andric 42685ffd83dbSDimitry Andric emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII); 42695ffd83dbSDimitry Andric 42705ffd83dbSDimitry Andric int64_t LoopSize = Size; 42715ffd83dbSDimitry Andric // If the loop size is not a multiple of 32, split off one 16-byte store at 42725ffd83dbSDimitry Andric // the end to fold BaseReg update into. 42735ffd83dbSDimitry Andric if (FrameRegUpdate && *FrameRegUpdate) 42745ffd83dbSDimitry Andric LoopSize -= LoopSize % 32; 42755ffd83dbSDimitry Andric MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL, 42765ffd83dbSDimitry Andric TII->get(ZeroData ? AArch64::STZGloop_wback 42775ffd83dbSDimitry Andric : AArch64::STGloop_wback)) 42785ffd83dbSDimitry Andric .addDef(SizeReg) 42795ffd83dbSDimitry Andric .addDef(BaseReg) 42805ffd83dbSDimitry Andric .addImm(LoopSize) 42815ffd83dbSDimitry Andric .addReg(BaseReg) 42825ffd83dbSDimitry Andric .setMemRefs(CombinedMemRefs); 42835ffd83dbSDimitry Andric if (FrameRegUpdate) 42845ffd83dbSDimitry Andric LoopI->setFlags(FrameRegUpdateFlags); 42855ffd83dbSDimitry Andric 42865ffd83dbSDimitry Andric int64_t ExtraBaseRegUpdate = 4287e8d8bef9SDimitry Andric FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0; 42885ffd83dbSDimitry Andric if (LoopSize < Size) { 42895ffd83dbSDimitry Andric assert(FrameRegUpdate); 42905ffd83dbSDimitry Andric assert(Size - LoopSize == 16); 42915ffd83dbSDimitry Andric // Tag 16 more bytes at BaseReg and update BaseReg. 42925ffd83dbSDimitry Andric BuildMI(*MBB, InsertI, DL, 42935ffd83dbSDimitry Andric TII->get(ZeroData ? 
AArch64::STZGPostIndex : AArch64::STGPostIndex)) 42945ffd83dbSDimitry Andric .addDef(BaseReg) 42955ffd83dbSDimitry Andric .addReg(BaseReg) 42965ffd83dbSDimitry Andric .addReg(BaseReg) 42975ffd83dbSDimitry Andric .addImm(1 + ExtraBaseRegUpdate / 16) 42985ffd83dbSDimitry Andric .setMemRefs(CombinedMemRefs) 42995ffd83dbSDimitry Andric .setMIFlags(FrameRegUpdateFlags); 43005ffd83dbSDimitry Andric } else if (ExtraBaseRegUpdate) { 43015ffd83dbSDimitry Andric // Update BaseReg. 43025ffd83dbSDimitry Andric BuildMI( 43035ffd83dbSDimitry Andric *MBB, InsertI, DL, 43045ffd83dbSDimitry Andric TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri)) 43055ffd83dbSDimitry Andric .addDef(BaseReg) 43065ffd83dbSDimitry Andric .addReg(BaseReg) 43075ffd83dbSDimitry Andric .addImm(std::abs(ExtraBaseRegUpdate)) 43085ffd83dbSDimitry Andric .addImm(0) 43095ffd83dbSDimitry Andric .setMIFlags(FrameRegUpdateFlags); 43105ffd83dbSDimitry Andric } 43115ffd83dbSDimitry Andric } 43125ffd83dbSDimitry Andric 43135ffd83dbSDimitry Andric // Check if *II is a register update that can be merged into STGloop that ends 43145ffd83dbSDimitry Andric // at (Reg + Size). RemainingOffset is the required adjustment to Reg after the 43155ffd83dbSDimitry Andric // end of the loop. 43165ffd83dbSDimitry Andric bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg, 43175ffd83dbSDimitry Andric int64_t Size, int64_t *TotalOffset) { 43185ffd83dbSDimitry Andric MachineInstr &MI = *II; 43195ffd83dbSDimitry Andric if ((MI.getOpcode() == AArch64::ADDXri || 43205ffd83dbSDimitry Andric MI.getOpcode() == AArch64::SUBXri) && 43215ffd83dbSDimitry Andric MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) { 43225ffd83dbSDimitry Andric unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm()); 43235ffd83dbSDimitry Andric int64_t Offset = MI.getOperand(2).getImm() << Shift; 43245ffd83dbSDimitry Andric if (MI.getOpcode() == AArch64::SUBXri) 43255ffd83dbSDimitry Andric Offset = -Offset; 43265ffd83dbSDimitry Andric int64_t AbsPostOffset = std::abs(Offset - Size); 43275ffd83dbSDimitry Andric const int64_t kMaxOffset = 43285ffd83dbSDimitry Andric 0xFFF; // Max encoding for unshifted ADDXri / SUBXri 43295ffd83dbSDimitry Andric if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) { 43305ffd83dbSDimitry Andric *TotalOffset = Offset; 43315ffd83dbSDimitry Andric return true; 43325ffd83dbSDimitry Andric } 43335ffd83dbSDimitry Andric } 43345ffd83dbSDimitry Andric return false; 43355ffd83dbSDimitry Andric } 43365ffd83dbSDimitry Andric 43375ffd83dbSDimitry Andric void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE, 43385ffd83dbSDimitry Andric SmallVectorImpl<MachineMemOperand *> &MemRefs) { 43395ffd83dbSDimitry Andric MemRefs.clear(); 43405ffd83dbSDimitry Andric for (auto &TS : TSE) { 43415ffd83dbSDimitry Andric MachineInstr *MI = TS.MI; 43425ffd83dbSDimitry Andric // An instruction without memory operands may access anything. Be 43435ffd83dbSDimitry Andric // conservative and return an empty list. 
43445ffd83dbSDimitry Andric if (MI->memoperands_empty()) { 43455ffd83dbSDimitry Andric MemRefs.clear(); 43465ffd83dbSDimitry Andric return; 43475ffd83dbSDimitry Andric } 43485ffd83dbSDimitry Andric MemRefs.append(MI->memoperands_begin(), MI->memoperands_end()); 43495ffd83dbSDimitry Andric } 43505ffd83dbSDimitry Andric } 43515ffd83dbSDimitry Andric 43525ffd83dbSDimitry Andric void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI, 435381ad6265SDimitry Andric const AArch64FrameLowering *TFI, 435481ad6265SDimitry Andric bool TryMergeSPUpdate) { 43555ffd83dbSDimitry Andric if (TagStores.empty()) 43565ffd83dbSDimitry Andric return; 43575ffd83dbSDimitry Andric TagStoreInstr &FirstTagStore = TagStores[0]; 43585ffd83dbSDimitry Andric TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1]; 43595ffd83dbSDimitry Andric Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size; 43605ffd83dbSDimitry Andric DL = TagStores[0].MI->getDebugLoc(); 43615ffd83dbSDimitry Andric 43625ffd83dbSDimitry Andric Register Reg; 43635ffd83dbSDimitry Andric FrameRegOffset = TFI->resolveFrameOffsetReference( 43645ffd83dbSDimitry Andric *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg, 43655ffd83dbSDimitry Andric /*PreferFP=*/false, /*ForSimm=*/true); 43665ffd83dbSDimitry Andric FrameReg = Reg; 4367bdd1243dSDimitry Andric FrameRegUpdate = std::nullopt; 43685ffd83dbSDimitry Andric 43695ffd83dbSDimitry Andric mergeMemRefs(TagStores, CombinedMemRefs); 43705ffd83dbSDimitry Andric 43710fca6ea1SDimitry Andric LLVM_DEBUG({ 43720fca6ea1SDimitry Andric dbgs() << "Replacing adjacent STG instructions:\n"; 43730fca6ea1SDimitry Andric for (const auto &Instr : TagStores) { 43740fca6ea1SDimitry Andric dbgs() << " " << *Instr.MI; 43750fca6ea1SDimitry Andric } 43760fca6ea1SDimitry Andric }); 43775ffd83dbSDimitry Andric 43785ffd83dbSDimitry Andric // Size threshold where a loop becomes shorter than a linear sequence of 43795ffd83dbSDimitry Andric // tagging instructions. 43805ffd83dbSDimitry Andric const int kSetTagLoopThreshold = 176; 43815ffd83dbSDimitry Andric if (Size < kSetTagLoopThreshold) { 43825ffd83dbSDimitry Andric if (TagStores.size() < 2) 43835ffd83dbSDimitry Andric return; 43845ffd83dbSDimitry Andric emitUnrolled(InsertI); 43855ffd83dbSDimitry Andric } else { 43865ffd83dbSDimitry Andric MachineInstr *UpdateInstr = nullptr; 438781ad6265SDimitry Andric int64_t TotalOffset = 0; 438881ad6265SDimitry Andric if (TryMergeSPUpdate) { 43895ffd83dbSDimitry Andric // See if we can merge base register update into the STGloop. 43905ffd83dbSDimitry Andric // This is done in AArch64LoadStoreOptimizer for "normal" stores, 43915ffd83dbSDimitry Andric // but STGloop is way too unusual for that, and also it only 43925ffd83dbSDimitry Andric // realistically happens in function epilogue. Also, STGloop is expanded 43935ffd83dbSDimitry Andric // before that pass. 
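      // Illustrative case (FrameReg == SP): tag stores covering the bottom of
      // the frame followed by an epilogue `add sp, sp, #N` can be merged when
      // |N - (FrameRegOffset + Size)| fits an unshifted add/sub immediate and
      // is a multiple of 16; the ADD is then folded into the loop write-back.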
43945ffd83dbSDimitry Andric if (InsertI != MBB->end() && 4395e8d8bef9SDimitry Andric canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size, 43965ffd83dbSDimitry Andric &TotalOffset)) { 43975ffd83dbSDimitry Andric UpdateInstr = &*InsertI++; 43985ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n " 43995ffd83dbSDimitry Andric << *UpdateInstr); 44005ffd83dbSDimitry Andric } 44015ffd83dbSDimitry Andric } 44025ffd83dbSDimitry Andric 44035ffd83dbSDimitry Andric if (!UpdateInstr && TagStores.size() < 2) 44045ffd83dbSDimitry Andric return; 44055ffd83dbSDimitry Andric 44065ffd83dbSDimitry Andric if (UpdateInstr) { 44075ffd83dbSDimitry Andric FrameRegUpdate = TotalOffset; 44085ffd83dbSDimitry Andric FrameRegUpdateFlags = UpdateInstr->getFlags(); 44095ffd83dbSDimitry Andric } 44105ffd83dbSDimitry Andric emitLoop(InsertI); 44115ffd83dbSDimitry Andric if (UpdateInstr) 44125ffd83dbSDimitry Andric UpdateInstr->eraseFromParent(); 44135ffd83dbSDimitry Andric } 44145ffd83dbSDimitry Andric 44155ffd83dbSDimitry Andric for (auto &TS : TagStores) 44165ffd83dbSDimitry Andric TS.MI->eraseFromParent(); 44175ffd83dbSDimitry Andric } 44185ffd83dbSDimitry Andric 44195ffd83dbSDimitry Andric bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset, 44205ffd83dbSDimitry Andric int64_t &Size, bool &ZeroData) { 44215ffd83dbSDimitry Andric MachineFunction &MF = *MI.getParent()->getParent(); 44225ffd83dbSDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 44235ffd83dbSDimitry Andric 44245ffd83dbSDimitry Andric unsigned Opcode = MI.getOpcode(); 442506c3fb27SDimitry Andric ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi || 442606c3fb27SDimitry Andric Opcode == AArch64::STZ2Gi); 44275ffd83dbSDimitry Andric 44285ffd83dbSDimitry Andric if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) { 44295ffd83dbSDimitry Andric if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead()) 44305ffd83dbSDimitry Andric return false; 44315ffd83dbSDimitry Andric if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI()) 44325ffd83dbSDimitry Andric return false; 44335ffd83dbSDimitry Andric Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex()); 44345ffd83dbSDimitry Andric Size = MI.getOperand(2).getImm(); 44355ffd83dbSDimitry Andric return true; 44365ffd83dbSDimitry Andric } 44375ffd83dbSDimitry Andric 443806c3fb27SDimitry Andric if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi) 44395ffd83dbSDimitry Andric Size = 16; 444006c3fb27SDimitry Andric else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi) 44415ffd83dbSDimitry Andric Size = 32; 44425ffd83dbSDimitry Andric else 44435ffd83dbSDimitry Andric return false; 44445ffd83dbSDimitry Andric 44455ffd83dbSDimitry Andric if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI()) 44465ffd83dbSDimitry Andric return false; 44475ffd83dbSDimitry Andric 44485ffd83dbSDimitry Andric Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) + 44495ffd83dbSDimitry Andric 16 * MI.getOperand(2).getImm(); 44505ffd83dbSDimitry Andric return true; 44515ffd83dbSDimitry Andric } 44525ffd83dbSDimitry Andric 44535ffd83dbSDimitry Andric // Detect a run of memory tagging instructions for adjacent stack frame slots, 44545ffd83dbSDimitry Andric // and replace them with a shorter instruction sequence: 44555ffd83dbSDimitry Andric // * replace STG + STG with ST2G 44565ffd83dbSDimitry Andric // * replace STGloop + STGloop with STGloop 44575ffd83dbSDimitry Andric // This code needs to run when 
stack slot offsets are already known, but before 44585ffd83dbSDimitry Andric // FrameIndex operands in STG instructions are eliminated. 44595ffd83dbSDimitry Andric MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II, 44605ffd83dbSDimitry Andric const AArch64FrameLowering *TFI, 44615ffd83dbSDimitry Andric RegScavenger *RS) { 44625ffd83dbSDimitry Andric bool FirstZeroData; 44635ffd83dbSDimitry Andric int64_t Size, Offset; 44645ffd83dbSDimitry Andric MachineInstr &MI = *II; 44655ffd83dbSDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 44665ffd83dbSDimitry Andric MachineBasicBlock::iterator NextI = ++II; 44675ffd83dbSDimitry Andric if (&MI == &MBB->instr_back()) 44685ffd83dbSDimitry Andric return II; 44695ffd83dbSDimitry Andric if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData)) 44705ffd83dbSDimitry Andric return II; 44715ffd83dbSDimitry Andric 44725ffd83dbSDimitry Andric SmallVector<TagStoreInstr, 4> Instrs; 44735ffd83dbSDimitry Andric Instrs.emplace_back(&MI, Offset, Size); 44745ffd83dbSDimitry Andric 44755ffd83dbSDimitry Andric constexpr int kScanLimit = 10; 44765ffd83dbSDimitry Andric int Count = 0; 44775ffd83dbSDimitry Andric for (MachineBasicBlock::iterator E = MBB->end(); 44785ffd83dbSDimitry Andric NextI != E && Count < kScanLimit; ++NextI) { 44795ffd83dbSDimitry Andric MachineInstr &MI = *NextI; 44805ffd83dbSDimitry Andric bool ZeroData; 44815ffd83dbSDimitry Andric int64_t Size, Offset; 44825ffd83dbSDimitry Andric // Collect instructions that update memory tags with a FrameIndex operand 44835ffd83dbSDimitry Andric // and (when applicable) constant size, and whose output registers are dead 44845ffd83dbSDimitry Andric // (the latter is almost always the case in practice). Since these 44855ffd83dbSDimitry Andric // instructions effectively have no inputs or outputs, we are free to skip 44865ffd83dbSDimitry Andric // any non-aliasing instructions in between without tracking used registers. 44875ffd83dbSDimitry Andric if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) { 44885ffd83dbSDimitry Andric if (ZeroData != FirstZeroData) 44895ffd83dbSDimitry Andric break; 44905ffd83dbSDimitry Andric Instrs.emplace_back(&MI, Offset, Size); 44915ffd83dbSDimitry Andric continue; 44925ffd83dbSDimitry Andric } 44935ffd83dbSDimitry Andric 44945ffd83dbSDimitry Andric // Only count non-transient, non-tagging instructions toward the scan 44955ffd83dbSDimitry Andric // limit. 44965ffd83dbSDimitry Andric if (!MI.isTransient()) 44975ffd83dbSDimitry Andric ++Count; 44985ffd83dbSDimitry Andric 44995ffd83dbSDimitry Andric // Just in case, stop before the epilogue code starts. 45005ffd83dbSDimitry Andric if (MI.getFlag(MachineInstr::FrameSetup) || 45015ffd83dbSDimitry Andric MI.getFlag(MachineInstr::FrameDestroy)) 45025ffd83dbSDimitry Andric break; 45035ffd83dbSDimitry Andric 45045ffd83dbSDimitry Andric // Reject anything that may alias the collected instructions. 45055ffd83dbSDimitry Andric if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects()) 45065ffd83dbSDimitry Andric break; 45075ffd83dbSDimitry Andric } 45085ffd83dbSDimitry Andric 45095ffd83dbSDimitry Andric // New code will be inserted after the last tagging instruction we've found. 45105ffd83dbSDimitry Andric MachineBasicBlock::iterator InsertI = Instrs.back().MI; 45115f757f3fSDimitry Andric 45125f757f3fSDimitry Andric // All the gathered stack tag instructions are merged and placed after 45135f757f3fSDimitry Andric // last tag store in the list. 
We must check whether the NZCV
45145f757f3fSDimitry Andric   // flag is live at the point where we want to insert. Otherwise the NZCV
45155f757f3fSDimitry Andric   // flag might get clobbered if any STG loops are present.
45165f757f3fSDimitry Andric 
45175f757f3fSDimitry Andric   // FIXME: This approach of bailing out of the merge is conservative: the
45185f757f3fSDimitry Andric   // liveness check is performed even when the merged list contains no STG
45195f757f3fSDimitry Andric   // loops, in which case it is not needed.
45205f757f3fSDimitry Andric   LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
45215f757f3fSDimitry Andric   LiveRegs.addLiveOuts(*MBB);
45225f757f3fSDimitry Andric   for (auto I = MBB->rbegin();; ++I) {
45235f757f3fSDimitry Andric     MachineInstr &MI = *I;
45245f757f3fSDimitry Andric     if (MI == InsertI)
45255f757f3fSDimitry Andric       break;
45265f757f3fSDimitry Andric     LiveRegs.stepBackward(*I);
45275f757f3fSDimitry Andric   }
45285ffd83dbSDimitry Andric   InsertI++;
45295f757f3fSDimitry Andric   if (LiveRegs.contains(AArch64::NZCV))
45305f757f3fSDimitry Andric     return InsertI;
45315ffd83dbSDimitry Andric 
45325ffd83dbSDimitry Andric   llvm::stable_sort(Instrs,
45335ffd83dbSDimitry Andric                     [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
45345ffd83dbSDimitry Andric                       return Left.Offset < Right.Offset;
45355ffd83dbSDimitry Andric                     });
45365ffd83dbSDimitry Andric 
45375ffd83dbSDimitry Andric   // Make sure that we don't have any overlapping stores.
45385ffd83dbSDimitry Andric   int64_t CurOffset = Instrs[0].Offset;
45395ffd83dbSDimitry Andric   for (auto &Instr : Instrs) {
45405ffd83dbSDimitry Andric     if (CurOffset > Instr.Offset)
45415ffd83dbSDimitry Andric       return NextI;
45425ffd83dbSDimitry Andric     CurOffset = Instr.Offset + Instr.Size;
45435ffd83dbSDimitry Andric   }
45445ffd83dbSDimitry Andric 
45455ffd83dbSDimitry Andric   // Find contiguous runs of tagged memory and emit shorter instruction
45465ffd83dbSDimitry Andric   // sequences for them when possible.
45475ffd83dbSDimitry Andric   TagStoreEdit TSE(MBB, FirstZeroData);
4548bdd1243dSDimitry Andric   std::optional<int64_t> EndOffset;
45495ffd83dbSDimitry Andric   for (auto &Instr : Instrs) {
45505ffd83dbSDimitry Andric     if (EndOffset && *EndOffset != Instr.Offset) {
45515ffd83dbSDimitry Andric       // Found a gap.
455281ad6265SDimitry Andric       TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false);
45535ffd83dbSDimitry Andric       TSE.clear();
45545ffd83dbSDimitry Andric     }
45555ffd83dbSDimitry Andric 
45565ffd83dbSDimitry Andric     TSE.addInstruction(Instr);
45575ffd83dbSDimitry Andric     EndOffset = Instr.Offset + Instr.Size;
45585ffd83dbSDimitry Andric   }
45595ffd83dbSDimitry Andric 
4560bdd1243dSDimitry Andric   const MachineFunction *MF = MBB->getParent();
456181ad6265SDimitry Andric   // Multiple FP/SP updates in a loop cannot be described by CFI instructions.
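  // (The loop body runs with a different SP value on each iteration, while CFI
  // associates a single CFA rule with each code address, so the merge is only
  // attempted when asynchronous unwind info is not required.)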
4562bdd1243dSDimitry Andric TSE.emitCode( 4563bdd1243dSDimitry Andric InsertI, TFI, /*TryMergeSPUpdate = */ 4564bdd1243dSDimitry Andric !MF->getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(*MF)); 45655ffd83dbSDimitry Andric 45665ffd83dbSDimitry Andric return InsertI; 45675ffd83dbSDimitry Andric } 45685ffd83dbSDimitry Andric } // namespace 45695ffd83dbSDimitry Andric 45700fca6ea1SDimitry Andric MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II, 45710fca6ea1SDimitry Andric const AArch64FrameLowering *TFI) { 45720fca6ea1SDimitry Andric MachineInstr &MI = *II; 45730fca6ea1SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 45740fca6ea1SDimitry Andric MachineFunction *MF = MBB->getParent(); 45750fca6ea1SDimitry Andric 45760fca6ea1SDimitry Andric if (MI.getOpcode() != AArch64::VGSavePseudo && 45770fca6ea1SDimitry Andric MI.getOpcode() != AArch64::VGRestorePseudo) 45780fca6ea1SDimitry Andric return II; 45790fca6ea1SDimitry Andric 45800fca6ea1SDimitry Andric SMEAttrs FuncAttrs(MF->getFunction()); 45810fca6ea1SDimitry Andric bool LocallyStreaming = 45820fca6ea1SDimitry Andric FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface(); 45830fca6ea1SDimitry Andric const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>(); 45840fca6ea1SDimitry Andric const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 45850fca6ea1SDimitry Andric const AArch64InstrInfo *TII = 45860fca6ea1SDimitry Andric MF->getSubtarget<AArch64Subtarget>().getInstrInfo(); 45870fca6ea1SDimitry Andric 45880fca6ea1SDimitry Andric int64_t VGFrameIdx = 45890fca6ea1SDimitry Andric LocallyStreaming ? AFI->getStreamingVGIdx() : AFI->getVGIdx(); 45900fca6ea1SDimitry Andric assert(VGFrameIdx != std::numeric_limits<int>::max() && 45910fca6ea1SDimitry Andric "Expected FrameIdx for VG"); 45920fca6ea1SDimitry Andric 45930fca6ea1SDimitry Andric unsigned CFIIndex; 45940fca6ea1SDimitry Andric if (MI.getOpcode() == AArch64::VGSavePseudo) { 45950fca6ea1SDimitry Andric const MachineFrameInfo &MFI = MF->getFrameInfo(); 45960fca6ea1SDimitry Andric int64_t Offset = 45970fca6ea1SDimitry Andric MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea(); 45980fca6ea1SDimitry Andric CFIIndex = MF->addFrameInst(MCCFIInstruction::createOffset( 45990fca6ea1SDimitry Andric nullptr, TRI->getDwarfRegNum(AArch64::VG, true), Offset)); 46000fca6ea1SDimitry Andric } else 46010fca6ea1SDimitry Andric CFIIndex = MF->addFrameInst(MCCFIInstruction::createRestore( 46020fca6ea1SDimitry Andric nullptr, TRI->getDwarfRegNum(AArch64::VG, true))); 46030fca6ea1SDimitry Andric 46040fca6ea1SDimitry Andric MachineInstr *UnwindInst = BuildMI(*MBB, II, II->getDebugLoc(), 46050fca6ea1SDimitry Andric TII->get(TargetOpcode::CFI_INSTRUCTION)) 46060fca6ea1SDimitry Andric .addCFIIndex(CFIIndex); 46070fca6ea1SDimitry Andric 46080fca6ea1SDimitry Andric MI.eraseFromParent(); 46090fca6ea1SDimitry Andric return UnwindInst->getIterator(); 46100fca6ea1SDimitry Andric } 46110fca6ea1SDimitry Andric 46125ffd83dbSDimitry Andric void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced( 46135ffd83dbSDimitry Andric MachineFunction &MF, RegScavenger *RS = nullptr) const { 46145ffd83dbSDimitry Andric for (auto &BB : MF) 46150fca6ea1SDimitry Andric for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) { 4616*71ac745dSDimitry Andric if (requiresSaveVG(MF)) 46170fca6ea1SDimitry Andric II = emitVGSaveRestore(II, this); 46180fca6ea1SDimitry Andric if (StackTaggingMergeSetTag) 46195ffd83dbSDimitry Andric II = 
tryMergeAdjacentSTG(II, this, RS); 46205ffd83dbSDimitry Andric } 46210fca6ea1SDimitry Andric } 46225ffd83dbSDimitry Andric 46235ffd83dbSDimitry Andric /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP 46245ffd83dbSDimitry Andric /// before the update. This is easily retrieved as it is exactly the offset 46255ffd83dbSDimitry Andric /// that is set in processFunctionBeforeFrameFinalized. 4626e8d8bef9SDimitry Andric StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP( 46275ffd83dbSDimitry Andric const MachineFunction &MF, int FI, Register &FrameReg, 46280b57cec5SDimitry Andric bool IgnoreSPUpdates) const { 46290b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 463062cfcf62SDimitry Andric if (IgnoreSPUpdates) { 46310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " 46320b57cec5SDimitry Andric << MFI.getObjectOffset(FI) << "\n"); 46330b57cec5SDimitry Andric FrameReg = AArch64::SP; 4634e8d8bef9SDimitry Andric return StackOffset::getFixed(MFI.getObjectOffset(FI)); 46350b57cec5SDimitry Andric } 46360b57cec5SDimitry Andric 4637349cc55cSDimitry Andric // Go to common code if we cannot provide sp + offset. 4638349cc55cSDimitry Andric if (MFI.hasVarSizedObjects() || 4639349cc55cSDimitry Andric MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() || 4640349cc55cSDimitry Andric MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF)) 464162cfcf62SDimitry Andric return getFrameIndexReference(MF, FI, FrameReg); 4642349cc55cSDimitry Andric 4643349cc55cSDimitry Andric FrameReg = AArch64::SP; 4644349cc55cSDimitry Andric return getStackOffset(MF, MFI.getObjectOffset(FI)); 464562cfcf62SDimitry Andric } 464662cfcf62SDimitry Andric 46470b57cec5SDimitry Andric /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve 46480b57cec5SDimitry Andric /// the parent's frame pointer 46490b57cec5SDimitry Andric unsigned AArch64FrameLowering::getWinEHParentFrameOffset( 46500b57cec5SDimitry Andric const MachineFunction &MF) const { 46510b57cec5SDimitry Andric return 0; 46520b57cec5SDimitry Andric } 46530b57cec5SDimitry Andric 46540b57cec5SDimitry Andric /// Funclets only need to account for space for the callee saved registers, 46550b57cec5SDimitry Andric /// as the locals are accounted for in the parent's stack frame. 46560b57cec5SDimitry Andric unsigned AArch64FrameLowering::getWinEHFuncletFrameSize( 46570b57cec5SDimitry Andric const MachineFunction &MF) const { 46580b57cec5SDimitry Andric // This is the size of the pushed CSRs. 46590b57cec5SDimitry Andric unsigned CSSize = 46600b57cec5SDimitry Andric MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize(); 46610b57cec5SDimitry Andric // This is the amount of stack a funclet needs to allocate. 46620b57cec5SDimitry Andric return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(), 46635ffd83dbSDimitry Andric getStackAlign()); 46640b57cec5SDimitry Andric } 4665e8d8bef9SDimitry Andric 4666e8d8bef9SDimitry Andric namespace { 4667e8d8bef9SDimitry Andric struct FrameObject { 4668e8d8bef9SDimitry Andric bool IsValid = false; 4669e8d8bef9SDimitry Andric // Index of the object in MFI. 4670e8d8bef9SDimitry Andric int ObjectIndex = 0; 4671e8d8bef9SDimitry Andric // Group ID this object belongs to. 4672e8d8bef9SDimitry Andric int GroupIndex = -1; 4673e8d8bef9SDimitry Andric // This object should be placed first (closest to SP). 
4674e8d8bef9SDimitry Andric bool ObjectFirst = false; 4675e8d8bef9SDimitry Andric // This object's group (which always contains the object with 4676e8d8bef9SDimitry Andric // ObjectFirst==true) should be placed first. 4677e8d8bef9SDimitry Andric bool GroupFirst = false; 46780fca6ea1SDimitry Andric 46790fca6ea1SDimitry Andric // Used to distinguish between FP and GPR accesses. The values are decided so 46800fca6ea1SDimitry Andric // that they sort FPR < Hazard < GPR and they can be or'd together. 46810fca6ea1SDimitry Andric unsigned Accesses = 0; 46820fca6ea1SDimitry Andric enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 }; 4683e8d8bef9SDimitry Andric }; 4684e8d8bef9SDimitry Andric 4685e8d8bef9SDimitry Andric class GroupBuilder { 4686e8d8bef9SDimitry Andric SmallVector<int, 8> CurrentMembers; 4687e8d8bef9SDimitry Andric int NextGroupIndex = 0; 4688e8d8bef9SDimitry Andric std::vector<FrameObject> &Objects; 4689e8d8bef9SDimitry Andric 4690e8d8bef9SDimitry Andric public: 4691e8d8bef9SDimitry Andric GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {} 4692e8d8bef9SDimitry Andric void AddMember(int Index) { CurrentMembers.push_back(Index); } 4693e8d8bef9SDimitry Andric void EndCurrentGroup() { 4694e8d8bef9SDimitry Andric if (CurrentMembers.size() > 1) { 4695e8d8bef9SDimitry Andric // Create a new group with the current member list. This might remove them 4696e8d8bef9SDimitry Andric // from their pre-existing groups. That's OK, dealing with overlapping 4697e8d8bef9SDimitry Andric // groups is too hard and unlikely to make a difference. 4698e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "group:"); 4699e8d8bef9SDimitry Andric for (int Index : CurrentMembers) { 4700e8d8bef9SDimitry Andric Objects[Index].GroupIndex = NextGroupIndex; 4701e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << " " << Index); 4702e8d8bef9SDimitry Andric } 4703e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 4704e8d8bef9SDimitry Andric NextGroupIndex++; 4705e8d8bef9SDimitry Andric } 4706e8d8bef9SDimitry Andric CurrentMembers.clear(); 4707e8d8bef9SDimitry Andric } 4708e8d8bef9SDimitry Andric }; 4709e8d8bef9SDimitry Andric 4710e8d8bef9SDimitry Andric bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) { 4711e8d8bef9SDimitry Andric // Objects at a lower index are closer to FP; objects at a higher index are 4712e8d8bef9SDimitry Andric // closer to SP. 4713e8d8bef9SDimitry Andric // 4714e8d8bef9SDimitry Andric // For consistency in our comparison, all invalid objects are placed 4715e8d8bef9SDimitry Andric // at the end. This also allows us to stop walking when we hit the 4716e8d8bef9SDimitry Andric // first invalid item after it's all sorted. 4717e8d8bef9SDimitry Andric // 47180fca6ea1SDimitry Andric // If we want to include a stack hazard region, order FPR accesses < the 47190fca6ea1SDimitry Andric // hazard object < GPRs accesses in order to create a separation between the 47200fca6ea1SDimitry Andric // two. For the Accesses field 1 = FPR, 2 = Hazard Object, 4 = GPR. 47210fca6ea1SDimitry Andric // 47220fca6ea1SDimitry Andric // Otherwise the "first" object goes first (closest to SP), followed by the 47230fca6ea1SDimitry Andric // members of the "first" group. 4724e8d8bef9SDimitry Andric // 4725e8d8bef9SDimitry Andric // The rest are sorted by the group index to keep the groups together. 4726e8d8bef9SDimitry Andric // Higher numbered groups are more likely to be around longer (i.e. untagged 4727e8d8bef9SDimitry Andric // in the function epilogue and not at some earlier point). 
Place them closer 4728e8d8bef9SDimitry Andric // to SP. 4729e8d8bef9SDimitry Andric // 4730e8d8bef9SDimitry Andric // If all else equal, sort by the object index to keep the objects in the 4731e8d8bef9SDimitry Andric // original order. 47320fca6ea1SDimitry Andric return std::make_tuple(!A.IsValid, A.Accesses, A.ObjectFirst, A.GroupFirst, 47330fca6ea1SDimitry Andric A.GroupIndex, A.ObjectIndex) < 47340fca6ea1SDimitry Andric std::make_tuple(!B.IsValid, B.Accesses, B.ObjectFirst, B.GroupFirst, 47350fca6ea1SDimitry Andric B.GroupIndex, B.ObjectIndex); 4736e8d8bef9SDimitry Andric } 4737e8d8bef9SDimitry Andric } // namespace 4738e8d8bef9SDimitry Andric 4739e8d8bef9SDimitry Andric void AArch64FrameLowering::orderFrameObjects( 4740e8d8bef9SDimitry Andric const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { 4741e8d8bef9SDimitry Andric if (!OrderFrameObjects || ObjectsToAllocate.empty()) 4742e8d8bef9SDimitry Andric return; 4743e8d8bef9SDimitry Andric 47440fca6ea1SDimitry Andric const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); 4745e8d8bef9SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 4746e8d8bef9SDimitry Andric std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd()); 4747e8d8bef9SDimitry Andric for (auto &Obj : ObjectsToAllocate) { 4748e8d8bef9SDimitry Andric FrameObjects[Obj].IsValid = true; 4749e8d8bef9SDimitry Andric FrameObjects[Obj].ObjectIndex = Obj; 4750e8d8bef9SDimitry Andric } 4751e8d8bef9SDimitry Andric 47520fca6ea1SDimitry Andric // Identify FPR vs GPR slots for hazards, and stack slots that are tagged at 47530fca6ea1SDimitry Andric // the same time. 4754e8d8bef9SDimitry Andric GroupBuilder GB(FrameObjects); 4755e8d8bef9SDimitry Andric for (auto &MBB : MF) { 4756e8d8bef9SDimitry Andric for (auto &MI : MBB) { 4757e8d8bef9SDimitry Andric if (MI.isDebugInstr()) 4758e8d8bef9SDimitry Andric continue; 47590fca6ea1SDimitry Andric 47600fca6ea1SDimitry Andric if (AFI.hasStackHazardSlotIndex()) { 47610fca6ea1SDimitry Andric std::optional<int> FI = getLdStFrameID(MI, MFI); 47620fca6ea1SDimitry Andric if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) { 47630fca6ea1SDimitry Andric if (MFI.getStackID(*FI) == TargetStackID::ScalableVector || 47640fca6ea1SDimitry Andric AArch64InstrInfo::isFpOrNEON(MI)) 47650fca6ea1SDimitry Andric FrameObjects[*FI].Accesses |= FrameObject::AccessFPR; 47660fca6ea1SDimitry Andric else 47670fca6ea1SDimitry Andric FrameObjects[*FI].Accesses |= FrameObject::AccessGPR; 47680fca6ea1SDimitry Andric } 47690fca6ea1SDimitry Andric } 47700fca6ea1SDimitry Andric 4771e8d8bef9SDimitry Andric int OpIndex; 4772e8d8bef9SDimitry Andric switch (MI.getOpcode()) { 4773e8d8bef9SDimitry Andric case AArch64::STGloop: 4774e8d8bef9SDimitry Andric case AArch64::STZGloop: 4775e8d8bef9SDimitry Andric OpIndex = 3; 4776e8d8bef9SDimitry Andric break; 477706c3fb27SDimitry Andric case AArch64::STGi: 477806c3fb27SDimitry Andric case AArch64::STZGi: 477906c3fb27SDimitry Andric case AArch64::ST2Gi: 478006c3fb27SDimitry Andric case AArch64::STZ2Gi: 4781e8d8bef9SDimitry Andric OpIndex = 1; 4782e8d8bef9SDimitry Andric break; 4783e8d8bef9SDimitry Andric default: 4784e8d8bef9SDimitry Andric OpIndex = -1; 4785e8d8bef9SDimitry Andric } 4786e8d8bef9SDimitry Andric 4787e8d8bef9SDimitry Andric int TaggedFI = -1; 4788e8d8bef9SDimitry Andric if (OpIndex >= 0) { 4789e8d8bef9SDimitry Andric const MachineOperand &MO = MI.getOperand(OpIndex); 4790e8d8bef9SDimitry Andric if (MO.isFI()) { 4791e8d8bef9SDimitry Andric int FI = MO.getIndex(); 
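          // Fixed objects have negative indices, and IsValid is only set for
          // entries in ObjectsToAllocate, so anything else is skipped here.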
4792e8d8bef9SDimitry Andric if (FI >= 0 && FI < MFI.getObjectIndexEnd() && 4793e8d8bef9SDimitry Andric FrameObjects[FI].IsValid) 4794e8d8bef9SDimitry Andric TaggedFI = FI; 4795e8d8bef9SDimitry Andric } 4796e8d8bef9SDimitry Andric } 4797e8d8bef9SDimitry Andric 4798e8d8bef9SDimitry Andric // If this is a stack tagging instruction for a slot that is not part of a 4799e8d8bef9SDimitry Andric // group yet, either start a new group or add it to the current one. 4800e8d8bef9SDimitry Andric if (TaggedFI >= 0) 4801e8d8bef9SDimitry Andric GB.AddMember(TaggedFI); 4802e8d8bef9SDimitry Andric else 4803e8d8bef9SDimitry Andric GB.EndCurrentGroup(); 4804e8d8bef9SDimitry Andric } 4805e8d8bef9SDimitry Andric // Groups should never span multiple basic blocks. 4806e8d8bef9SDimitry Andric GB.EndCurrentGroup(); 4807e8d8bef9SDimitry Andric } 4808e8d8bef9SDimitry Andric 48090fca6ea1SDimitry Andric if (AFI.hasStackHazardSlotIndex()) { 48100fca6ea1SDimitry Andric FrameObjects[AFI.getStackHazardSlotIndex()].Accesses = 48110fca6ea1SDimitry Andric FrameObject::AccessHazard; 48120fca6ea1SDimitry Andric // If a stack object is unknown or both GPR and FPR, sort it into GPR. 48130fca6ea1SDimitry Andric for (auto &Obj : FrameObjects) 48140fca6ea1SDimitry Andric if (!Obj.Accesses || 48150fca6ea1SDimitry Andric Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR)) 48160fca6ea1SDimitry Andric Obj.Accesses = FrameObject::AccessGPR; 48170fca6ea1SDimitry Andric } 48180fca6ea1SDimitry Andric 4819e8d8bef9SDimitry Andric // If the function's tagged base pointer is pinned to a stack slot, we want to 4820e8d8bef9SDimitry Andric // put that slot first when possible. This will likely place it at SP + 0, 4821e8d8bef9SDimitry Andric // and save one instruction when generating the base pointer because IRG does 4822e8d8bef9SDimitry Andric // not allow an immediate offset. 4823bdd1243dSDimitry Andric std::optional<int> TBPI = AFI.getTaggedBasePointerIndex(); 4824e8d8bef9SDimitry Andric if (TBPI) { 4825e8d8bef9SDimitry Andric FrameObjects[*TBPI].ObjectFirst = true; 4826e8d8bef9SDimitry Andric FrameObjects[*TBPI].GroupFirst = true; 4827e8d8bef9SDimitry Andric int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex; 4828e8d8bef9SDimitry Andric if (FirstGroupIndex >= 0) 4829e8d8bef9SDimitry Andric for (FrameObject &Object : FrameObjects) 4830e8d8bef9SDimitry Andric if (Object.GroupIndex == FirstGroupIndex) 4831e8d8bef9SDimitry Andric Object.GroupFirst = true; 4832e8d8bef9SDimitry Andric } 4833e8d8bef9SDimitry Andric 4834e8d8bef9SDimitry Andric llvm::stable_sort(FrameObjects, FrameObjectCompare); 4835e8d8bef9SDimitry Andric 4836e8d8bef9SDimitry Andric int i = 0; 4837e8d8bef9SDimitry Andric for (auto &Obj : FrameObjects) { 4838e8d8bef9SDimitry Andric // All invalid items are sorted at the end, so it's safe to stop. 
4839e8d8bef9SDimitry Andric if (!Obj.IsValid) 4840e8d8bef9SDimitry Andric break; 4841e8d8bef9SDimitry Andric ObjectsToAllocate[i++] = Obj.ObjectIndex; 4842e8d8bef9SDimitry Andric } 4843e8d8bef9SDimitry Andric 48440fca6ea1SDimitry Andric LLVM_DEBUG({ 48450fca6ea1SDimitry Andric dbgs() << "Final frame order:\n"; 48460fca6ea1SDimitry Andric for (auto &Obj : FrameObjects) { 4847e8d8bef9SDimitry Andric if (!Obj.IsValid) 4848e8d8bef9SDimitry Andric break; 4849e8d8bef9SDimitry Andric dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex; 4850e8d8bef9SDimitry Andric if (Obj.ObjectFirst) 4851e8d8bef9SDimitry Andric dbgs() << ", first"; 4852e8d8bef9SDimitry Andric if (Obj.GroupFirst) 4853e8d8bef9SDimitry Andric dbgs() << ", group-first"; 4854e8d8bef9SDimitry Andric dbgs() << "\n"; 48550fca6ea1SDimitry Andric } 4856e8d8bef9SDimitry Andric }); 4857e8d8bef9SDimitry Andric } 48585f757f3fSDimitry Andric 48595f757f3fSDimitry Andric /// Emit a loop to decrement SP until it is equal to TargetReg, with probes at 48605f757f3fSDimitry Andric /// least every ProbeSize bytes. Returns an iterator of the first instruction 48615f757f3fSDimitry Andric /// after the loop. The difference between SP and TargetReg must be an exact 48625f757f3fSDimitry Andric /// multiple of ProbeSize. 48635f757f3fSDimitry Andric MachineBasicBlock::iterator 48645f757f3fSDimitry Andric AArch64FrameLowering::inlineStackProbeLoopExactMultiple( 48655f757f3fSDimitry Andric MachineBasicBlock::iterator MBBI, int64_t ProbeSize, 48665f757f3fSDimitry Andric Register TargetReg) const { 48675f757f3fSDimitry Andric MachineBasicBlock &MBB = *MBBI->getParent(); 48685f757f3fSDimitry Andric MachineFunction &MF = *MBB.getParent(); 48695f757f3fSDimitry Andric const AArch64InstrInfo *TII = 48705f757f3fSDimitry Andric MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); 48715f757f3fSDimitry Andric DebugLoc DL = MBB.findDebugLoc(MBBI); 48725f757f3fSDimitry Andric 48735f757f3fSDimitry Andric MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); 48745f757f3fSDimitry Andric MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 48755f757f3fSDimitry Andric MF.insert(MBBInsertPoint, LoopMBB); 48765f757f3fSDimitry Andric MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 48775f757f3fSDimitry Andric MF.insert(MBBInsertPoint, ExitMBB); 48785f757f3fSDimitry Andric 48795f757f3fSDimitry Andric // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable 48805f757f3fSDimitry Andric // in SUB). 
48815f757f3fSDimitry Andric emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP, 48825f757f3fSDimitry Andric StackOffset::getFixed(-ProbeSize), TII, 48835f757f3fSDimitry Andric MachineInstr::FrameSetup); 48845f757f3fSDimitry Andric // STR XZR, [SP] 48855f757f3fSDimitry Andric BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui)) 48865f757f3fSDimitry Andric .addReg(AArch64::XZR) 48875f757f3fSDimitry Andric .addReg(AArch64::SP) 48885f757f3fSDimitry Andric .addImm(0) 48895f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 48905f757f3fSDimitry Andric // CMP SP, TargetReg 48915f757f3fSDimitry Andric BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64), 48925f757f3fSDimitry Andric AArch64::XZR) 48935f757f3fSDimitry Andric .addReg(AArch64::SP) 48945f757f3fSDimitry Andric .addReg(TargetReg) 48955f757f3fSDimitry Andric .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0)) 48965f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 48975f757f3fSDimitry Andric // B.CC Loop 48985f757f3fSDimitry Andric BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc)) 48995f757f3fSDimitry Andric .addImm(AArch64CC::NE) 49005f757f3fSDimitry Andric .addMBB(LoopMBB) 49015f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 49025f757f3fSDimitry Andric 49035f757f3fSDimitry Andric LoopMBB->addSuccessor(ExitMBB); 49045f757f3fSDimitry Andric LoopMBB->addSuccessor(LoopMBB); 49055f757f3fSDimitry Andric // Synthesize the exit MBB. 49065f757f3fSDimitry Andric ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end()); 49075f757f3fSDimitry Andric ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 49085f757f3fSDimitry Andric MBB.addSuccessor(LoopMBB); 49095f757f3fSDimitry Andric // Update liveins. 49100fca6ea1SDimitry Andric fullyRecomputeLiveIns({ExitMBB, LoopMBB}); 49115f757f3fSDimitry Andric 49125f757f3fSDimitry Andric return ExitMBB->begin(); 49135f757f3fSDimitry Andric } 49145f757f3fSDimitry Andric 49155f757f3fSDimitry Andric void AArch64FrameLowering::inlineStackProbeFixed( 49165f757f3fSDimitry Andric MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize, 49175f757f3fSDimitry Andric StackOffset CFAOffset) const { 49185f757f3fSDimitry Andric MachineBasicBlock *MBB = MBBI->getParent(); 49195f757f3fSDimitry Andric MachineFunction &MF = *MBB->getParent(); 49205f757f3fSDimitry Andric const AArch64InstrInfo *TII = 49215f757f3fSDimitry Andric MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); 49225f757f3fSDimitry Andric AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 49235f757f3fSDimitry Andric bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF); 49245f757f3fSDimitry Andric bool HasFP = hasFP(MF); 49255f757f3fSDimitry Andric 49265f757f3fSDimitry Andric DebugLoc DL; 49275f757f3fSDimitry Andric int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize(); 49285f757f3fSDimitry Andric int64_t NumBlocks = FrameSize / ProbeSize; 49295f757f3fSDimitry Andric int64_t ResidualSize = FrameSize % ProbeSize; 49305f757f3fSDimitry Andric 49315f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, " 49325f757f3fSDimitry Andric << NumBlocks << " blocks of " << ProbeSize 49335f757f3fSDimitry Andric << " bytes, plus " << ResidualSize << " bytes\n"); 49345f757f3fSDimitry Andric 49355f757f3fSDimitry Andric // Decrement SP by NumBlock * ProbeSize bytes, with either unrolled or 49365f757f3fSDimitry Andric // ordinary loop. 
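  // Illustrative arithmetic (assumed values, not defaults taken from this
  // file): with ProbeSize = 4096 and FrameSize = 10000, NumBlocks = 2 and
  // ResidualSize = 1808; the two 4096-byte allocations are emitted unrolled
  // when NumBlocks does not exceed AArch64::StackProbeMaxLoopUnroll, and the
  // 1808-byte remainder is handled by the ResidualSize block further down.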
49375f757f3fSDimitry Andric if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) { 49385f757f3fSDimitry Andric for (int i = 0; i < NumBlocks; ++i) { 49395f757f3fSDimitry Andric // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not 49405f757f3fSDimitry Andric // encodable in a SUB). 49415f757f3fSDimitry Andric emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP, 49425f757f3fSDimitry Andric StackOffset::getFixed(-ProbeSize), TII, 49435f757f3fSDimitry Andric MachineInstr::FrameSetup, false, false, nullptr, 49445f757f3fSDimitry Andric EmitAsyncCFI && !HasFP, CFAOffset); 49455f757f3fSDimitry Andric CFAOffset += StackOffset::getFixed(ProbeSize); 49465f757f3fSDimitry Andric // STR XZR, [SP] 49475f757f3fSDimitry Andric BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui)) 49485f757f3fSDimitry Andric .addReg(AArch64::XZR) 49495f757f3fSDimitry Andric .addReg(AArch64::SP) 49505f757f3fSDimitry Andric .addImm(0) 49515f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 49525f757f3fSDimitry Andric } 49535f757f3fSDimitry Andric } else if (NumBlocks != 0) { 49545f757f3fSDimitry Andric // SUB ScratchReg, SP, #FrameSize (or equivalent if FrameSize is not 49555f757f3fSDimitry Andric // encodable in ADD). ScrathReg may temporarily become the CFA register. 49565f757f3fSDimitry Andric emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP, 49575f757f3fSDimitry Andric StackOffset::getFixed(-ProbeSize * NumBlocks), TII, 49585f757f3fSDimitry Andric MachineInstr::FrameSetup, false, false, nullptr, 49595f757f3fSDimitry Andric EmitAsyncCFI && !HasFP, CFAOffset); 49605f757f3fSDimitry Andric CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks); 49615f757f3fSDimitry Andric MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg); 49625f757f3fSDimitry Andric MBB = MBBI->getParent(); 49635f757f3fSDimitry Andric if (EmitAsyncCFI && !HasFP) { 49645f757f3fSDimitry Andric // Set the CFA register back to SP. 49655f757f3fSDimitry Andric const AArch64RegisterInfo &RegInfo = 49665f757f3fSDimitry Andric *MF.getSubtarget<AArch64Subtarget>().getRegisterInfo(); 49675f757f3fSDimitry Andric unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true); 49685f757f3fSDimitry Andric unsigned CFIIndex = 49695f757f3fSDimitry Andric MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 49705f757f3fSDimitry Andric BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) 49715f757f3fSDimitry Andric .addCFIIndex(CFIIndex) 49725f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 49735f757f3fSDimitry Andric } 49745f757f3fSDimitry Andric } 49755f757f3fSDimitry Andric 49765f757f3fSDimitry Andric if (ResidualSize != 0) { 49775f757f3fSDimitry Andric // SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not encodable 49785f757f3fSDimitry Andric // in SUB). 
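    // Note: the store below probes the residual allocation only when
    // ResidualSize exceeds StackProbeMaxUnprobedStack; a smaller unprobed
    // tail is permitted by the probing scheme.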
49795f757f3fSDimitry Andric emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP, 49805f757f3fSDimitry Andric StackOffset::getFixed(-ResidualSize), TII, 49815f757f3fSDimitry Andric MachineInstr::FrameSetup, false, false, nullptr, 49825f757f3fSDimitry Andric EmitAsyncCFI && !HasFP, CFAOffset); 49835f757f3fSDimitry Andric if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) { 49845f757f3fSDimitry Andric // STR XZR, [SP] 49855f757f3fSDimitry Andric BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui)) 49865f757f3fSDimitry Andric .addReg(AArch64::XZR) 49875f757f3fSDimitry Andric .addReg(AArch64::SP) 49885f757f3fSDimitry Andric .addImm(0) 49895f757f3fSDimitry Andric .setMIFlags(MachineInstr::FrameSetup); 49905f757f3fSDimitry Andric } 49915f757f3fSDimitry Andric } 49925f757f3fSDimitry Andric } 49935f757f3fSDimitry Andric 49945f757f3fSDimitry Andric void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF, 49955f757f3fSDimitry Andric MachineBasicBlock &MBB) const { 49965f757f3fSDimitry Andric // Get the instructions that need to be replaced. We emit at most two of 49975f757f3fSDimitry Andric // these. Remember them in order to avoid complications coming from the need 49985f757f3fSDimitry Andric // to traverse the block while potentially creating more blocks. 49995f757f3fSDimitry Andric SmallVector<MachineInstr *, 4> ToReplace; 50005f757f3fSDimitry Andric for (MachineInstr &MI : MBB) 50015f757f3fSDimitry Andric if (MI.getOpcode() == AArch64::PROBED_STACKALLOC || 50025f757f3fSDimitry Andric MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR) 50035f757f3fSDimitry Andric ToReplace.push_back(&MI); 50045f757f3fSDimitry Andric 50055f757f3fSDimitry Andric for (MachineInstr *MI : ToReplace) { 50065f757f3fSDimitry Andric if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) { 50075f757f3fSDimitry Andric Register ScratchReg = MI->getOperand(0).getReg(); 50085f757f3fSDimitry Andric int64_t FrameSize = MI->getOperand(1).getImm(); 50095f757f3fSDimitry Andric StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(), 50105f757f3fSDimitry Andric MI->getOperand(3).getImm()); 50115f757f3fSDimitry Andric inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize, 50125f757f3fSDimitry Andric CFAOffset); 50135f757f3fSDimitry Andric } else { 50145f757f3fSDimitry Andric assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR && 50155f757f3fSDimitry Andric "Stack probe pseudo-instruction expected"); 50165f757f3fSDimitry Andric const AArch64InstrInfo *TII = 50175f757f3fSDimitry Andric MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo(); 50185f757f3fSDimitry Andric Register TargetReg = MI->getOperand(0).getReg(); 50195f757f3fSDimitry Andric (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true); 50205f757f3fSDimitry Andric } 50215f757f3fSDimitry Andric MI->eraseFromParent(); 50225f757f3fSDimitry Andric } 50235f757f3fSDimitry Andric } 502462987288SDimitry Andric 502562987288SDimitry Andric struct StackAccess { 502662987288SDimitry Andric enum AccessType { 502762987288SDimitry Andric NotAccessed = 0, // Stack object not accessed by load/store instructions. 502862987288SDimitry Andric GPR = 1 << 0, // A general purpose register. 502962987288SDimitry Andric PPR = 1 << 1, // A predicate register. 503062987288SDimitry Andric FPR = 1 << 2, // A floating point/Neon/SVE register. 
503162987288SDimitry Andric }; 503262987288SDimitry Andric 503362987288SDimitry Andric int Idx; 503462987288SDimitry Andric StackOffset Offset; 503562987288SDimitry Andric int64_t Size; 503662987288SDimitry Andric unsigned AccessTypes; 503762987288SDimitry Andric 503862987288SDimitry Andric StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {} 503962987288SDimitry Andric 504062987288SDimitry Andric bool operator<(const StackAccess &Rhs) const { 504162987288SDimitry Andric return std::make_tuple(start(), Idx) < 504262987288SDimitry Andric std::make_tuple(Rhs.start(), Rhs.Idx); 504362987288SDimitry Andric } 504462987288SDimitry Andric 504562987288SDimitry Andric bool isCPU() const { 504662987288SDimitry Andric // Predicate register load and store instructions execute on the CPU. 504762987288SDimitry Andric return AccessTypes & (AccessType::GPR | AccessType::PPR); 504862987288SDimitry Andric } 504962987288SDimitry Andric bool isSME() const { return AccessTypes & AccessType::FPR; } 505062987288SDimitry Andric bool isMixed() const { return isCPU() && isSME(); } 505162987288SDimitry Andric 505262987288SDimitry Andric int64_t start() const { return Offset.getFixed() + Offset.getScalable(); } 505362987288SDimitry Andric int64_t end() const { return start() + Size; } 505462987288SDimitry Andric 505562987288SDimitry Andric std::string getTypeString() const { 505662987288SDimitry Andric switch (AccessTypes) { 505762987288SDimitry Andric case AccessType::FPR: 505862987288SDimitry Andric return "FPR"; 505962987288SDimitry Andric case AccessType::PPR: 506062987288SDimitry Andric return "PPR"; 506162987288SDimitry Andric case AccessType::GPR: 506262987288SDimitry Andric return "GPR"; 506362987288SDimitry Andric case AccessType::NotAccessed: 506462987288SDimitry Andric return "NA"; 506562987288SDimitry Andric default: 506662987288SDimitry Andric return "Mixed"; 506762987288SDimitry Andric } 506862987288SDimitry Andric } 506962987288SDimitry Andric 507062987288SDimitry Andric void print(raw_ostream &OS) const { 507162987288SDimitry Andric OS << getTypeString() << " stack object at [SP" 507262987288SDimitry Andric << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed(); 507362987288SDimitry Andric if (Offset.getScalable()) 507462987288SDimitry Andric OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable() 507562987288SDimitry Andric << " * vscale"; 507662987288SDimitry Andric OS << "]"; 507762987288SDimitry Andric } 507862987288SDimitry Andric }; 507962987288SDimitry Andric 508062987288SDimitry Andric static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) { 508162987288SDimitry Andric SA.print(OS); 508262987288SDimitry Andric return OS; 508362987288SDimitry Andric } 508462987288SDimitry Andric 508562987288SDimitry Andric void AArch64FrameLowering::emitRemarks( 508662987288SDimitry Andric const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const { 508762987288SDimitry Andric 508862987288SDimitry Andric SMEAttrs Attrs(MF.getFunction()); 508962987288SDimitry Andric if (Attrs.hasNonStreamingInterfaceAndBody()) 509062987288SDimitry Andric return; 509162987288SDimitry Andric 509262987288SDimitry Andric const uint64_t HazardSize = 509362987288SDimitry Andric (StackHazardSize) ? 
StackHazardSize : StackHazardRemarkSize; 509462987288SDimitry Andric 509562987288SDimitry Andric if (HazardSize == 0) 509662987288SDimitry Andric return; 509762987288SDimitry Andric 509862987288SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 509962987288SDimitry Andric // Bail if function has no stack objects. 510062987288SDimitry Andric if (!MFI.hasStackObjects()) 510162987288SDimitry Andric return; 510262987288SDimitry Andric 510362987288SDimitry Andric std::vector<StackAccess> StackAccesses(MFI.getNumObjects()); 510462987288SDimitry Andric 510562987288SDimitry Andric size_t NumFPLdSt = 0; 510662987288SDimitry Andric size_t NumNonFPLdSt = 0; 510762987288SDimitry Andric 510862987288SDimitry Andric // Collect stack accesses via Load/Store instructions. 510962987288SDimitry Andric for (const MachineBasicBlock &MBB : MF) { 511062987288SDimitry Andric for (const MachineInstr &MI : MBB) { 511162987288SDimitry Andric if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1) 511262987288SDimitry Andric continue; 511362987288SDimitry Andric for (MachineMemOperand *MMO : MI.memoperands()) { 511462987288SDimitry Andric std::optional<int> FI = getMMOFrameID(MMO, MFI); 511562987288SDimitry Andric if (FI && !MFI.isDeadObjectIndex(*FI)) { 511662987288SDimitry Andric int FrameIdx = *FI; 511762987288SDimitry Andric 511862987288SDimitry Andric size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects(); 511962987288SDimitry Andric if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) { 512062987288SDimitry Andric StackAccesses[ArrIdx].Idx = FrameIdx; 512162987288SDimitry Andric StackAccesses[ArrIdx].Offset = 512262987288SDimitry Andric getFrameIndexReferenceFromSP(MF, FrameIdx); 512362987288SDimitry Andric StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx); 512462987288SDimitry Andric } 512562987288SDimitry Andric 512662987288SDimitry Andric unsigned RegTy = StackAccess::AccessType::GPR; 512762987288SDimitry Andric if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) { 512862987288SDimitry Andric if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg())) 512962987288SDimitry Andric RegTy = StackAccess::PPR; 513062987288SDimitry Andric else 513162987288SDimitry Andric RegTy = StackAccess::FPR; 513262987288SDimitry Andric } else if (AArch64InstrInfo::isFpOrNEON(MI)) { 513362987288SDimitry Andric RegTy = StackAccess::FPR; 513462987288SDimitry Andric } 513562987288SDimitry Andric 513662987288SDimitry Andric StackAccesses[ArrIdx].AccessTypes |= RegTy; 513762987288SDimitry Andric 513862987288SDimitry Andric if (RegTy == StackAccess::FPR) 513962987288SDimitry Andric ++NumFPLdSt; 514062987288SDimitry Andric else 514162987288SDimitry Andric ++NumNonFPLdSt; 514262987288SDimitry Andric } 514362987288SDimitry Andric } 514462987288SDimitry Andric } 514562987288SDimitry Andric } 514662987288SDimitry Andric 514762987288SDimitry Andric if (NumFPLdSt == 0 || NumNonFPLdSt == 0) 514862987288SDimitry Andric return; 514962987288SDimitry Andric 515062987288SDimitry Andric llvm::sort(StackAccesses); 515162987288SDimitry Andric StackAccesses.erase(llvm::remove_if(StackAccesses, 515262987288SDimitry Andric [](const StackAccess &S) { 515362987288SDimitry Andric return S.AccessTypes == 515462987288SDimitry Andric StackAccess::NotAccessed; 515562987288SDimitry Andric }), 515662987288SDimitry Andric StackAccesses.end()); 515762987288SDimitry Andric 515862987288SDimitry Andric SmallVector<const StackAccess *> MixedObjects; 515962987288SDimitry Andric SmallVector<std::pair<const StackAccess *, 
const StackAccess *>> HazardPairs; 516062987288SDimitry Andric 516162987288SDimitry Andric if (StackAccesses.front().isMixed()) 516262987288SDimitry Andric MixedObjects.push_back(&StackAccesses.front()); 516362987288SDimitry Andric 516462987288SDimitry Andric for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end()); 516562987288SDimitry Andric It != End; ++It) { 516662987288SDimitry Andric const auto &First = *It; 516762987288SDimitry Andric const auto &Second = *(It + 1); 516862987288SDimitry Andric 516962987288SDimitry Andric if (Second.isMixed()) 517062987288SDimitry Andric MixedObjects.push_back(&Second); 517162987288SDimitry Andric 517262987288SDimitry Andric if ((First.isSME() && Second.isCPU()) || 517362987288SDimitry Andric (First.isCPU() && Second.isSME())) { 517462987288SDimitry Andric uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end()); 517562987288SDimitry Andric if (Distance < HazardSize) 517662987288SDimitry Andric HazardPairs.emplace_back(&First, &Second); 517762987288SDimitry Andric } 517862987288SDimitry Andric } 517962987288SDimitry Andric 518062987288SDimitry Andric auto EmitRemark = [&](llvm::StringRef Str) { 518162987288SDimitry Andric ORE->emit([&]() { 518262987288SDimitry Andric auto R = MachineOptimizationRemarkAnalysis( 518362987288SDimitry Andric "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front()); 518462987288SDimitry Andric return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str; 518562987288SDimitry Andric }); 518662987288SDimitry Andric }; 518762987288SDimitry Andric 518862987288SDimitry Andric for (const auto &P : HazardPairs) 518962987288SDimitry Andric EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str()); 519062987288SDimitry Andric 519162987288SDimitry Andric for (const auto *Obj : MixedObjects) 519262987288SDimitry Andric EmitRemark( 519362987288SDimitry Andric formatv("{0} accessed by both GP and FP instructions", *Obj).str()); 519462987288SDimitry Andric } 5195
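// Illustrative output only (function name, flags and offsets below are
// assumptions, not taken from this file): remarks are produced when the
// effective HazardSize (StackHazardSize or StackHazardRemarkSize) is non-zero
// and analysis remarks are enabled, e.g. -Rpass-analysis=sme in clang or
// -pass-remarks-analysis=sme in llc. A function that places a GPR spill next
// to an FPR spill might then report:
//
//   stack hazard in 'foo': GPR stack object at [SP+8] is too close to FPR
//   stack object at [SP+16]
//
// and a slot accessed by both kinds of instruction would be reported as:
//
//   stack hazard in 'foo': Mixed stack object at [SP+32] accessed by both GP
//   and FP instructions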