xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (revision 71ac745d76c3ba442e753daff1870893f272b29d)
10b57cec5SDimitry Andric //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file contains the AArch64 implementation of TargetFrameLowering class.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric // On AArch64, stack frames are structured as follows:
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // The stack grows downward.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric // All of the individual frame areas on the frame below are optional, i.e. it's
160b57cec5SDimitry Andric // possible to create a function so that the particular area isn't present
170b57cec5SDimitry Andric // in the frame.
180b57cec5SDimitry Andric //
190b57cec5SDimitry Andric // At function entry, the "frame" looks as follows:
200b57cec5SDimitry Andric //
210b57cec5SDimitry Andric // |                                   | Higher address
220b57cec5SDimitry Andric // |-----------------------------------|
230b57cec5SDimitry Andric // |                                   |
240b57cec5SDimitry Andric // | arguments passed on the stack     |
250b57cec5SDimitry Andric // |                                   |
260b57cec5SDimitry Andric // |-----------------------------------| <- sp
270b57cec5SDimitry Andric // |                                   | Lower address
280b57cec5SDimitry Andric //
290b57cec5SDimitry Andric //
300b57cec5SDimitry Andric // After the prologue has run, the frame has the following general structure.
310b57cec5SDimitry Andric // Note that this doesn't depict the case where a red-zone is used. Also,
320b57cec5SDimitry Andric // technically the last frame area (VLAs) doesn't get created until in the
330b57cec5SDimitry Andric // main function body, after the prologue is run. However, it's depicted here
340b57cec5SDimitry Andric // for completeness.
350b57cec5SDimitry Andric //
360b57cec5SDimitry Andric // |                                   | Higher address
370b57cec5SDimitry Andric // |-----------------------------------|
380b57cec5SDimitry Andric // |                                   |
390b57cec5SDimitry Andric // | arguments passed on the stack     |
400b57cec5SDimitry Andric // |                                   |
410b57cec5SDimitry Andric // |-----------------------------------|
420b57cec5SDimitry Andric // |                                   |
430b57cec5SDimitry Andric // | (Win64 only) varargs from reg     |
440b57cec5SDimitry Andric // |                                   |
450b57cec5SDimitry Andric // |-----------------------------------|
460b57cec5SDimitry Andric // |                                   |
478bcb0991SDimitry Andric // | callee-saved gpr registers        | <--.
488bcb0991SDimitry Andric // |                                   |    | On Darwin platforms these
498bcb0991SDimitry Andric // |- - - - - - - - - - - - - - - - - -|    | callee saves are swapped,
50fe6060f1SDimitry Andric // | prev_lr                           |    | (frame record first)
51fe6060f1SDimitry Andric // | prev_fp                           | <--'
52fe6060f1SDimitry Andric // | async context if needed           |
530b57cec5SDimitry Andric // | (a.k.a. "frame record")           |
540b57cec5SDimitry Andric // |-----------------------------------| <- fp(=x29)
550fca6ea1SDimitry Andric // |   <hazard padding>                |
560fca6ea1SDimitry Andric // |-----------------------------------|
570b57cec5SDimitry Andric // |                                   |
588bcb0991SDimitry Andric // | callee-saved fp/simd/SVE regs     |
598bcb0991SDimitry Andric // |                                   |
608bcb0991SDimitry Andric // |-----------------------------------|
618bcb0991SDimitry Andric // |                                   |
628bcb0991SDimitry Andric // |        SVE stack objects          |
630b57cec5SDimitry Andric // |                                   |
640b57cec5SDimitry Andric // |-----------------------------------|
650b57cec5SDimitry Andric // |.empty.space.to.make.part.below....|
660b57cec5SDimitry Andric // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
670b57cec5SDimitry Andric // |.the.standard.16-byte.alignment....|  compile time; if present)
680b57cec5SDimitry Andric // |-----------------------------------|
690b57cec5SDimitry Andric // | local variables of fixed size     |
700b57cec5SDimitry Andric // | including spill slots             |
710fca6ea1SDimitry Andric // |   <FPR>                           |
720fca6ea1SDimitry Andric // |   <hazard padding>                |
730fca6ea1SDimitry Andric // |   <GPR>                           |
740b57cec5SDimitry Andric // |-----------------------------------| <- bp(not defined by ABI,
750b57cec5SDimitry Andric // |.variable-sized.local.variables....|       LLVM chooses X19)
760b57cec5SDimitry Andric // |.(VLAs)............................| (size of this area is unknown at
770b57cec5SDimitry Andric // |...................................|  compile time)
780b57cec5SDimitry Andric // |-----------------------------------| <- sp
790b57cec5SDimitry Andric // |                                   | Lower address
800b57cec5SDimitry Andric //
810b57cec5SDimitry Andric //
820b57cec5SDimitry Andric // To access the data in a frame, at compile time, a constant offset must be
830b57cec5SDimitry Andric // computable from one of the pointers (fp, bp, sp) to access it. The size
840b57cec5SDimitry Andric // of the areas with a dotted background cannot be computed at compile-time
850b57cec5SDimitry Andric // if they are present, making it required to have all three of fp, bp and
860b57cec5SDimitry Andric // sp to be set up to be able to access all contents in the frame areas,
870b57cec5SDimitry Andric // assuming all of the frame areas are non-empty.
880b57cec5SDimitry Andric //
890b57cec5SDimitry Andric // For most functions, some of the frame areas are empty. For those functions,
900b57cec5SDimitry Andric // it may not be necessary to set up fp or bp:
910b57cec5SDimitry Andric // * A base pointer is definitely needed when there are both VLAs and local
920b57cec5SDimitry Andric //   variables with more-than-default alignment requirements.
930b57cec5SDimitry Andric // * A frame pointer is definitely needed when there are local variables with
940b57cec5SDimitry Andric //   more-than-default alignment requirements.
950b57cec5SDimitry Andric //
968bcb0991SDimitry Andric // For Darwin platforms the frame-record (fp, lr) is stored at the top of the
978bcb0991SDimitry Andric // callee-saved area, since the unwind encoding does not allow for encoding
988bcb0991SDimitry Andric // this dynamically and existing tools depend on this layout. For other
998bcb0991SDimitry Andric // platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
1008bcb0991SDimitry Andric // area to allow SVE stack objects (allocated directly below the callee-saves,
1018bcb0991SDimitry Andric // if available) to be accessed directly from the framepointer.
1028bcb0991SDimitry Andric // The SVE spill/fill instructions have VL-scaled addressing modes such
1038bcb0991SDimitry Andric // as:
1048bcb0991SDimitry Andric //    ldr z8, [fp, #-7 mul vl]
1058bcb0991SDimitry Andric // For SVE the size of the vector length (VL) is not known at compile-time, so
1068bcb0991SDimitry Andric // '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
1078bcb0991SDimitry Andric // layout, we don't need to add an unscaled offset to the framepointer before
1088bcb0991SDimitry Andric // accessing the SVE object in the frame.
1098bcb0991SDimitry Andric //
1100b57cec5SDimitry Andric // In some cases when a base pointer is not strictly needed, it is generated
1110b57cec5SDimitry Andric // anyway when offsets from the frame pointer to access local variables become
1120b57cec5SDimitry Andric // so large that the offset can't be encoded in the immediate fields of loads
1130b57cec5SDimitry Andric // or stores.
1140b57cec5SDimitry Andric //
115fe6060f1SDimitry Andric // Outgoing function arguments must be at the bottom of the stack frame when
116fe6060f1SDimitry Andric // calling another function. If we do not have variable-sized stack objects, we
117fe6060f1SDimitry Andric // can allocate a "reserved call frame" area at the bottom of the local
118fe6060f1SDimitry Andric // variable area, large enough for all outgoing calls. If we do have VLAs, then
119fe6060f1SDimitry Andric // the stack pointer must be decremented and incremented around each call to
120fe6060f1SDimitry Andric // make space for the arguments below the VLAs.
121fe6060f1SDimitry Andric //
1220b57cec5SDimitry Andric // FIXME: also explain the redzone concept.
1230b57cec5SDimitry Andric //
1240fca6ea1SDimitry Andric // About stack hazards: Under some SME contexts, a coprocessor with its own
1250fca6ea1SDimitry Andric // separate cache can be used for FP operations. This can create hazards if the CPU
1260fca6ea1SDimitry Andric // and the SME unit try to access the same area of memory, including if the
1270fca6ea1SDimitry Andric // access is to an area of the stack. To try to alleviate this we attempt to
1280fca6ea1SDimitry Andric // introduce extra padding into the stack frame between FP and GPR accesses,
1290fca6ea1SDimitry Andric // controlled by the StackHazardSize option. Without changing the layout of the
1300fca6ea1SDimitry Andric // stack frame in the diagram above, a stack object of size StackHazardSize is
1310fca6ea1SDimitry Andric // added between GPR and FPR CSRs. Another is added to the stack objects
1320fca6ea1SDimitry Andric // section, and stack objects are sorted so that FPR > Hazard padding slot >
1330fca6ea1SDimitry Andric // GPRs (where possible). Unfortunately some things are not handled well (VLA
1340fca6ea1SDimitry Andric // area, arguments on the stack, object with both GPR and FPR accesses), but if
1350fca6ea1SDimitry Andric // those are controlled by the user then the entire stack frame becomes GPR at
1360fca6ea1SDimitry Andric // the start/end with FPR in the middle, surrounded by Hazard padding.
1370fca6ea1SDimitry Andric //
13881ad6265SDimitry Andric // An example of the prologue:
13981ad6265SDimitry Andric //
14081ad6265SDimitry Andric //     .globl __foo
14181ad6265SDimitry Andric //     .align 2
14281ad6265SDimitry Andric //  __foo:
14381ad6265SDimitry Andric // Ltmp0:
14481ad6265SDimitry Andric //     .cfi_startproc
14581ad6265SDimitry Andric //     .cfi_personality 155, ___gxx_personality_v0
14681ad6265SDimitry Andric // Leh_func_begin:
14781ad6265SDimitry Andric //     .cfi_lsda 16, Lexception33
14881ad6265SDimitry Andric //
14981ad6265SDimitry Andric //     stp  xa,bx, [sp, -#offset]!
15081ad6265SDimitry Andric //     ...
15181ad6265SDimitry Andric //     stp  x28, x27, [sp, #offset-32]
15281ad6265SDimitry Andric //     stp  fp, lr, [sp, #offset-16]
15381ad6265SDimitry Andric //     add  fp, sp, #offset - 16
15481ad6265SDimitry Andric //     sub  sp, sp, #1360
15581ad6265SDimitry Andric //
15681ad6265SDimitry Andric // The Stack:
15781ad6265SDimitry Andric //       +-------------------------------------------+
15881ad6265SDimitry Andric // 10000 | ........ | ........ | ........ | ........ |
15981ad6265SDimitry Andric // 10004 | ........ | ........ | ........ | ........ |
16081ad6265SDimitry Andric //       +-------------------------------------------+
16181ad6265SDimitry Andric // 10008 | ........ | ........ | ........ | ........ |
16281ad6265SDimitry Andric // 1000c | ........ | ........ | ........ | ........ |
16381ad6265SDimitry Andric //       +===========================================+
16481ad6265SDimitry Andric // 10010 |                X28 Register               |
16581ad6265SDimitry Andric // 10014 |                X28 Register               |
16681ad6265SDimitry Andric //       +-------------------------------------------+
16781ad6265SDimitry Andric // 10018 |                X27 Register               |
16881ad6265SDimitry Andric // 1001c |                X27 Register               |
16981ad6265SDimitry Andric //       +===========================================+
17081ad6265SDimitry Andric // 10020 |                Frame Pointer              |
17181ad6265SDimitry Andric // 10024 |                Frame Pointer              |
17281ad6265SDimitry Andric //       +-------------------------------------------+
17381ad6265SDimitry Andric // 10028 |                Link Register              |
17481ad6265SDimitry Andric // 1002c |                Link Register              |
17581ad6265SDimitry Andric //       +===========================================+
17681ad6265SDimitry Andric // 10030 | ........ | ........ | ........ | ........ |
17781ad6265SDimitry Andric // 10034 | ........ | ........ | ........ | ........ |
17881ad6265SDimitry Andric //       +-------------------------------------------+
17981ad6265SDimitry Andric // 10038 | ........ | ........ | ........ | ........ |
18081ad6265SDimitry Andric // 1003c | ........ | ........ | ........ | ........ |
18181ad6265SDimitry Andric //       +-------------------------------------------+
18281ad6265SDimitry Andric //
18381ad6265SDimitry Andric //     [sp] = 10030        ::    >>initial value<<
18481ad6265SDimitry Andric //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
18581ad6265SDimitry Andric //     fp = sp == 10020    ::  mov fp, sp
18681ad6265SDimitry Andric //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
18781ad6265SDimitry Andric //     sp == 10010         ::    >>final value<<
18881ad6265SDimitry Andric //
18981ad6265SDimitry Andric // The frame pointer (w29) points to address 10020. If we use an offset of
19081ad6265SDimitry Andric // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
19181ad6265SDimitry Andric // for w27, and -32 for w28:
19281ad6265SDimitry Andric //
19381ad6265SDimitry Andric //  Ltmp1:
19481ad6265SDimitry Andric //     .cfi_def_cfa w29, 16
19581ad6265SDimitry Andric //  Ltmp2:
19681ad6265SDimitry Andric //     .cfi_offset w30, -8
19781ad6265SDimitry Andric //  Ltmp3:
19881ad6265SDimitry Andric //     .cfi_offset w29, -16
19981ad6265SDimitry Andric //  Ltmp4:
20081ad6265SDimitry Andric //     .cfi_offset w27, -24
20181ad6265SDimitry Andric //  Ltmp5:
20281ad6265SDimitry Andric //     .cfi_offset w28, -32
20381ad6265SDimitry Andric //
2040b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric #include "AArch64FrameLowering.h"
2070b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
2080b57cec5SDimitry Andric #include "AArch64MachineFunctionInfo.h"
2090b57cec5SDimitry Andric #include "AArch64RegisterInfo.h"
2100b57cec5SDimitry Andric #include "AArch64Subtarget.h"
2110b57cec5SDimitry Andric #include "AArch64TargetMachine.h"
2120b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
21381ad6265SDimitry Andric #include "MCTargetDesc/AArch64MCTargetDesc.h"
2140b57cec5SDimitry Andric #include "llvm/ADT/ScopeExit.h"
2150b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
2160b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
2170fca6ea1SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
2180b57cec5SDimitry Andric #include "llvm/CodeGen/LivePhysRegs.h"
2190b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
2200b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
2210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
2220b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
2230b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
2240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h"
2250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
2260b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
2270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
2280b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h"
2290b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
2300b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
2310b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
2320b57cec5SDimitry Andric #include "llvm/CodeGen/WinEHFuncInfo.h"
2330b57cec5SDimitry Andric #include "llvm/IR/Attributes.h"
2340b57cec5SDimitry Andric #include "llvm/IR/CallingConv.h"
2350b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
2360b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
2370b57cec5SDimitry Andric #include "llvm/IR/Function.h"
2380b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h"
2390b57cec5SDimitry Andric #include "llvm/MC/MCDwarf.h"
2400b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
2410b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
2420b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
24362987288SDimitry Andric #include "llvm/Support/FormatVariadic.h"
2440b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
2450b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
2460b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
2470b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h"
2480b57cec5SDimitry Andric #include <cassert>
2490b57cec5SDimitry Andric #include <cstdint>
2500b57cec5SDimitry Andric #include <iterator>
251bdd1243dSDimitry Andric #include <optional>
2520b57cec5SDimitry Andric #include <vector>
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric using namespace llvm;
2550b57cec5SDimitry Andric 
2560b57cec5SDimitry Andric #define DEBUG_TYPE "frame-info"
2570b57cec5SDimitry Andric 
2580b57cec5SDimitry Andric static cl::opt<bool> EnableRedZone("aarch64-redzone",
2590b57cec5SDimitry Andric                                    cl::desc("enable use of redzone on AArch64"),
2600b57cec5SDimitry Andric                                    cl::init(false), cl::Hidden);
2610b57cec5SDimitry Andric 
2625ffd83dbSDimitry Andric static cl::opt<bool> StackTaggingMergeSetTag(
2635ffd83dbSDimitry Andric     "stack-tagging-merge-settag",
2645ffd83dbSDimitry Andric     cl::desc("merge settag instruction in function epilog"), cl::init(true),
2655ffd83dbSDimitry Andric     cl::Hidden);
2665ffd83dbSDimitry Andric 
267e8d8bef9SDimitry Andric static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
268e8d8bef9SDimitry Andric                                        cl::desc("sort stack allocations"),
269e8d8bef9SDimitry Andric                                        cl::init(true), cl::Hidden);
270e8d8bef9SDimitry Andric 
271fe6060f1SDimitry Andric cl::opt<bool> EnableHomogeneousPrologEpilog(
27281ad6265SDimitry Andric     "homogeneous-prolog-epilog", cl::Hidden,
273fe6060f1SDimitry Andric     cl::desc("Emit homogeneous prologue and epilogue for the size "
274fe6060f1SDimitry Andric              "optimization (default = off)"));
275fe6060f1SDimitry Andric 
2760fca6ea1SDimitry Andric // Stack hazard padding size. 0 = disabled.
2770fca6ea1SDimitry Andric static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
2780fca6ea1SDimitry Andric                                          cl::init(0), cl::Hidden);
27962987288SDimitry Andric // Stack hazard size for analysis remarks. StackHazardSize takes precedence.
28062987288SDimitry Andric static cl::opt<unsigned>
28162987288SDimitry Andric     StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
28262987288SDimitry Andric                           cl::Hidden);
2830fca6ea1SDimitry Andric // Whether to insert padding into non-streaming functions (for testing).
2840fca6ea1SDimitry Andric static cl::opt<bool>
2850fca6ea1SDimitry Andric     StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
2860fca6ea1SDimitry Andric                               cl::init(false), cl::Hidden);
2870fca6ea1SDimitry Andric 
2880b57cec5SDimitry Andric STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
2890b57cec5SDimitry Andric 
290fe6060f1SDimitry Andric /// Returns how much of the incoming argument stack area (in bytes) we should
291fe6060f1SDimitry Andric /// clean up in an epilogue. For the C calling convention this will be 0, for
292fe6060f1SDimitry Andric /// guaranteed tail call conventions it can be positive (a normal return or a
293fe6060f1SDimitry Andric /// tail call to a function that uses less stack space for arguments) or
294fe6060f1SDimitry Andric /// negative (for a tail call to a function that needs more stack space than us
295fe6060f1SDimitry Andric /// for arguments).
296fe6060f1SDimitry Andric static int64_t getArgumentStackToRestore(MachineFunction &MF,
2975ffd83dbSDimitry Andric                                          MachineBasicBlock &MBB) {
2985ffd83dbSDimitry Andric   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
2995ffd83dbSDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
3005f757f3fSDimitry Andric   bool IsTailCallReturn = (MBB.end() != MBBI)
3015f757f3fSDimitry Andric                               ? AArch64InstrInfo::isTailCallReturnInst(*MBBI)
3025f757f3fSDimitry Andric                               : false;
3035ffd83dbSDimitry Andric 
304fe6060f1SDimitry Andric   int64_t ArgumentPopSize = 0;
3055ffd83dbSDimitry Andric   if (IsTailCallReturn) {
3065ffd83dbSDimitry Andric     MachineOperand &StackAdjust = MBBI->getOperand(1);
3075ffd83dbSDimitry Andric 
3085ffd83dbSDimitry Andric     // For a tail-call in a callee-pops-arguments environment, some or all of
3095ffd83dbSDimitry Andric     // the stack may actually be in use for the call's arguments, this is
3105ffd83dbSDimitry Andric     // calculated during LowerCall and consumed here...
3115ffd83dbSDimitry Andric     ArgumentPopSize = StackAdjust.getImm();
3125ffd83dbSDimitry Andric   } else {
3135ffd83dbSDimitry Andric     // ... otherwise the amount to pop is *all* of the argument space,
3145ffd83dbSDimitry Andric     // conveniently stored in the MachineFunctionInfo by
3155ffd83dbSDimitry Andric     // LowerFormalArguments. This will, of course, be zero for the C calling
3165ffd83dbSDimitry Andric     // convention.
3175ffd83dbSDimitry Andric     ArgumentPopSize = AFI->getArgumentStackToRestore();
3185ffd83dbSDimitry Andric   }
3195ffd83dbSDimitry Andric 
3205ffd83dbSDimitry Andric   return ArgumentPopSize;
3215ffd83dbSDimitry Andric }
3225ffd83dbSDimitry Andric 
323fe6060f1SDimitry Andric static bool produceCompactUnwindFrame(MachineFunction &MF);
324fe6060f1SDimitry Andric static bool needsWinCFI(const MachineFunction &MF);
325fe6060f1SDimitry Andric static StackOffset getSVEStackSize(const MachineFunction &MF);
3260fca6ea1SDimitry Andric static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
327fe6060f1SDimitry Andric 
328fe6060f1SDimitry Andric /// Returns true if a homogeneous prolog or epilog code can be emitted
329fe6060f1SDimitry Andric /// for the size optimization. If possible, a frame helper call is injected.
330fe6060f1SDimitry Andric /// When Exit block is given, this check is for epilog.
331fe6060f1SDimitry Andric bool AArch64FrameLowering::homogeneousPrologEpilog(
332fe6060f1SDimitry Andric     MachineFunction &MF, MachineBasicBlock *Exit) const {
333fe6060f1SDimitry Andric   if (!MF.getFunction().hasMinSize())
334fe6060f1SDimitry Andric     return false;
335fe6060f1SDimitry Andric   if (!EnableHomogeneousPrologEpilog)
336fe6060f1SDimitry Andric     return false;
337fe6060f1SDimitry Andric   if (EnableRedZone)
338fe6060f1SDimitry Andric     return false;
339fe6060f1SDimitry Andric 
340fe6060f1SDimitry Andric   // TODO: Window is supported yet.
341fe6060f1SDimitry Andric   if (needsWinCFI(MF))
342fe6060f1SDimitry Andric     return false;
343fe6060f1SDimitry Andric   // TODO: SVE is not supported yet.
344fe6060f1SDimitry Andric   if (getSVEStackSize(MF))
345fe6060f1SDimitry Andric     return false;
346fe6060f1SDimitry Andric 
347fe6060f1SDimitry Andric   // Bail on stack adjustment needed on return for simplicity.
348fe6060f1SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
349fe6060f1SDimitry Andric   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
350fe6060f1SDimitry Andric   if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
351fe6060f1SDimitry Andric     return false;
352fe6060f1SDimitry Andric   if (Exit && getArgumentStackToRestore(MF, *Exit))
353fe6060f1SDimitry Andric     return false;
354fe6060f1SDimitry Andric 
3555f757f3fSDimitry Andric   auto *AFI = MF.getInfo<AArch64FunctionInfo>();
3560fca6ea1SDimitry Andric   if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
3575f757f3fSDimitry Andric     return false;
3585f757f3fSDimitry Andric 
3595f757f3fSDimitry Andric   // If there are an odd number of GPRs before LR and FP in the CSRs list,
3605f757f3fSDimitry Andric   // they will not be paired into one RegPairInfo, which is incompatible with
3615f757f3fSDimitry Andric   // the assumption made by the homogeneous prolog epilog pass.
3625f757f3fSDimitry Andric   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
3635f757f3fSDimitry Andric   unsigned NumGPRs = 0;
3645f757f3fSDimitry Andric   for (unsigned I = 0; CSRegs[I]; ++I) {
3655f757f3fSDimitry Andric     Register Reg = CSRegs[I];
3665f757f3fSDimitry Andric     if (Reg == AArch64::LR) {
3675f757f3fSDimitry Andric       assert(CSRegs[I + 1] == AArch64::FP);
3685f757f3fSDimitry Andric       if (NumGPRs % 2 != 0)
3695f757f3fSDimitry Andric         return false;
3705f757f3fSDimitry Andric       break;
3715f757f3fSDimitry Andric     }
3725f757f3fSDimitry Andric     if (AArch64::GPR64RegClass.contains(Reg))
3735f757f3fSDimitry Andric       ++NumGPRs;
3745f757f3fSDimitry Andric   }
3755f757f3fSDimitry Andric 
376fe6060f1SDimitry Andric   return true;
377fe6060f1SDimitry Andric }
378fe6060f1SDimitry Andric 
379fe6060f1SDimitry Andric /// Returns true if CSRs should be paired.
380fe6060f1SDimitry Andric bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
381fe6060f1SDimitry Andric   return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
382fe6060f1SDimitry Andric }
383fe6060f1SDimitry Andric 
3840b57cec5SDimitry Andric /// This is the biggest offset to the stack pointer we can encode in aarch64
3850b57cec5SDimitry Andric /// instructions (without using a separate calculation and a temp register).
3860b57cec5SDimitry Andric /// Note that the exception here are vector stores/loads which cannot encode any
3870b57cec5SDimitry Andric /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
3880b57cec5SDimitry Andric static const unsigned DefaultSafeSPDisplacement = 255;
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric /// Look at each instruction that references stack frames and return the stack
3910b57cec5SDimitry Andric /// size limit beyond which some of these instructions will require a scratch
3920b57cec5SDimitry Andric /// register during their expansion later.
3930b57cec5SDimitry Andric static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
3940b57cec5SDimitry Andric   // FIXME: For now, just conservatively guestimate based on unscaled indexing
3950b57cec5SDimitry Andric   // range. We'll end up allocating an unnecessary spill slot a lot, but
3960b57cec5SDimitry Andric   // realistically that's not a big deal at this stage of the game.
3970b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
3980b57cec5SDimitry Andric     for (MachineInstr &MI : MBB) {
3990b57cec5SDimitry Andric       if (MI.isDebugInstr() || MI.isPseudo() ||
4000b57cec5SDimitry Andric           MI.getOpcode() == AArch64::ADDXri ||
4010b57cec5SDimitry Andric           MI.getOpcode() == AArch64::ADDSXri)
4020b57cec5SDimitry Andric         continue;
4030b57cec5SDimitry Andric 
4040b57cec5SDimitry Andric       for (const MachineOperand &MO : MI.operands()) {
4050b57cec5SDimitry Andric         if (!MO.isFI())
4060b57cec5SDimitry Andric           continue;
4070b57cec5SDimitry Andric 
4088bcb0991SDimitry Andric         StackOffset Offset;
4090b57cec5SDimitry Andric         if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
4100b57cec5SDimitry Andric             AArch64FrameOffsetCannotUpdate)
4110b57cec5SDimitry Andric           return 0;
4120b57cec5SDimitry Andric       }
4130b57cec5SDimitry Andric     }
4140b57cec5SDimitry Andric   }
4150b57cec5SDimitry Andric   return DefaultSafeSPDisplacement;
4160b57cec5SDimitry Andric }
4170b57cec5SDimitry Andric 
418480093f4SDimitry Andric TargetStackID::Value
419480093f4SDimitry Andric AArch64FrameLowering::getStackIDForScalableVectors() const {
420e8d8bef9SDimitry Andric   return TargetStackID::ScalableVector;
421480093f4SDimitry Andric }
422480093f4SDimitry Andric 
42362cfcf62SDimitry Andric /// Returns the size of the fixed object area (allocated next to sp on entry)
42462cfcf62SDimitry Andric /// On Win64 this may include a var args area and an UnwindHelp object for EH.
42562cfcf62SDimitry Andric static unsigned getFixedObjectSize(const MachineFunction &MF,
42662cfcf62SDimitry Andric                                    const AArch64FunctionInfo *AFI, bool IsWin64,
42762cfcf62SDimitry Andric                                    bool IsFunclet) {
42862cfcf62SDimitry Andric   if (!IsWin64 || IsFunclet) {
429fe6060f1SDimitry Andric     return AFI->getTailCallReservedStack();
43062cfcf62SDimitry Andric   } else {
4310fca6ea1SDimitry Andric     if (AFI->getTailCallReservedStack() != 0 &&
4320fca6ea1SDimitry Andric         !MF.getFunction().getAttributes().hasAttrSomewhere(
4330fca6ea1SDimitry Andric             Attribute::SwiftAsync))
434fe6060f1SDimitry Andric       report_fatal_error("cannot generate ABI-changing tail call for Win64");
43562cfcf62SDimitry Andric     // Var args are stored here in the primary function.
43662cfcf62SDimitry Andric     const unsigned VarArgsArea = AFI->getVarArgsGPRSize();
43762cfcf62SDimitry Andric     // To support EH funclets we allocate an UnwindHelp object
43862cfcf62SDimitry Andric     const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
4390fca6ea1SDimitry Andric     return AFI->getTailCallReservedStack() +
4400fca6ea1SDimitry Andric            alignTo(VarArgsArea + UnwindHelpObject, 16);
44162cfcf62SDimitry Andric   }
44262cfcf62SDimitry Andric }
44362cfcf62SDimitry Andric 
4448bcb0991SDimitry Andric /// Returns the size of the entire SVE stackframe (calleesaves + spills).
4458bcb0991SDimitry Andric static StackOffset getSVEStackSize(const MachineFunction &MF) {
4468bcb0991SDimitry Andric   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
447e8d8bef9SDimitry Andric   return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
4488bcb0991SDimitry Andric }
4498bcb0991SDimitry Andric 
4500b57cec5SDimitry Andric bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
4510b57cec5SDimitry Andric   if (!EnableRedZone)
4520b57cec5SDimitry Andric     return false;
453fe6060f1SDimitry Andric 
4540b57cec5SDimitry Andric   // Don't use the red zone if the function explicitly asks us not to.
4550b57cec5SDimitry Andric   // This is typically used for kernel code.
456fe6060f1SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
457fe6060f1SDimitry Andric   const unsigned RedZoneSize =
458fe6060f1SDimitry Andric       Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
459fe6060f1SDimitry Andric   if (!RedZoneSize)
4600b57cec5SDimitry Andric     return false;
4610b57cec5SDimitry Andric 
4620b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
4630b57cec5SDimitry Andric   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
464480093f4SDimitry Andric   uint64_t NumBytes = AFI->getLocalStackSize();
4650b57cec5SDimitry Andric 
4660fca6ea1SDimitry Andric   // If neither NEON or SVE are available, a COPY from one Q-reg to
4670fca6ea1SDimitry Andric   // another requires a spill -> reload sequence. We can do that
4680fca6ea1SDimitry Andric   // using a pre-decrementing store/post-decrementing load, but
4690fca6ea1SDimitry Andric   // if we do so, we can't use the Red Zone.
4700fca6ea1SDimitry Andric   bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
4710fca6ea1SDimitry Andric                                  !Subtarget.isNeonAvailable() &&
4720fca6ea1SDimitry Andric                                  !Subtarget.hasSVE();
4730fca6ea1SDimitry Andric 
474fe6060f1SDimitry Andric   return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
4750fca6ea1SDimitry Andric            getSVEStackSize(MF) || LowerQRegCopyThroughMem);
4760b57cec5SDimitry Andric }
4770b57cec5SDimitry Andric 
4780b57cec5SDimitry Andric /// hasFP - Return true if the specified function should have a dedicated frame
4790b57cec5SDimitry Andric /// pointer register.
4800b57cec5SDimitry Andric bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
4810b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
4820b57cec5SDimitry Andric   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
4835f757f3fSDimitry Andric 
4840b57cec5SDimitry Andric   // Win64 EH requires a frame pointer if funclets are present, as the locals
4850b57cec5SDimitry Andric   // are accessed off the frame pointer in both the parent function and the
4860b57cec5SDimitry Andric   // funclets.
4870b57cec5SDimitry Andric   if (MF.hasEHFunclets())
4880b57cec5SDimitry Andric     return true;
4890b57cec5SDimitry Andric   // Retain behavior of always omitting the FP for leaf functions when possible.
490480093f4SDimitry Andric   if (MF.getTarget().Options.DisableFramePointerElim(MF))
4910b57cec5SDimitry Andric     return true;
4920b57cec5SDimitry Andric   if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
4930b57cec5SDimitry Andric       MFI.hasStackMap() || MFI.hasPatchPoint() ||
494fe6060f1SDimitry Andric       RegInfo->hasStackRealignment(MF))
4950b57cec5SDimitry Andric     return true;
4960b57cec5SDimitry Andric   // With large callframes around we may need to use FP to access the scavenging
4970b57cec5SDimitry Andric   // emergency spillslot.
4980b57cec5SDimitry Andric   //
4990b57cec5SDimitry Andric   // Unfortunately some calls to hasFP() like machine verifier ->
5000b57cec5SDimitry Andric   // getReservedReg() -> hasFP in the middle of global isel are too early
5010b57cec5SDimitry Andric   // to know the max call frame size. Hopefully conservatively returning "true"
5020b57cec5SDimitry Andric   // in those cases is fine.
5030b57cec5SDimitry Andric   // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
5040b57cec5SDimitry Andric   if (!MFI.isMaxCallFrameSizeComputed() ||
5050b57cec5SDimitry Andric       MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
5060b57cec5SDimitry Andric     return true;
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric   return false;
5090b57cec5SDimitry Andric }
5100b57cec5SDimitry Andric 
5110b57cec5SDimitry Andric /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
5120b57cec5SDimitry Andric /// not required, we reserve argument space for call sites in the function
5130b57cec5SDimitry Andric /// immediately on entry to the current function.  This eliminates the need for
5140b57cec5SDimitry Andric /// add/sub sp brackets around call sites.  Returns true if the call frame is
5150b57cec5SDimitry Andric /// included as part of the stack frame.
5160fca6ea1SDimitry Andric bool AArch64FrameLowering::hasReservedCallFrame(
5170fca6ea1SDimitry Andric     const MachineFunction &MF) const {
5185f757f3fSDimitry Andric   // The stack probing code for the dynamically allocated outgoing arguments
5195f757f3fSDimitry Andric   // area assumes that the stack is probed at the top - either by the prologue
5205f757f3fSDimitry Andric   // code, which issues a probe if `hasVarSizedObjects` return true, or by the
5215f757f3fSDimitry Andric   // most recent variable-sized object allocation. Changing the condition here
5225f757f3fSDimitry Andric   // may need to be followed up by changes to the probe issuing logic.
5230b57cec5SDimitry Andric   return !MF.getFrameInfo().hasVarSizedObjects();
5240b57cec5SDimitry Andric }
5250b57cec5SDimitry Andric 
5260b57cec5SDimitry Andric MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
5270b57cec5SDimitry Andric     MachineFunction &MF, MachineBasicBlock &MBB,
5280b57cec5SDimitry Andric     MachineBasicBlock::iterator I) const {
5290b57cec5SDimitry Andric   const AArch64InstrInfo *TII =
5300b57cec5SDimitry Andric       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
5315f757f3fSDimitry Andric   const AArch64TargetLowering *TLI =
5325f757f3fSDimitry Andric       MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
5335f757f3fSDimitry Andric   [[maybe_unused]] MachineFrameInfo &MFI = MF.getFrameInfo();
5340b57cec5SDimitry Andric   DebugLoc DL = I->getDebugLoc();
5350b57cec5SDimitry Andric   unsigned Opc = I->getOpcode();
5360b57cec5SDimitry Andric   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
5370b57cec5SDimitry Andric   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
5380b57cec5SDimitry Andric 
5390b57cec5SDimitry Andric   if (!hasReservedCallFrame(MF)) {
5400b57cec5SDimitry Andric     int64_t Amount = I->getOperand(0).getImm();
5415ffd83dbSDimitry Andric     Amount = alignTo(Amount, getStackAlign());
5420b57cec5SDimitry Andric     if (!IsDestroy)
5430b57cec5SDimitry Andric       Amount = -Amount;
5440b57cec5SDimitry Andric 
5450b57cec5SDimitry Andric     // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
5460b57cec5SDimitry Andric     // doesn't have to pop anything), then the first operand will be zero too so
5470b57cec5SDimitry Andric     // this adjustment is a no-op.
5480b57cec5SDimitry Andric     if (CalleePopAmount == 0) {
5490b57cec5SDimitry Andric       // FIXME: in-function stack adjustment for calls is limited to 24-bits
5500b57cec5SDimitry Andric       // because there's no guaranteed temporary register available.
5510b57cec5SDimitry Andric       //
5520b57cec5SDimitry Andric       // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
5530b57cec5SDimitry Andric       // 1) For offset <= 12-bit, we use LSL #0
5540b57cec5SDimitry Andric       // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
5550b57cec5SDimitry Andric       // LSL #0, and the other uses LSL #12.
5560b57cec5SDimitry Andric       //
5570b57cec5SDimitry Andric       // Most call frames will be allocated at the start of a function so
5580b57cec5SDimitry Andric       // this is OK, but it is a limitation that needs dealing with.
5590b57cec5SDimitry Andric       assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
5605f757f3fSDimitry Andric 
5615f757f3fSDimitry Andric       if (TLI->hasInlineStackProbe(MF) &&
5625f757f3fSDimitry Andric           -Amount >= AArch64::StackProbeMaxUnprobedStack) {
5635f757f3fSDimitry Andric         // When stack probing is enabled, the decrement of SP may need to be
5645f757f3fSDimitry Andric         // probed. We only need to do this if the call site needs 1024 bytes of
5655f757f3fSDimitry Andric         // space or more, because a region smaller than that is allowed to be
5665f757f3fSDimitry Andric         // unprobed at an ABI boundary. We rely on the fact that SP has been
5675f757f3fSDimitry Andric         // probed exactly at this point, either by the prologue or most recent
5685f757f3fSDimitry Andric         // dynamic allocation.
5695f757f3fSDimitry Andric         assert(MFI.hasVarSizedObjects() &&
5705f757f3fSDimitry Andric                "non-reserved call frame without var sized objects?");
5715f757f3fSDimitry Andric         Register ScratchReg =
5725f757f3fSDimitry Andric             MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
5735f757f3fSDimitry Andric         inlineStackProbeFixed(I, ScratchReg, -Amount, StackOffset::get(0, 0));
5745f757f3fSDimitry Andric       } else {
575e8d8bef9SDimitry Andric         emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
576e8d8bef9SDimitry Andric                         StackOffset::getFixed(Amount), TII);
5770b57cec5SDimitry Andric       }
5785f757f3fSDimitry Andric     }
5790b57cec5SDimitry Andric   } else if (CalleePopAmount != 0) {
5800b57cec5SDimitry Andric     // If the calling convention demands that the callee pops arguments from the
5810b57cec5SDimitry Andric     // stack, we want to add it back if we have a reserved call frame.
5820b57cec5SDimitry Andric     assert(CalleePopAmount < 0xffffff && "call frame too large");
5838bcb0991SDimitry Andric     emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
584e8d8bef9SDimitry Andric                     StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
5850b57cec5SDimitry Andric   }
5860b57cec5SDimitry Andric   return MBB.erase(I);
5870b57cec5SDimitry Andric }
5880b57cec5SDimitry Andric 
58981ad6265SDimitry Andric void AArch64FrameLowering::emitCalleeSavedGPRLocations(
5900b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
5910b57cec5SDimitry Andric   MachineFunction &MF = *MBB.getParent();
5920b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
5930fca6ea1SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5940fca6ea1SDimitry Andric   SMEAttrs Attrs(MF.getFunction());
5950fca6ea1SDimitry Andric   bool LocallyStreaming =
5960fca6ea1SDimitry Andric       Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();
59781ad6265SDimitry Andric 
59881ad6265SDimitry Andric   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
59981ad6265SDimitry Andric   if (CSI.empty())
60081ad6265SDimitry Andric     return;
60181ad6265SDimitry Andric 
6020b57cec5SDimitry Andric   const TargetSubtargetInfo &STI = MF.getSubtarget();
60381ad6265SDimitry Andric   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
60481ad6265SDimitry Andric   const TargetInstrInfo &TII = *STI.getInstrInfo();
6050b57cec5SDimitry Andric   DebugLoc DL = MBB.findDebugLoc(MBBI);
6060b57cec5SDimitry Andric 
60781ad6265SDimitry Andric   for (const auto &Info : CSI) {
6080fca6ea1SDimitry Andric     unsigned FrameIdx = Info.getFrameIdx();
6090fca6ea1SDimitry Andric     if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector)
61081ad6265SDimitry Andric       continue;
61181ad6265SDimitry Andric 
61281ad6265SDimitry Andric     assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
6130fca6ea1SDimitry Andric     int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
6140fca6ea1SDimitry Andric     int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea();
61581ad6265SDimitry Andric 
6160fca6ea1SDimitry Andric     // The location of VG will be emitted before each streaming-mode change in
6170fca6ea1SDimitry Andric     // the function. Only locally-streaming functions require emitting the
6180fca6ea1SDimitry Andric     // non-streaming VG location here.
6190fca6ea1SDimitry Andric     if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
6200fca6ea1SDimitry Andric         (!LocallyStreaming &&
6210fca6ea1SDimitry Andric          DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))
6220fca6ea1SDimitry Andric       continue;
6230fca6ea1SDimitry Andric 
62481ad6265SDimitry Andric     unsigned CFIIndex = MF.addFrameInst(
62581ad6265SDimitry Andric         MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
62681ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
62781ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
62881ad6265SDimitry Andric         .setMIFlags(MachineInstr::FrameSetup);
62981ad6265SDimitry Andric   }
63081ad6265SDimitry Andric }
63181ad6265SDimitry Andric 
63281ad6265SDimitry Andric void AArch64FrameLowering::emitCalleeSavedSVELocations(
63381ad6265SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
63481ad6265SDimitry Andric   MachineFunction &MF = *MBB.getParent();
63581ad6265SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
63681ad6265SDimitry Andric 
6370b57cec5SDimitry Andric   // Add callee saved registers to move list.
6380b57cec5SDimitry Andric   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
6390b57cec5SDimitry Andric   if (CSI.empty())
6400b57cec5SDimitry Andric     return;
6410b57cec5SDimitry Andric 
64281ad6265SDimitry Andric   const TargetSubtargetInfo &STI = MF.getSubtarget();
64381ad6265SDimitry Andric   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
64481ad6265SDimitry Andric   const TargetInstrInfo &TII = *STI.getInstrInfo();
64581ad6265SDimitry Andric   DebugLoc DL = MBB.findDebugLoc(MBBI);
64681ad6265SDimitry Andric   AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
64781ad6265SDimitry Andric 
6480b57cec5SDimitry Andric   for (const auto &Info : CSI) {
64981ad6265SDimitry Andric     if (!(MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
65081ad6265SDimitry Andric       continue;
65175b4d546SDimitry Andric 
65275b4d546SDimitry Andric     // Not all unwinders may know about SVE registers, so assume the lowest
65375b4d546SDimitry Andric     // common demoninator.
65481ad6265SDimitry Andric     assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
65581ad6265SDimitry Andric     unsigned Reg = Info.getReg();
65681ad6265SDimitry Andric     if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
65775b4d546SDimitry Andric       continue;
65875b4d546SDimitry Andric 
65981ad6265SDimitry Andric     StackOffset Offset =
660e8d8bef9SDimitry Andric         StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
66181ad6265SDimitry Andric         StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
66281ad6265SDimitry Andric 
66381ad6265SDimitry Andric     unsigned CFIIndex = MF.addFrameInst(createCFAOffset(TRI, Reg, Offset));
66481ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
6650b57cec5SDimitry Andric         .addCFIIndex(CFIIndex)
6660b57cec5SDimitry Andric         .setMIFlags(MachineInstr::FrameSetup);
6670b57cec5SDimitry Andric   }
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric 
67081ad6265SDimitry Andric static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF,
67181ad6265SDimitry Andric                                MachineBasicBlock &MBB,
67281ad6265SDimitry Andric                                MachineBasicBlock::iterator InsertPt,
67381ad6265SDimitry Andric                                unsigned DwarfReg) {
67481ad6265SDimitry Andric   unsigned CFIIndex =
67581ad6265SDimitry Andric       MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, DwarfReg));
67681ad6265SDimitry Andric   BuildMI(MBB, InsertPt, DebugLoc(), Desc).addCFIIndex(CFIIndex);
67781ad6265SDimitry Andric }
67881ad6265SDimitry Andric 
67981ad6265SDimitry Andric void AArch64FrameLowering::resetCFIToInitialState(
68081ad6265SDimitry Andric     MachineBasicBlock &MBB) const {
68181ad6265SDimitry Andric 
68281ad6265SDimitry Andric   MachineFunction &MF = *MBB.getParent();
68381ad6265SDimitry Andric   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
68481ad6265SDimitry Andric   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
68581ad6265SDimitry Andric   const auto &TRI =
68681ad6265SDimitry Andric       static_cast<const AArch64RegisterInfo &>(*Subtarget.getRegisterInfo());
68781ad6265SDimitry Andric   const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
68881ad6265SDimitry Andric 
68981ad6265SDimitry Andric   const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);
69081ad6265SDimitry Andric   DebugLoc DL;
69181ad6265SDimitry Andric 
69281ad6265SDimitry Andric   // Reset the CFA to `SP + 0`.
69381ad6265SDimitry Andric   MachineBasicBlock::iterator InsertPt = MBB.begin();
69481ad6265SDimitry Andric   unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
69581ad6265SDimitry Andric       nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));
69681ad6265SDimitry Andric   BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
69781ad6265SDimitry Andric 
69881ad6265SDimitry Andric   // Flip the RA sign state.
699bdd1243dSDimitry Andric   if (MFI.shouldSignReturnAddress(MF)) {
70081ad6265SDimitry Andric     CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
70181ad6265SDimitry Andric     BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
70281ad6265SDimitry Andric   }
70381ad6265SDimitry Andric 
70481ad6265SDimitry Andric   // Shadow call stack uses X18, reset it.
7055f757f3fSDimitry Andric   if (MFI.needsShadowCallStackPrologueEpilogue(MF))
70681ad6265SDimitry Andric     insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
70781ad6265SDimitry Andric                        TRI.getDwarfRegNum(AArch64::X18, true));
70881ad6265SDimitry Andric 
70981ad6265SDimitry Andric   // Emit .cfi_same_value for callee-saved registers.
71081ad6265SDimitry Andric   const std::vector<CalleeSavedInfo> &CSI =
71181ad6265SDimitry Andric       MF.getFrameInfo().getCalleeSavedInfo();
71281ad6265SDimitry Andric   for (const auto &Info : CSI) {
71381ad6265SDimitry Andric     unsigned Reg = Info.getReg();
71481ad6265SDimitry Andric     if (!TRI.regNeedsCFI(Reg, Reg))
71581ad6265SDimitry Andric       continue;
71681ad6265SDimitry Andric     insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
71781ad6265SDimitry Andric                        TRI.getDwarfRegNum(Reg, true));
71881ad6265SDimitry Andric   }
71981ad6265SDimitry Andric }
72081ad6265SDimitry Andric 
72181ad6265SDimitry Andric static void emitCalleeSavedRestores(MachineBasicBlock &MBB,
72281ad6265SDimitry Andric                                     MachineBasicBlock::iterator MBBI,
72381ad6265SDimitry Andric                                     bool SVE) {
72481ad6265SDimitry Andric   MachineFunction &MF = *MBB.getParent();
72581ad6265SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
72681ad6265SDimitry Andric 
72781ad6265SDimitry Andric   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
72881ad6265SDimitry Andric   if (CSI.empty())
72981ad6265SDimitry Andric     return;
73081ad6265SDimitry Andric 
73181ad6265SDimitry Andric   const TargetSubtargetInfo &STI = MF.getSubtarget();
73281ad6265SDimitry Andric   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
73381ad6265SDimitry Andric   const TargetInstrInfo &TII = *STI.getInstrInfo();
73481ad6265SDimitry Andric   DebugLoc DL = MBB.findDebugLoc(MBBI);
73581ad6265SDimitry Andric 
73681ad6265SDimitry Andric   for (const auto &Info : CSI) {
73781ad6265SDimitry Andric     if (SVE !=
73881ad6265SDimitry Andric         (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector))
73981ad6265SDimitry Andric       continue;
74081ad6265SDimitry Andric 
74181ad6265SDimitry Andric     unsigned Reg = Info.getReg();
74281ad6265SDimitry Andric     if (SVE &&
74381ad6265SDimitry Andric         !static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg))
74481ad6265SDimitry Andric       continue;
74581ad6265SDimitry Andric 
7460fca6ea1SDimitry Andric     if (!Info.isRestored())
7470fca6ea1SDimitry Andric       continue;
7480fca6ea1SDimitry Andric 
74981ad6265SDimitry Andric     unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
75081ad6265SDimitry Andric         nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
75181ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
75281ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
75381ad6265SDimitry Andric         .setMIFlags(MachineInstr::FrameDestroy);
75481ad6265SDimitry Andric   }
75581ad6265SDimitry Andric }
75681ad6265SDimitry Andric 
75781ad6265SDimitry Andric void AArch64FrameLowering::emitCalleeSavedGPRRestores(
75881ad6265SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
75981ad6265SDimitry Andric   emitCalleeSavedRestores(MBB, MBBI, false);
76081ad6265SDimitry Andric }
76181ad6265SDimitry Andric 
76281ad6265SDimitry Andric void AArch64FrameLowering::emitCalleeSavedSVERestores(
76381ad6265SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
76481ad6265SDimitry Andric   emitCalleeSavedRestores(MBB, MBBI, true);
76581ad6265SDimitry Andric }
76681ad6265SDimitry Andric 
7675f757f3fSDimitry Andric // Return the maximum possible number of bytes for `Size` due to the
7685f757f3fSDimitry Andric // architectural limit on the size of a SVE register.
7695f757f3fSDimitry Andric static int64_t upperBound(StackOffset Size) {
7705f757f3fSDimitry Andric   static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
7715f757f3fSDimitry Andric   return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
7725f757f3fSDimitry Andric }
7735f757f3fSDimitry Andric 
7745f757f3fSDimitry Andric void AArch64FrameLowering::allocateStackSpace(
7755f757f3fSDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
7765f757f3fSDimitry Andric     int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
7775f757f3fSDimitry Andric     bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
7785f757f3fSDimitry Andric     bool FollowupAllocs) const {
7795f757f3fSDimitry Andric 
7805f757f3fSDimitry Andric   if (!AllocSize)
7815f757f3fSDimitry Andric     return;
7825f757f3fSDimitry Andric 
7835f757f3fSDimitry Andric   DebugLoc DL;
7845f757f3fSDimitry Andric   MachineFunction &MF = *MBB.getParent();
7855f757f3fSDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
7865f757f3fSDimitry Andric   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
7875f757f3fSDimitry Andric   AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
7885f757f3fSDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
7895f757f3fSDimitry Andric 
7905f757f3fSDimitry Andric   const int64_t MaxAlign = MFI.getMaxAlign().value();
7915f757f3fSDimitry Andric   const uint64_t AndMask = ~(MaxAlign - 1);
7925f757f3fSDimitry Andric 
7935f757f3fSDimitry Andric   if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
7945f757f3fSDimitry Andric     Register TargetReg = RealignmentPadding
7955f757f3fSDimitry Andric                              ? findScratchNonCalleeSaveRegister(&MBB)
7965f757f3fSDimitry Andric                              : AArch64::SP;
7975f757f3fSDimitry Andric     // SUB Xd/SP, SP, AllocSize
7985f757f3fSDimitry Andric     emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
7995f757f3fSDimitry Andric                     MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
8005f757f3fSDimitry Andric                     EmitCFI, InitialOffset);
8015f757f3fSDimitry Andric 
8025f757f3fSDimitry Andric     if (RealignmentPadding) {
8035f757f3fSDimitry Andric       // AND SP, X9, 0b11111...0000
8045f757f3fSDimitry Andric       BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
8055f757f3fSDimitry Andric           .addReg(TargetReg, RegState::Kill)
8065f757f3fSDimitry Andric           .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
8075f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
8085f757f3fSDimitry Andric       AFI.setStackRealigned(true);
8095f757f3fSDimitry Andric 
8105f757f3fSDimitry Andric       // No need for SEH instructions here; if we're realigning the stack,
8115f757f3fSDimitry Andric       // we've set a frame pointer and already finished the SEH prologue.
8125f757f3fSDimitry Andric       assert(!NeedsWinCFI);
8135f757f3fSDimitry Andric     }
8145f757f3fSDimitry Andric     return;
8155f757f3fSDimitry Andric   }
8165f757f3fSDimitry Andric 
8175f757f3fSDimitry Andric   //
8185f757f3fSDimitry Andric   // Stack probing allocation.
8195f757f3fSDimitry Andric   //
8205f757f3fSDimitry Andric 
8215f757f3fSDimitry Andric   // Fixed length allocation. If we don't need to re-align the stack and don't
8225f757f3fSDimitry Andric   // have SVE objects, we can use a more efficient sequence for stack probing.
8235f757f3fSDimitry Andric   if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
8245f757f3fSDimitry Andric     Register ScratchReg = findScratchNonCalleeSaveRegister(&MBB);
8255f757f3fSDimitry Andric     assert(ScratchReg != AArch64::NoRegister);
8265f757f3fSDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC))
8275f757f3fSDimitry Andric         .addDef(ScratchReg)
8285f757f3fSDimitry Andric         .addImm(AllocSize.getFixed())
8295f757f3fSDimitry Andric         .addImm(InitialOffset.getFixed())
8305f757f3fSDimitry Andric         .addImm(InitialOffset.getScalable());
8315f757f3fSDimitry Andric     // The fixed allocation may leave unprobed bytes at the top of the
8325f757f3fSDimitry Andric     // stack. If we have subsequent alocation (e.g. if we have variable-sized
8335f757f3fSDimitry Andric     // objects), we need to issue an extra probe, so these allocations start in
8345f757f3fSDimitry Andric     // a known state.
8355f757f3fSDimitry Andric     if (FollowupAllocs) {
8365f757f3fSDimitry Andric       // STR XZR, [SP]
8375f757f3fSDimitry Andric       BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
8385f757f3fSDimitry Andric           .addReg(AArch64::XZR)
8395f757f3fSDimitry Andric           .addReg(AArch64::SP)
8405f757f3fSDimitry Andric           .addImm(0)
8415f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
8425f757f3fSDimitry Andric     }
8435f757f3fSDimitry Andric 
8445f757f3fSDimitry Andric     return;
8455f757f3fSDimitry Andric   }
8465f757f3fSDimitry Andric 
8475f757f3fSDimitry Andric   // Variable length allocation.
8485f757f3fSDimitry Andric 
8495f757f3fSDimitry Andric   // If the (unknown) allocation size cannot exceed the probe size, decrement
8505f757f3fSDimitry Andric   // the stack pointer right away.
8515f757f3fSDimitry Andric   int64_t ProbeSize = AFI.getStackProbeSize();
8525f757f3fSDimitry Andric   if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
8535f757f3fSDimitry Andric     Register ScratchReg = RealignmentPadding
8545f757f3fSDimitry Andric                               ? findScratchNonCalleeSaveRegister(&MBB)
8555f757f3fSDimitry Andric                               : AArch64::SP;
8565f757f3fSDimitry Andric     assert(ScratchReg != AArch64::NoRegister);
8575f757f3fSDimitry Andric     // SUB Xd, SP, AllocSize
8585f757f3fSDimitry Andric     emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
8595f757f3fSDimitry Andric                     MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
8605f757f3fSDimitry Andric                     EmitCFI, InitialOffset);
8615f757f3fSDimitry Andric     if (RealignmentPadding) {
8625f757f3fSDimitry Andric       // AND SP, Xn, 0b11111...0000
8635f757f3fSDimitry Andric       BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), AArch64::SP)
8645f757f3fSDimitry Andric           .addReg(ScratchReg, RegState::Kill)
8655f757f3fSDimitry Andric           .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
8665f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
8675f757f3fSDimitry Andric       AFI.setStackRealigned(true);
8685f757f3fSDimitry Andric     }
8695f757f3fSDimitry Andric     if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
8705f757f3fSDimitry Andric                               AArch64::StackProbeMaxUnprobedStack) {
8715f757f3fSDimitry Andric       // STR XZR, [SP]
8725f757f3fSDimitry Andric       BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXui))
8735f757f3fSDimitry Andric           .addReg(AArch64::XZR)
8745f757f3fSDimitry Andric           .addReg(AArch64::SP)
8755f757f3fSDimitry Andric           .addImm(0)
8765f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
8775f757f3fSDimitry Andric     }
8785f757f3fSDimitry Andric     return;
8795f757f3fSDimitry Andric   }
8805f757f3fSDimitry Andric 
8815f757f3fSDimitry Andric   // Emit a variable-length allocation probing loop.
8825f757f3fSDimitry Andric   // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
8835f757f3fSDimitry Andric   // each of them guaranteed to adjust the stack by less than the probe size.
8845f757f3fSDimitry Andric   Register TargetReg = findScratchNonCalleeSaveRegister(&MBB);
8855f757f3fSDimitry Andric   assert(TargetReg != AArch64::NoRegister);
8865f757f3fSDimitry Andric   // SUB Xd, SP, AllocSize
8875f757f3fSDimitry Andric   emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
8885f757f3fSDimitry Andric                   MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
8895f757f3fSDimitry Andric                   EmitCFI, InitialOffset);
8905f757f3fSDimitry Andric   if (RealignmentPadding) {
8915f757f3fSDimitry Andric     // AND Xn, Xn, 0b11111...0000
8925f757f3fSDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(AArch64::ANDXri), TargetReg)
8935f757f3fSDimitry Andric         .addReg(TargetReg, RegState::Kill)
8945f757f3fSDimitry Andric         .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
8955f757f3fSDimitry Andric         .setMIFlags(MachineInstr::FrameSetup);
8965f757f3fSDimitry Andric   }
8975f757f3fSDimitry Andric 
8985f757f3fSDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::PROBED_STACKALLOC_VAR))
8995f757f3fSDimitry Andric       .addReg(TargetReg);
9005f757f3fSDimitry Andric   if (EmitCFI) {
9015f757f3fSDimitry Andric     // Set the CFA register back to SP.
9025f757f3fSDimitry Andric     unsigned Reg =
9035f757f3fSDimitry Andric         Subtarget.getRegisterInfo()->getDwarfRegNum(AArch64::SP, true);
9045f757f3fSDimitry Andric     unsigned CFIIndex =
9055f757f3fSDimitry Andric         MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
9065f757f3fSDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
9075f757f3fSDimitry Andric         .addCFIIndex(CFIIndex)
9085f757f3fSDimitry Andric         .setMIFlags(MachineInstr::FrameSetup);
9095f757f3fSDimitry Andric   }
9105f757f3fSDimitry Andric   if (RealignmentPadding)
9115f757f3fSDimitry Andric     AFI.setStackRealigned(true);
9125f757f3fSDimitry Andric }
9135f757f3fSDimitry Andric 
91481ad6265SDimitry Andric static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE) {
91581ad6265SDimitry Andric   switch (Reg.id()) {
91681ad6265SDimitry Andric   default:
91781ad6265SDimitry Andric     // The called routine is expected to preserve r19-r28
91881ad6265SDimitry Andric     // r29 and r30 are used as frame pointer and link register resp.
91981ad6265SDimitry Andric     return 0;
92081ad6265SDimitry Andric 
92181ad6265SDimitry Andric     // GPRs
92281ad6265SDimitry Andric #define CASE(n)                                                                \
92381ad6265SDimitry Andric   case AArch64::W##n:                                                          \
92481ad6265SDimitry Andric   case AArch64::X##n:                                                          \
92581ad6265SDimitry Andric     return AArch64::X##n
92681ad6265SDimitry Andric   CASE(0);
92781ad6265SDimitry Andric   CASE(1);
92881ad6265SDimitry Andric   CASE(2);
92981ad6265SDimitry Andric   CASE(3);
93081ad6265SDimitry Andric   CASE(4);
93181ad6265SDimitry Andric   CASE(5);
93281ad6265SDimitry Andric   CASE(6);
93381ad6265SDimitry Andric   CASE(7);
93481ad6265SDimitry Andric   CASE(8);
93581ad6265SDimitry Andric   CASE(9);
93681ad6265SDimitry Andric   CASE(10);
93781ad6265SDimitry Andric   CASE(11);
93881ad6265SDimitry Andric   CASE(12);
93981ad6265SDimitry Andric   CASE(13);
94081ad6265SDimitry Andric   CASE(14);
94181ad6265SDimitry Andric   CASE(15);
94281ad6265SDimitry Andric   CASE(16);
94381ad6265SDimitry Andric   CASE(17);
94481ad6265SDimitry Andric   CASE(18);
94581ad6265SDimitry Andric #undef CASE
94681ad6265SDimitry Andric 
94781ad6265SDimitry Andric     // FPRs
94881ad6265SDimitry Andric #define CASE(n)                                                                \
94981ad6265SDimitry Andric   case AArch64::B##n:                                                          \
95081ad6265SDimitry Andric   case AArch64::H##n:                                                          \
95181ad6265SDimitry Andric   case AArch64::S##n:                                                          \
95281ad6265SDimitry Andric   case AArch64::D##n:                                                          \
95381ad6265SDimitry Andric   case AArch64::Q##n:                                                          \
95481ad6265SDimitry Andric     return HasSVE ? AArch64::Z##n : AArch64::Q##n
95581ad6265SDimitry Andric   CASE(0);
95681ad6265SDimitry Andric   CASE(1);
95781ad6265SDimitry Andric   CASE(2);
95881ad6265SDimitry Andric   CASE(3);
95981ad6265SDimitry Andric   CASE(4);
96081ad6265SDimitry Andric   CASE(5);
96181ad6265SDimitry Andric   CASE(6);
96281ad6265SDimitry Andric   CASE(7);
96381ad6265SDimitry Andric   CASE(8);
96481ad6265SDimitry Andric   CASE(9);
96581ad6265SDimitry Andric   CASE(10);
96681ad6265SDimitry Andric   CASE(11);
96781ad6265SDimitry Andric   CASE(12);
96881ad6265SDimitry Andric   CASE(13);
96981ad6265SDimitry Andric   CASE(14);
97081ad6265SDimitry Andric   CASE(15);
97181ad6265SDimitry Andric   CASE(16);
97281ad6265SDimitry Andric   CASE(17);
97381ad6265SDimitry Andric   CASE(18);
97481ad6265SDimitry Andric   CASE(19);
97581ad6265SDimitry Andric   CASE(20);
97681ad6265SDimitry Andric   CASE(21);
97781ad6265SDimitry Andric   CASE(22);
97881ad6265SDimitry Andric   CASE(23);
97981ad6265SDimitry Andric   CASE(24);
98081ad6265SDimitry Andric   CASE(25);
98181ad6265SDimitry Andric   CASE(26);
98281ad6265SDimitry Andric   CASE(27);
98381ad6265SDimitry Andric   CASE(28);
98481ad6265SDimitry Andric   CASE(29);
98581ad6265SDimitry Andric   CASE(30);
98681ad6265SDimitry Andric   CASE(31);
98781ad6265SDimitry Andric #undef CASE
98881ad6265SDimitry Andric   }
98981ad6265SDimitry Andric }
99081ad6265SDimitry Andric 
99181ad6265SDimitry Andric void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
99281ad6265SDimitry Andric                                                 MachineBasicBlock &MBB) const {
99381ad6265SDimitry Andric   // Insertion point.
99481ad6265SDimitry Andric   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
99581ad6265SDimitry Andric 
99681ad6265SDimitry Andric   // Fake a debug loc.
99781ad6265SDimitry Andric   DebugLoc DL;
99881ad6265SDimitry Andric   if (MBBI != MBB.end())
99981ad6265SDimitry Andric     DL = MBBI->getDebugLoc();
100081ad6265SDimitry Andric 
100181ad6265SDimitry Andric   const MachineFunction &MF = *MBB.getParent();
100281ad6265SDimitry Andric   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
100381ad6265SDimitry Andric   const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
100481ad6265SDimitry Andric 
100581ad6265SDimitry Andric   BitVector GPRsToZero(TRI.getNumRegs());
100681ad6265SDimitry Andric   BitVector FPRsToZero(TRI.getNumRegs());
100781ad6265SDimitry Andric   bool HasSVE = STI.hasSVE();
100881ad6265SDimitry Andric   for (MCRegister Reg : RegsToZero.set_bits()) {
100981ad6265SDimitry Andric     if (TRI.isGeneralPurposeRegister(MF, Reg)) {
101081ad6265SDimitry Andric       // For GPRs, we only care to clear out the 64-bit register.
101181ad6265SDimitry Andric       if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
101281ad6265SDimitry Andric         GPRsToZero.set(XReg);
10130fca6ea1SDimitry Andric     } else if (AArch64InstrInfo::isFpOrNEON(Reg)) {
101481ad6265SDimitry Andric       // For FPRs,
101581ad6265SDimitry Andric       if (MCRegister XReg = getRegisterOrZero(Reg, HasSVE))
101681ad6265SDimitry Andric         FPRsToZero.set(XReg);
101781ad6265SDimitry Andric     }
101881ad6265SDimitry Andric   }
101981ad6265SDimitry Andric 
102081ad6265SDimitry Andric   const AArch64InstrInfo &TII = *STI.getInstrInfo();
102181ad6265SDimitry Andric 
102281ad6265SDimitry Andric   // Zero out GPRs.
102381ad6265SDimitry Andric   for (MCRegister Reg : GPRsToZero.set_bits())
10245f757f3fSDimitry Andric     TII.buildClearRegister(Reg, MBB, MBBI, DL);
102581ad6265SDimitry Andric 
102681ad6265SDimitry Andric   // Zero out FP/vector registers.
102781ad6265SDimitry Andric   for (MCRegister Reg : FPRsToZero.set_bits())
10285f757f3fSDimitry Andric     TII.buildClearRegister(Reg, MBB, MBBI, DL);
102981ad6265SDimitry Andric 
103081ad6265SDimitry Andric   if (HasSVE) {
103181ad6265SDimitry Andric     for (MCRegister PReg :
103281ad6265SDimitry Andric          {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
103381ad6265SDimitry Andric           AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
103481ad6265SDimitry Andric           AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
103581ad6265SDimitry Andric           AArch64::P15}) {
103681ad6265SDimitry Andric       if (RegsToZero[PReg])
103781ad6265SDimitry Andric         BuildMI(MBB, MBBI, DL, TII.get(AArch64::PFALSE), PReg);
103881ad6265SDimitry Andric     }
103981ad6265SDimitry Andric   }
104081ad6265SDimitry Andric }
104181ad6265SDimitry Andric 
10425f757f3fSDimitry Andric static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
10435f757f3fSDimitry Andric                                    const MachineBasicBlock &MBB) {
10445f757f3fSDimitry Andric   const MachineFunction *MF = MBB.getParent();
10455f757f3fSDimitry Andric   LiveRegs.addLiveIns(MBB);
10465f757f3fSDimitry Andric   // Mark callee saved registers as used so we will not choose them.
10475f757f3fSDimitry Andric   const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
10485f757f3fSDimitry Andric   for (unsigned i = 0; CSRegs[i]; ++i)
10495f757f3fSDimitry Andric     LiveRegs.addReg(CSRegs[i]);
10505f757f3fSDimitry Andric }
10515f757f3fSDimitry Andric 
10520b57cec5SDimitry Andric // Find a scratch register that we can use at the start of the prologue to
10530b57cec5SDimitry Andric // re-align the stack pointer.  We avoid using callee-save registers since they
10540b57cec5SDimitry Andric // may appear to be free when this is called from canUseAsPrologue (during
10550b57cec5SDimitry Andric // shrink wrapping), but then no longer be free when this is called from
10560b57cec5SDimitry Andric // emitPrologue.
10570b57cec5SDimitry Andric //
10580b57cec5SDimitry Andric // FIXME: This is a bit conservative, since in the above case we could use one
10590b57cec5SDimitry Andric // of the callee-save registers as a scratch temp to re-align the stack pointer,
10600b57cec5SDimitry Andric // but we would then have to make sure that we were in fact saving at least one
10610b57cec5SDimitry Andric // callee-save register in the prologue, which is additional complexity that
10620b57cec5SDimitry Andric // doesn't seem worth the benefit.
10630fca6ea1SDimitry Andric static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
10640b57cec5SDimitry Andric   MachineFunction *MF = MBB->getParent();
10650b57cec5SDimitry Andric 
10660b57cec5SDimitry Andric   // If MBB is an entry block, use X9 as the scratch register
10670fca6ea1SDimitry Andric   // preserve_none functions may be using X9 to pass arguments,
10680fca6ea1SDimitry Andric   // so prefer to pick an available register below.
10690fca6ea1SDimitry Andric   if (&MF->front() == MBB &&
10700fca6ea1SDimitry Andric       MF->getFunction().getCallingConv() != CallingConv::PreserveNone)
10710b57cec5SDimitry Andric     return AArch64::X9;
10720b57cec5SDimitry Andric 
10730b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
10740b57cec5SDimitry Andric   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
10750b57cec5SDimitry Andric   LivePhysRegs LiveRegs(TRI);
10765f757f3fSDimitry Andric   getLiveRegsForEntryMBB(LiveRegs, *MBB);
10770b57cec5SDimitry Andric 
10780b57cec5SDimitry Andric   // Prefer X9 since it was historically used for the prologue scratch reg.
10790b57cec5SDimitry Andric   const MachineRegisterInfo &MRI = MF->getRegInfo();
10800b57cec5SDimitry Andric   if (LiveRegs.available(MRI, AArch64::X9))
10810b57cec5SDimitry Andric     return AArch64::X9;
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric   for (unsigned Reg : AArch64::GPR64RegClass) {
10840b57cec5SDimitry Andric     if (LiveRegs.available(MRI, Reg))
10850b57cec5SDimitry Andric       return Reg;
10860b57cec5SDimitry Andric   }
10870b57cec5SDimitry Andric   return AArch64::NoRegister;
10880b57cec5SDimitry Andric }
10890b57cec5SDimitry Andric 
10900b57cec5SDimitry Andric bool AArch64FrameLowering::canUseAsPrologue(
10910b57cec5SDimitry Andric     const MachineBasicBlock &MBB) const {
10920b57cec5SDimitry Andric   const MachineFunction *MF = MBB.getParent();
10930b57cec5SDimitry Andric   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
10940b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
10950b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
10965f757f3fSDimitry Andric   const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
10975f757f3fSDimitry Andric   const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
10980b57cec5SDimitry Andric 
10995f757f3fSDimitry Andric   if (AFI->hasSwiftAsyncContext()) {
11005f757f3fSDimitry Andric     const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
11015f757f3fSDimitry Andric     const MachineRegisterInfo &MRI = MF->getRegInfo();
11025f757f3fSDimitry Andric     LivePhysRegs LiveRegs(TRI);
11035f757f3fSDimitry Andric     getLiveRegsForEntryMBB(LiveRegs, MBB);
11045f757f3fSDimitry Andric     // The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are
11055f757f3fSDimitry Andric     // available.
11065f757f3fSDimitry Andric     if (!LiveRegs.available(MRI, AArch64::X16) ||
11075f757f3fSDimitry Andric         !LiveRegs.available(MRI, AArch64::X17))
11085f757f3fSDimitry Andric       return false;
11095f757f3fSDimitry Andric   }
11105f757f3fSDimitry Andric 
11110fca6ea1SDimitry Andric   // Certain stack probing sequences might clobber flags, then we can't use
11120fca6ea1SDimitry Andric   // the block as a prologue if the flags register is a live-in.
11130fca6ea1SDimitry Andric   if (MF->getInfo<AArch64FunctionInfo>()->hasStackProbing() &&
11140fca6ea1SDimitry Andric       MBB.isLiveIn(AArch64::NZCV))
11150fca6ea1SDimitry Andric     return false;
11160fca6ea1SDimitry Andric 
11175f757f3fSDimitry Andric   // Don't need a scratch register if we're not going to re-align the stack or
11185f757f3fSDimitry Andric   // emit stack probes.
11190fca6ea1SDimitry Andric   if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
11200b57cec5SDimitry Andric     return true;
11210b57cec5SDimitry Andric   // Otherwise, we can use any block as long as it has a scratch register
11220b57cec5SDimitry Andric   // available.
11230b57cec5SDimitry Andric   return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
11240b57cec5SDimitry Andric }
11250b57cec5SDimitry Andric 
11260b57cec5SDimitry Andric static bool windowsRequiresStackProbe(MachineFunction &MF,
1127480093f4SDimitry Andric                                       uint64_t StackSizeInBytes) {
11280b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
11295f757f3fSDimitry Andric   const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
11300b57cec5SDimitry Andric   // TODO: When implementing stack protectors, take that into account
11310b57cec5SDimitry Andric   // for the probe threshold.
11325f757f3fSDimitry Andric   return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
11335f757f3fSDimitry Andric          StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
11340b57cec5SDimitry Andric }
11350b57cec5SDimitry Andric 
1136e8d8bef9SDimitry Andric static bool needsWinCFI(const MachineFunction &MF) {
1137e8d8bef9SDimitry Andric   const Function &F = MF.getFunction();
1138e8d8bef9SDimitry Andric   return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1139e8d8bef9SDimitry Andric          F.needsUnwindTableEntry();
1140e8d8bef9SDimitry Andric }
1141e8d8bef9SDimitry Andric 
11420b57cec5SDimitry Andric bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
1143480093f4SDimitry Andric     MachineFunction &MF, uint64_t StackBumpBytes) const {
11440b57cec5SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
11450b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
11460b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
11470b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1148fe6060f1SDimitry Andric   if (homogeneousPrologEpilog(MF))
1149fe6060f1SDimitry Andric     return false;
11500b57cec5SDimitry Andric 
11510b57cec5SDimitry Andric   if (AFI->getLocalStackSize() == 0)
11520b57cec5SDimitry Andric     return false;
11530b57cec5SDimitry Andric 
1154e8d8bef9SDimitry Andric   // For WinCFI, if optimizing for size, prefer to not combine the stack bump
1155e8d8bef9SDimitry Andric   // (to force a stp with predecrement) to match the packed unwind format,
1156e8d8bef9SDimitry Andric   // provided that there actually are any callee saved registers to merge the
1157e8d8bef9SDimitry Andric   // decrement with.
1158e8d8bef9SDimitry Andric   // This is potentially marginally slower, but allows using the packed
1159e8d8bef9SDimitry Andric   // unwind format for functions that both have a local area and callee saved
1160e8d8bef9SDimitry Andric   // registers. Using the packed unwind format notably reduces the size of
1161e8d8bef9SDimitry Andric   // the unwind info.
1162e8d8bef9SDimitry Andric   if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
1163e8d8bef9SDimitry Andric       MF.getFunction().hasOptSize())
1164e8d8bef9SDimitry Andric     return false;
1165e8d8bef9SDimitry Andric 
11660b57cec5SDimitry Andric   // 512 is the maximum immediate for stp/ldp that will be used for
11670b57cec5SDimitry Andric   // callee-save save/restores
11680b57cec5SDimitry Andric   if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
11690b57cec5SDimitry Andric     return false;
11700b57cec5SDimitry Andric 
11710b57cec5SDimitry Andric   if (MFI.hasVarSizedObjects())
11720b57cec5SDimitry Andric     return false;
11730b57cec5SDimitry Andric 
1174fe6060f1SDimitry Andric   if (RegInfo->hasStackRealignment(MF))
11750b57cec5SDimitry Andric     return false;
11760b57cec5SDimitry Andric 
11770b57cec5SDimitry Andric   // This isn't strictly necessary, but it simplifies things a bit since the
11780b57cec5SDimitry Andric   // current RedZone handling code assumes the SP is adjusted by the
11790b57cec5SDimitry Andric   // callee-save save/restore code.
11800b57cec5SDimitry Andric   if (canUseRedZone(MF))
11810b57cec5SDimitry Andric     return false;
11820b57cec5SDimitry Andric 
11838bcb0991SDimitry Andric   // When there is an SVE area on the stack, always allocate the
11848bcb0991SDimitry Andric   // callee-saves and spills/locals separately.
11858bcb0991SDimitry Andric   if (getSVEStackSize(MF))
11868bcb0991SDimitry Andric     return false;
11878bcb0991SDimitry Andric 
11880b57cec5SDimitry Andric   return true;
11890b57cec5SDimitry Andric }
11900b57cec5SDimitry Andric 
11915ffd83dbSDimitry Andric bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
11925ffd83dbSDimitry Andric     MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
11935ffd83dbSDimitry Andric   if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
11945ffd83dbSDimitry Andric     return false;
11955ffd83dbSDimitry Andric 
11965ffd83dbSDimitry Andric   if (MBB.empty())
11975ffd83dbSDimitry Andric     return true;
11985ffd83dbSDimitry Andric 
11995ffd83dbSDimitry Andric   // Disable combined SP bump if the last instruction is an MTE tag store. It
12005ffd83dbSDimitry Andric   // is almost always better to merge SP adjustment into those instructions.
12015ffd83dbSDimitry Andric   MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
12025ffd83dbSDimitry Andric   MachineBasicBlock::iterator Begin = MBB.begin();
12035ffd83dbSDimitry Andric   while (LastI != Begin) {
12045ffd83dbSDimitry Andric     --LastI;
12055ffd83dbSDimitry Andric     if (LastI->isTransient())
12065ffd83dbSDimitry Andric       continue;
12075ffd83dbSDimitry Andric     if (!LastI->getFlag(MachineInstr::FrameDestroy))
12085ffd83dbSDimitry Andric       break;
12095ffd83dbSDimitry Andric   }
12105ffd83dbSDimitry Andric   switch (LastI->getOpcode()) {
12115ffd83dbSDimitry Andric   case AArch64::STGloop:
12125ffd83dbSDimitry Andric   case AArch64::STZGloop:
121306c3fb27SDimitry Andric   case AArch64::STGi:
121406c3fb27SDimitry Andric   case AArch64::STZGi:
121506c3fb27SDimitry Andric   case AArch64::ST2Gi:
121606c3fb27SDimitry Andric   case AArch64::STZ2Gi:
12175ffd83dbSDimitry Andric     return false;
12185ffd83dbSDimitry Andric   default:
12195ffd83dbSDimitry Andric     return true;
12205ffd83dbSDimitry Andric   }
12215ffd83dbSDimitry Andric   llvm_unreachable("unreachable");
12225ffd83dbSDimitry Andric }
12235ffd83dbSDimitry Andric 
12240b57cec5SDimitry Andric // Given a load or a store instruction, generate an appropriate unwinding SEH
12250b57cec5SDimitry Andric // code on Windows.
12260b57cec5SDimitry Andric static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
12270b57cec5SDimitry Andric                                              const TargetInstrInfo &TII,
12280b57cec5SDimitry Andric                                              MachineInstr::MIFlag Flag) {
12290b57cec5SDimitry Andric   unsigned Opc = MBBI->getOpcode();
12300b57cec5SDimitry Andric   MachineBasicBlock *MBB = MBBI->getParent();
12310b57cec5SDimitry Andric   MachineFunction &MF = *MBB->getParent();
12320b57cec5SDimitry Andric   DebugLoc DL = MBBI->getDebugLoc();
12330b57cec5SDimitry Andric   unsigned ImmIdx = MBBI->getNumOperands() - 1;
12340b57cec5SDimitry Andric   int Imm = MBBI->getOperand(ImmIdx).getImm();
12350b57cec5SDimitry Andric   MachineInstrBuilder MIB;
12360b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
12370b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
12380b57cec5SDimitry Andric 
12390b57cec5SDimitry Andric   switch (Opc) {
12400b57cec5SDimitry Andric   default:
12410b57cec5SDimitry Andric     llvm_unreachable("No SEH Opcode for this instruction");
12420b57cec5SDimitry Andric   case AArch64::LDPDpost:
12430b57cec5SDimitry Andric     Imm = -Imm;
1244bdd1243dSDimitry Andric     [[fallthrough]];
12450b57cec5SDimitry Andric   case AArch64::STPDpre: {
12460b57cec5SDimitry Andric     unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
12470b57cec5SDimitry Andric     unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
12480b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
12490b57cec5SDimitry Andric               .addImm(Reg0)
12500b57cec5SDimitry Andric               .addImm(Reg1)
12510b57cec5SDimitry Andric               .addImm(Imm * 8)
12520b57cec5SDimitry Andric               .setMIFlag(Flag);
12530b57cec5SDimitry Andric     break;
12540b57cec5SDimitry Andric   }
12550b57cec5SDimitry Andric   case AArch64::LDPXpost:
12560b57cec5SDimitry Andric     Imm = -Imm;
1257bdd1243dSDimitry Andric     [[fallthrough]];
12580b57cec5SDimitry Andric   case AArch64::STPXpre: {
12598bcb0991SDimitry Andric     Register Reg0 = MBBI->getOperand(1).getReg();
12608bcb0991SDimitry Andric     Register Reg1 = MBBI->getOperand(2).getReg();
12610b57cec5SDimitry Andric     if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
12620b57cec5SDimitry Andric       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
12630b57cec5SDimitry Andric                 .addImm(Imm * 8)
12640b57cec5SDimitry Andric                 .setMIFlag(Flag);
12650b57cec5SDimitry Andric     else
12660b57cec5SDimitry Andric       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
12670b57cec5SDimitry Andric                 .addImm(RegInfo->getSEHRegNum(Reg0))
12680b57cec5SDimitry Andric                 .addImm(RegInfo->getSEHRegNum(Reg1))
12690b57cec5SDimitry Andric                 .addImm(Imm * 8)
12700b57cec5SDimitry Andric                 .setMIFlag(Flag);
12710b57cec5SDimitry Andric     break;
12720b57cec5SDimitry Andric   }
12730b57cec5SDimitry Andric   case AArch64::LDRDpost:
12740b57cec5SDimitry Andric     Imm = -Imm;
1275bdd1243dSDimitry Andric     [[fallthrough]];
12760b57cec5SDimitry Andric   case AArch64::STRDpre: {
12770b57cec5SDimitry Andric     unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
12780b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
12790b57cec5SDimitry Andric               .addImm(Reg)
12800b57cec5SDimitry Andric               .addImm(Imm)
12810b57cec5SDimitry Andric               .setMIFlag(Flag);
12820b57cec5SDimitry Andric     break;
12830b57cec5SDimitry Andric   }
12840b57cec5SDimitry Andric   case AArch64::LDRXpost:
12850b57cec5SDimitry Andric     Imm = -Imm;
1286bdd1243dSDimitry Andric     [[fallthrough]];
12870b57cec5SDimitry Andric   case AArch64::STRXpre: {
12880b57cec5SDimitry Andric     unsigned Reg =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
12890b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
12900b57cec5SDimitry Andric               .addImm(Reg)
12910b57cec5SDimitry Andric               .addImm(Imm)
12920b57cec5SDimitry Andric               .setMIFlag(Flag);
12930b57cec5SDimitry Andric     break;
12940b57cec5SDimitry Andric   }
12950b57cec5SDimitry Andric   case AArch64::STPDi:
12960b57cec5SDimitry Andric   case AArch64::LDPDi: {
12970b57cec5SDimitry Andric     unsigned Reg0 =  RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
12980b57cec5SDimitry Andric     unsigned Reg1 =  RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
12990b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
13000b57cec5SDimitry Andric               .addImm(Reg0)
13010b57cec5SDimitry Andric               .addImm(Reg1)
13020b57cec5SDimitry Andric               .addImm(Imm * 8)
13030b57cec5SDimitry Andric               .setMIFlag(Flag);
13040b57cec5SDimitry Andric     break;
13050b57cec5SDimitry Andric   }
13060b57cec5SDimitry Andric   case AArch64::STPXi:
13070b57cec5SDimitry Andric   case AArch64::LDPXi: {
13088bcb0991SDimitry Andric     Register Reg0 = MBBI->getOperand(0).getReg();
13098bcb0991SDimitry Andric     Register Reg1 = MBBI->getOperand(1).getReg();
13100b57cec5SDimitry Andric     if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
13110b57cec5SDimitry Andric       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
13120b57cec5SDimitry Andric                 .addImm(Imm * 8)
13130b57cec5SDimitry Andric                 .setMIFlag(Flag);
13140b57cec5SDimitry Andric     else
13150b57cec5SDimitry Andric       MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
13160b57cec5SDimitry Andric                 .addImm(RegInfo->getSEHRegNum(Reg0))
13170b57cec5SDimitry Andric                 .addImm(RegInfo->getSEHRegNum(Reg1))
13180b57cec5SDimitry Andric                 .addImm(Imm * 8)
13190b57cec5SDimitry Andric                 .setMIFlag(Flag);
13200b57cec5SDimitry Andric     break;
13210b57cec5SDimitry Andric   }
13220b57cec5SDimitry Andric   case AArch64::STRXui:
13230b57cec5SDimitry Andric   case AArch64::LDRXui: {
13240b57cec5SDimitry Andric     int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
13250b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
13260b57cec5SDimitry Andric               .addImm(Reg)
13270b57cec5SDimitry Andric               .addImm(Imm * 8)
13280b57cec5SDimitry Andric               .setMIFlag(Flag);
13290b57cec5SDimitry Andric     break;
13300b57cec5SDimitry Andric   }
13310b57cec5SDimitry Andric   case AArch64::STRDui:
13320b57cec5SDimitry Andric   case AArch64::LDRDui: {
13330b57cec5SDimitry Andric     unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
13340b57cec5SDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
13350b57cec5SDimitry Andric               .addImm(Reg)
13360b57cec5SDimitry Andric               .addImm(Imm * 8)
13370b57cec5SDimitry Andric               .setMIFlag(Flag);
13380b57cec5SDimitry Andric     break;
13390b57cec5SDimitry Andric   }
13407a6dacacSDimitry Andric   case AArch64::STPQi:
13417a6dacacSDimitry Andric   case AArch64::LDPQi: {
13427a6dacacSDimitry Andric     unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
13437a6dacacSDimitry Andric     unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
13447a6dacacSDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))
13457a6dacacSDimitry Andric               .addImm(Reg0)
13467a6dacacSDimitry Andric               .addImm(Reg1)
13477a6dacacSDimitry Andric               .addImm(Imm * 16)
13487a6dacacSDimitry Andric               .setMIFlag(Flag);
13497a6dacacSDimitry Andric     break;
13507a6dacacSDimitry Andric   }
13517a6dacacSDimitry Andric   case AArch64::LDPQpost:
13527a6dacacSDimitry Andric     Imm = -Imm;
13530fca6ea1SDimitry Andric     [[fallthrough]];
13547a6dacacSDimitry Andric   case AArch64::STPQpre: {
13557a6dacacSDimitry Andric     unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
13567a6dacacSDimitry Andric     unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
13577a6dacacSDimitry Andric     MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))
13587a6dacacSDimitry Andric               .addImm(Reg0)
13597a6dacacSDimitry Andric               .addImm(Reg1)
13607a6dacacSDimitry Andric               .addImm(Imm * 16)
13617a6dacacSDimitry Andric               .setMIFlag(Flag);
13627a6dacacSDimitry Andric     break;
13637a6dacacSDimitry Andric   }
13640b57cec5SDimitry Andric   }
13650b57cec5SDimitry Andric   auto I = MBB->insertAfter(MBBI, MIB);
13660b57cec5SDimitry Andric   return I;
13670b57cec5SDimitry Andric }
13680b57cec5SDimitry Andric 
13690b57cec5SDimitry Andric // Fix up the SEH opcode associated with the save/restore instruction.
13700b57cec5SDimitry Andric static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
13710b57cec5SDimitry Andric                            unsigned LocalStackSize) {
13720b57cec5SDimitry Andric   MachineOperand *ImmOpnd = nullptr;
13730b57cec5SDimitry Andric   unsigned ImmIdx = MBBI->getNumOperands() - 1;
13740b57cec5SDimitry Andric   switch (MBBI->getOpcode()) {
13750b57cec5SDimitry Andric   default:
13760b57cec5SDimitry Andric     llvm_unreachable("Fix the offset in the SEH instruction");
13770b57cec5SDimitry Andric   case AArch64::SEH_SaveFPLR:
13780b57cec5SDimitry Andric   case AArch64::SEH_SaveRegP:
13790b57cec5SDimitry Andric   case AArch64::SEH_SaveReg:
13800b57cec5SDimitry Andric   case AArch64::SEH_SaveFRegP:
13810b57cec5SDimitry Andric   case AArch64::SEH_SaveFReg:
13827a6dacacSDimitry Andric   case AArch64::SEH_SaveAnyRegQP:
13837a6dacacSDimitry Andric   case AArch64::SEH_SaveAnyRegQPX:
13840b57cec5SDimitry Andric     ImmOpnd = &MBBI->getOperand(ImmIdx);
13850b57cec5SDimitry Andric     break;
13860b57cec5SDimitry Andric   }
13870b57cec5SDimitry Andric   if (ImmOpnd)
13880b57cec5SDimitry Andric     ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
13890b57cec5SDimitry Andric }
13900b57cec5SDimitry Andric 
13910fca6ea1SDimitry Andric bool requiresGetVGCall(MachineFunction &MF) {
13920fca6ea1SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
13930fca6ea1SDimitry Andric   return AFI->hasStreamingModeChanges() &&
13940fca6ea1SDimitry Andric          !MF.getSubtarget<AArch64Subtarget>().hasSVE();
13950fca6ea1SDimitry Andric }
13960fca6ea1SDimitry Andric 
1397*71ac745dSDimitry Andric static bool requiresSaveVG(MachineFunction &MF) {
1398*71ac745dSDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1399*71ac745dSDimitry Andric   // For Darwin platforms we don't save VG for non-SVE functions, even if SME
1400*71ac745dSDimitry Andric   // is enabled with streaming mode changes.
1401*71ac745dSDimitry Andric   if (!AFI->hasStreamingModeChanges())
1402*71ac745dSDimitry Andric     return false;
1403*71ac745dSDimitry Andric   auto &ST = MF.getSubtarget<AArch64Subtarget>();
1404*71ac745dSDimitry Andric   if (ST.isTargetDarwin())
1405*71ac745dSDimitry Andric     return ST.hasSVE();
1406*71ac745dSDimitry Andric   return true;
1407*71ac745dSDimitry Andric }
1408*71ac745dSDimitry Andric 
14090fca6ea1SDimitry Andric bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
14100fca6ea1SDimitry Andric   unsigned Opc = MBBI->getOpcode();
14110fca6ea1SDimitry Andric   if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
14120fca6ea1SDimitry Andric       Opc == AArch64::UBFMXri)
14130fca6ea1SDimitry Andric     return true;
14140fca6ea1SDimitry Andric 
14150fca6ea1SDimitry Andric   if (requiresGetVGCall(*MBBI->getMF())) {
14160fca6ea1SDimitry Andric     if (Opc == AArch64::ORRXrr)
14170fca6ea1SDimitry Andric       return true;
14180fca6ea1SDimitry Andric 
14190fca6ea1SDimitry Andric     if (Opc == AArch64::BL) {
14200fca6ea1SDimitry Andric       auto Op1 = MBBI->getOperand(0);
14210fca6ea1SDimitry Andric       return Op1.isSymbol() &&
14220fca6ea1SDimitry Andric              (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
14230fca6ea1SDimitry Andric     }
14240fca6ea1SDimitry Andric   }
14250fca6ea1SDimitry Andric 
14260fca6ea1SDimitry Andric   return false;
14270fca6ea1SDimitry Andric }
14280fca6ea1SDimitry Andric 
14290b57cec5SDimitry Andric // Convert callee-save register save/restore instruction to do stack pointer
14300b57cec5SDimitry Andric // decrement/increment to allocate/deallocate the callee-save stack area by
14310b57cec5SDimitry Andric // converting store/load to use pre/post increment version.
14320b57cec5SDimitry Andric static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
14330b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
14340b57cec5SDimitry Andric     const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
143581ad6265SDimitry Andric     bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
143681ad6265SDimitry Andric     MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
143781ad6265SDimitry Andric     int CFAOffset = 0) {
14380b57cec5SDimitry Andric   unsigned NewOpc;
14390fca6ea1SDimitry Andric 
14400fca6ea1SDimitry Andric   // If the function contains streaming mode changes, we expect instructions
14410fca6ea1SDimitry Andric   // to calculate the value of VG before spilling. For locally-streaming
14420fca6ea1SDimitry Andric   // functions, we need to do this for both the streaming and non-streaming
14430fca6ea1SDimitry Andric   // vector length. Move past these instructions if necessary.
14440fca6ea1SDimitry Andric   MachineFunction &MF = *MBB.getParent();
1445*71ac745dSDimitry Andric   if (requiresSaveVG(MF))
14460fca6ea1SDimitry Andric     while (isVGInstruction(MBBI))
14470fca6ea1SDimitry Andric       ++MBBI;
14480fca6ea1SDimitry Andric 
14490b57cec5SDimitry Andric   switch (MBBI->getOpcode()) {
14500b57cec5SDimitry Andric   default:
14510b57cec5SDimitry Andric     llvm_unreachable("Unexpected callee-save save/restore opcode!");
14520b57cec5SDimitry Andric   case AArch64::STPXi:
14530b57cec5SDimitry Andric     NewOpc = AArch64::STPXpre;
14540b57cec5SDimitry Andric     break;
14550b57cec5SDimitry Andric   case AArch64::STPDi:
14560b57cec5SDimitry Andric     NewOpc = AArch64::STPDpre;
14570b57cec5SDimitry Andric     break;
14580b57cec5SDimitry Andric   case AArch64::STPQi:
14590b57cec5SDimitry Andric     NewOpc = AArch64::STPQpre;
14600b57cec5SDimitry Andric     break;
14610b57cec5SDimitry Andric   case AArch64::STRXui:
14620b57cec5SDimitry Andric     NewOpc = AArch64::STRXpre;
14630b57cec5SDimitry Andric     break;
14640b57cec5SDimitry Andric   case AArch64::STRDui:
14650b57cec5SDimitry Andric     NewOpc = AArch64::STRDpre;
14660b57cec5SDimitry Andric     break;
14670b57cec5SDimitry Andric   case AArch64::STRQui:
14680b57cec5SDimitry Andric     NewOpc = AArch64::STRQpre;
14690b57cec5SDimitry Andric     break;
14700b57cec5SDimitry Andric   case AArch64::LDPXi:
14710b57cec5SDimitry Andric     NewOpc = AArch64::LDPXpost;
14720b57cec5SDimitry Andric     break;
14730b57cec5SDimitry Andric   case AArch64::LDPDi:
14740b57cec5SDimitry Andric     NewOpc = AArch64::LDPDpost;
14750b57cec5SDimitry Andric     break;
14760b57cec5SDimitry Andric   case AArch64::LDPQi:
14770b57cec5SDimitry Andric     NewOpc = AArch64::LDPQpost;
14780b57cec5SDimitry Andric     break;
14790b57cec5SDimitry Andric   case AArch64::LDRXui:
14800b57cec5SDimitry Andric     NewOpc = AArch64::LDRXpost;
14810b57cec5SDimitry Andric     break;
14820b57cec5SDimitry Andric   case AArch64::LDRDui:
14830b57cec5SDimitry Andric     NewOpc = AArch64::LDRDpost;
14840b57cec5SDimitry Andric     break;
14850b57cec5SDimitry Andric   case AArch64::LDRQui:
14860b57cec5SDimitry Andric     NewOpc = AArch64::LDRQpost;
14870b57cec5SDimitry Andric     break;
14880b57cec5SDimitry Andric   }
14890b57cec5SDimitry Andric   // Get rid of the SEH code associated with the old instruction.
14900b57cec5SDimitry Andric   if (NeedsWinCFI) {
14910b57cec5SDimitry Andric     auto SEH = std::next(MBBI);
14920b57cec5SDimitry Andric     if (AArch64InstrInfo::isSEHInstruction(*SEH))
14930b57cec5SDimitry Andric       SEH->eraseFromParent();
14940b57cec5SDimitry Andric   }
14950b57cec5SDimitry Andric 
14965f757f3fSDimitry Andric   TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
1497fe6060f1SDimitry Andric   int64_t MinOffset, MaxOffset;
1498fe6060f1SDimitry Andric   bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
1499fe6060f1SDimitry Andric       NewOpc, Scale, Width, MinOffset, MaxOffset);
1500fe6060f1SDimitry Andric   (void)Success;
1501fe6060f1SDimitry Andric   assert(Success && "unknown load/store opcode");
1502fe6060f1SDimitry Andric 
1503fe6060f1SDimitry Andric   // If the first store isn't right where we want SP then we can't fold the
1504fe6060f1SDimitry Andric   // update in so create a normal arithmetic instruction instead.
1505fe6060f1SDimitry Andric   if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
1506fe6060f1SDimitry Andric       CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
15070fca6ea1SDimitry Andric     // If we are destroying the frame, make sure we add the increment after the
15080fca6ea1SDimitry Andric     // last frame operation.
15090fca6ea1SDimitry Andric     if (FrameFlag == MachineInstr::FrameDestroy)
15100fca6ea1SDimitry Andric       ++MBBI;
1511fe6060f1SDimitry Andric     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
151281ad6265SDimitry Andric                     StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
151381ad6265SDimitry Andric                     false, false, nullptr, EmitCFI,
151481ad6265SDimitry Andric                     StackOffset::getFixed(CFAOffset));
151581ad6265SDimitry Andric 
1516fe6060f1SDimitry Andric     return std::prev(MBBI);
1517fe6060f1SDimitry Andric   }
1518fe6060f1SDimitry Andric 
15190b57cec5SDimitry Andric   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
15200b57cec5SDimitry Andric   MIB.addReg(AArch64::SP, RegState::Define);
15210b57cec5SDimitry Andric 
15220b57cec5SDimitry Andric   // Copy all operands other than the immediate offset.
15230b57cec5SDimitry Andric   unsigned OpndIdx = 0;
15240b57cec5SDimitry Andric   for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
15250b57cec5SDimitry Andric        ++OpndIdx)
15260b57cec5SDimitry Andric     MIB.add(MBBI->getOperand(OpndIdx));
15270b57cec5SDimitry Andric 
15280b57cec5SDimitry Andric   assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
15290b57cec5SDimitry Andric          "Unexpected immediate offset in first/last callee-save save/restore "
15300b57cec5SDimitry Andric          "instruction!");
15310b57cec5SDimitry Andric   assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
15320b57cec5SDimitry Andric          "Unexpected base register in callee-save save/restore instruction!");
15330b57cec5SDimitry Andric   assert(CSStackSizeInc % Scale == 0);
1534fe6060f1SDimitry Andric   MIB.addImm(CSStackSizeInc / (int)Scale);
15350b57cec5SDimitry Andric 
15360b57cec5SDimitry Andric   MIB.setMIFlags(MBBI->getFlags());
15370b57cec5SDimitry Andric   MIB.setMemRefs(MBBI->memoperands());
15380b57cec5SDimitry Andric 
15390b57cec5SDimitry Andric   // Generate a new SEH code that corresponds to the new instruction.
15400b57cec5SDimitry Andric   if (NeedsWinCFI) {
15410b57cec5SDimitry Andric     *HasWinCFI = true;
154281ad6265SDimitry Andric     InsertSEH(*MIB, *TII, FrameFlag);
154381ad6265SDimitry Andric   }
154481ad6265SDimitry Andric 
154581ad6265SDimitry Andric   if (EmitCFI) {
154681ad6265SDimitry Andric     unsigned CFIIndex = MF.addFrameInst(
154781ad6265SDimitry Andric         MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset - CSStackSizeInc));
154881ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
154981ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
155081ad6265SDimitry Andric         .setMIFlags(FrameFlag);
15510b57cec5SDimitry Andric   }
15520b57cec5SDimitry Andric 
15530b57cec5SDimitry Andric   return std::prev(MBB.erase(MBBI));
15540b57cec5SDimitry Andric }
15550b57cec5SDimitry Andric 
15560b57cec5SDimitry Andric // Fixup callee-save register save/restore instructions to take into account
15570b57cec5SDimitry Andric // combined SP bump by adding the local stack size to the stack offsets.
15580b57cec5SDimitry Andric static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
1559480093f4SDimitry Andric                                               uint64_t LocalStackSize,
15600b57cec5SDimitry Andric                                               bool NeedsWinCFI,
15610b57cec5SDimitry Andric                                               bool *HasWinCFI) {
15620b57cec5SDimitry Andric   if (AArch64InstrInfo::isSEHInstruction(MI))
15630b57cec5SDimitry Andric     return;
15640b57cec5SDimitry Andric 
15650b57cec5SDimitry Andric   unsigned Opc = MI.getOpcode();
15660b57cec5SDimitry Andric   unsigned Scale;
15670b57cec5SDimitry Andric   switch (Opc) {
15680b57cec5SDimitry Andric   case AArch64::STPXi:
15690b57cec5SDimitry Andric   case AArch64::STRXui:
15700b57cec5SDimitry Andric   case AArch64::STPDi:
15710b57cec5SDimitry Andric   case AArch64::STRDui:
15720b57cec5SDimitry Andric   case AArch64::LDPXi:
15730b57cec5SDimitry Andric   case AArch64::LDRXui:
15740b57cec5SDimitry Andric   case AArch64::LDPDi:
15750b57cec5SDimitry Andric   case AArch64::LDRDui:
15760b57cec5SDimitry Andric     Scale = 8;
15770b57cec5SDimitry Andric     break;
15780b57cec5SDimitry Andric   case AArch64::STPQi:
15790b57cec5SDimitry Andric   case AArch64::STRQui:
15800b57cec5SDimitry Andric   case AArch64::LDPQi:
15810b57cec5SDimitry Andric   case AArch64::LDRQui:
15820b57cec5SDimitry Andric     Scale = 16;
15830b57cec5SDimitry Andric     break;
15840b57cec5SDimitry Andric   default:
15850b57cec5SDimitry Andric     llvm_unreachable("Unexpected callee-save save/restore opcode!");
15860b57cec5SDimitry Andric   }
15870b57cec5SDimitry Andric 
15880b57cec5SDimitry Andric   unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
15890b57cec5SDimitry Andric   assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
15900b57cec5SDimitry Andric          "Unexpected base register in callee-save save/restore instruction!");
15910b57cec5SDimitry Andric   // Last operand is immediate offset that needs fixing.
15920b57cec5SDimitry Andric   MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
15930b57cec5SDimitry Andric   // All generated opcodes have scaled offsets.
15940b57cec5SDimitry Andric   assert(LocalStackSize % Scale == 0);
15950b57cec5SDimitry Andric   OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
15960b57cec5SDimitry Andric 
15970b57cec5SDimitry Andric   if (NeedsWinCFI) {
15980b57cec5SDimitry Andric     *HasWinCFI = true;
15990b57cec5SDimitry Andric     auto MBBI = std::next(MachineBasicBlock::iterator(MI));
16000b57cec5SDimitry Andric     assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
16010b57cec5SDimitry Andric     assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
16020b57cec5SDimitry Andric            "Expecting a SEH instruction");
16030b57cec5SDimitry Andric     fixupSEHOpcode(MBBI, LocalStackSize);
16040b57cec5SDimitry Andric   }
16050b57cec5SDimitry Andric }
16060b57cec5SDimitry Andric 
1607480093f4SDimitry Andric static bool isTargetWindows(const MachineFunction &MF) {
1608480093f4SDimitry Andric   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
1609480093f4SDimitry Andric }
1610480093f4SDimitry Andric 
1611480093f4SDimitry Andric // Convenience function to determine whether I is an SVE callee save.
1612480093f4SDimitry Andric static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
1613480093f4SDimitry Andric   switch (I->getOpcode()) {
1614480093f4SDimitry Andric   default:
1615480093f4SDimitry Andric     return false;
16160fca6ea1SDimitry Andric   case AArch64::PTRUE_C_B:
16170fca6ea1SDimitry Andric   case AArch64::LD1B_2Z_IMM:
16180fca6ea1SDimitry Andric   case AArch64::ST1B_2Z_IMM:
1619480093f4SDimitry Andric   case AArch64::STR_ZXI:
1620480093f4SDimitry Andric   case AArch64::STR_PXI:
1621480093f4SDimitry Andric   case AArch64::LDR_ZXI:
1622480093f4SDimitry Andric   case AArch64::LDR_PXI:
1623480093f4SDimitry Andric     return I->getFlag(MachineInstr::FrameSetup) ||
1624480093f4SDimitry Andric            I->getFlag(MachineInstr::FrameDestroy);
1625480093f4SDimitry Andric   }
1626480093f4SDimitry Andric }
1627480093f4SDimitry Andric 
162881ad6265SDimitry Andric static void emitShadowCallStackPrologue(const TargetInstrInfo &TII,
162981ad6265SDimitry Andric                                         MachineFunction &MF,
163081ad6265SDimitry Andric                                         MachineBasicBlock &MBB,
163181ad6265SDimitry Andric                                         MachineBasicBlock::iterator MBBI,
163281ad6265SDimitry Andric                                         const DebugLoc &DL, bool NeedsWinCFI,
163381ad6265SDimitry Andric                                         bool NeedsUnwindInfo) {
163481ad6265SDimitry Andric   // Shadow call stack prolog: str x30, [x18], #8
163581ad6265SDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost))
163681ad6265SDimitry Andric       .addReg(AArch64::X18, RegState::Define)
163781ad6265SDimitry Andric       .addReg(AArch64::LR)
163881ad6265SDimitry Andric       .addReg(AArch64::X18)
163981ad6265SDimitry Andric       .addImm(8)
164081ad6265SDimitry Andric       .setMIFlag(MachineInstr::FrameSetup);
164181ad6265SDimitry Andric 
164281ad6265SDimitry Andric   // This instruction also makes x18 live-in to the entry block.
164381ad6265SDimitry Andric   MBB.addLiveIn(AArch64::X18);
164481ad6265SDimitry Andric 
164581ad6265SDimitry Andric   if (NeedsWinCFI)
164681ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop))
164781ad6265SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
164881ad6265SDimitry Andric 
164981ad6265SDimitry Andric   if (NeedsUnwindInfo) {
165081ad6265SDimitry Andric     // Emit a CFI instruction that causes 8 to be subtracted from the value of
165181ad6265SDimitry Andric     // x18 when unwinding past this frame.
165281ad6265SDimitry Andric     static const char CFIInst[] = {
165381ad6265SDimitry Andric         dwarf::DW_CFA_val_expression,
165481ad6265SDimitry Andric         18, // register
165581ad6265SDimitry Andric         2,  // length
165681ad6265SDimitry Andric         static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
165781ad6265SDimitry Andric         static_cast<char>(-8) & 0x7f, // addend (sleb128)
165881ad6265SDimitry Andric     };
165981ad6265SDimitry Andric     unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
166081ad6265SDimitry Andric         nullptr, StringRef(CFIInst, sizeof(CFIInst))));
166181ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(AArch64::CFI_INSTRUCTION))
166281ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
166381ad6265SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
166481ad6265SDimitry Andric   }
166581ad6265SDimitry Andric }
166681ad6265SDimitry Andric 
166781ad6265SDimitry Andric static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
166881ad6265SDimitry Andric                                         MachineFunction &MF,
166981ad6265SDimitry Andric                                         MachineBasicBlock &MBB,
167081ad6265SDimitry Andric                                         MachineBasicBlock::iterator MBBI,
167181ad6265SDimitry Andric                                         const DebugLoc &DL) {
167281ad6265SDimitry Andric   // Shadow call stack epilog: ldr x30, [x18, #-8]!
167381ad6265SDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::LDRXpre))
167481ad6265SDimitry Andric       .addReg(AArch64::X18, RegState::Define)
167581ad6265SDimitry Andric       .addReg(AArch64::LR, RegState::Define)
167681ad6265SDimitry Andric       .addReg(AArch64::X18)
167781ad6265SDimitry Andric       .addImm(-8)
167881ad6265SDimitry Andric       .setMIFlag(MachineInstr::FrameDestroy);
167981ad6265SDimitry Andric 
1680bdd1243dSDimitry Andric   if (MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF)) {
168181ad6265SDimitry Andric     unsigned CFIIndex =
168281ad6265SDimitry Andric         MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, 18));
168381ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
168481ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
168581ad6265SDimitry Andric         .setMIFlags(MachineInstr::FrameDestroy);
168681ad6265SDimitry Andric   }
168781ad6265SDimitry Andric }
168881ad6265SDimitry Andric 
168906c3fb27SDimitry Andric // Define the current CFA rule to use the provided FP.
169006c3fb27SDimitry Andric static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
169106c3fb27SDimitry Andric                                 MachineBasicBlock::iterator MBBI,
169206c3fb27SDimitry Andric                                 const DebugLoc &DL, unsigned FixedObject) {
169306c3fb27SDimitry Andric   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
169406c3fb27SDimitry Andric   const AArch64RegisterInfo *TRI = STI.getRegisterInfo();
169506c3fb27SDimitry Andric   const TargetInstrInfo *TII = STI.getInstrInfo();
169606c3fb27SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
169706c3fb27SDimitry Andric 
169806c3fb27SDimitry Andric   const int OffsetToFirstCalleeSaveFromFP =
169906c3fb27SDimitry Andric       AFI->getCalleeSaveBaseToFrameRecordOffset() -
170006c3fb27SDimitry Andric       AFI->getCalleeSavedStackSize();
170106c3fb27SDimitry Andric   Register FramePtr = TRI->getFrameRegister(MF);
170206c3fb27SDimitry Andric   unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
170306c3fb27SDimitry Andric   unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
170406c3fb27SDimitry Andric       nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
170506c3fb27SDimitry Andric   BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
170606c3fb27SDimitry Andric       .addCFIIndex(CFIIndex)
170706c3fb27SDimitry Andric       .setMIFlags(MachineInstr::FrameSetup);
170806c3fb27SDimitry Andric }
170906c3fb27SDimitry Andric 
17105f757f3fSDimitry Andric #ifndef NDEBUG
17115f757f3fSDimitry Andric /// Collect live registers from the end of \p MI's parent up to (including) \p
17125f757f3fSDimitry Andric /// MI in \p LiveRegs.
17135f757f3fSDimitry Andric static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
17145f757f3fSDimitry Andric                                 LivePhysRegs &LiveRegs) {
17155f757f3fSDimitry Andric 
17165f757f3fSDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
17175f757f3fSDimitry Andric   LiveRegs.addLiveOuts(MBB);
17185f757f3fSDimitry Andric   for (const MachineInstr &MI :
17195f757f3fSDimitry Andric        reverse(make_range(MI.getIterator(), MBB.instr_end())))
17205f757f3fSDimitry Andric     LiveRegs.stepBackward(MI);
17215f757f3fSDimitry Andric }
17225f757f3fSDimitry Andric #endif
17235f757f3fSDimitry Andric 
17240b57cec5SDimitry Andric void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
17250b57cec5SDimitry Andric                                         MachineBasicBlock &MBB) const {
17260b57cec5SDimitry Andric   MachineBasicBlock::iterator MBBI = MBB.begin();
17270b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
17280b57cec5SDimitry Andric   const Function &F = MF.getFunction();
17290b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
17300b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
17310b57cec5SDimitry Andric   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
17325f757f3fSDimitry Andric 
17330b57cec5SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1734bdd1243dSDimitry Andric   bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
173506c3fb27SDimitry Andric   bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
17360b57cec5SDimitry Andric   bool HasFP = hasFP(MF);
17370b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
17380b57cec5SDimitry Andric   bool HasWinCFI = false;
17390b57cec5SDimitry Andric   auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
17400b57cec5SDimitry Andric 
17415f757f3fSDimitry Andric   MachineBasicBlock::iterator End = MBB.end();
17425f757f3fSDimitry Andric #ifndef NDEBUG
17435f757f3fSDimitry Andric   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
17445f757f3fSDimitry Andric   // Collect live register from the end of MBB up to the start of the existing
17455f757f3fSDimitry Andric   // frame setup instructions.
17465f757f3fSDimitry Andric   MachineBasicBlock::iterator NonFrameStart = MBB.begin();
17475f757f3fSDimitry Andric   while (NonFrameStart != End &&
17485f757f3fSDimitry Andric          NonFrameStart->getFlag(MachineInstr::FrameSetup))
17495f757f3fSDimitry Andric     ++NonFrameStart;
17505f757f3fSDimitry Andric 
17515f757f3fSDimitry Andric   LivePhysRegs LiveRegs(*TRI);
17525f757f3fSDimitry Andric   if (NonFrameStart != MBB.end()) {
17535f757f3fSDimitry Andric     getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
17545f757f3fSDimitry Andric     // Ignore registers used for stack management for now.
17555f757f3fSDimitry Andric     LiveRegs.removeReg(AArch64::SP);
17565f757f3fSDimitry Andric     LiveRegs.removeReg(AArch64::X19);
17575f757f3fSDimitry Andric     LiveRegs.removeReg(AArch64::FP);
17585f757f3fSDimitry Andric     LiveRegs.removeReg(AArch64::LR);
17590fca6ea1SDimitry Andric 
17600fca6ea1SDimitry Andric     // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
17610fca6ea1SDimitry Andric     // This is necessary to spill VG if required where SVE is unavailable, but
17620fca6ea1SDimitry Andric     // X0 is preserved around this call.
17630fca6ea1SDimitry Andric     if (requiresGetVGCall(MF))
17640fca6ea1SDimitry Andric       LiveRegs.removeReg(AArch64::X0);
17655f757f3fSDimitry Andric   }
17665f757f3fSDimitry Andric 
17675f757f3fSDimitry Andric   auto VerifyClobberOnExit = make_scope_exit([&]() {
17685f757f3fSDimitry Andric     if (NonFrameStart == MBB.end())
17695f757f3fSDimitry Andric       return;
17705f757f3fSDimitry Andric     // Check if any of the newly instructions clobber any of the live registers.
17715f757f3fSDimitry Andric     for (MachineInstr &MI :
17725f757f3fSDimitry Andric          make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
17735f757f3fSDimitry Andric       for (auto &Op : MI.operands())
17745f757f3fSDimitry Andric         if (Op.isReg() && Op.isDef())
17755f757f3fSDimitry Andric           assert(!LiveRegs.contains(Op.getReg()) &&
17765f757f3fSDimitry Andric                  "live register clobbered by inserted prologue instructions");
17775f757f3fSDimitry Andric     }
17785f757f3fSDimitry Andric   });
17795f757f3fSDimitry Andric #endif
17805f757f3fSDimitry Andric 
17810b57cec5SDimitry Andric   bool IsFunclet = MBB.isEHFuncletEntry();
17820b57cec5SDimitry Andric 
17830b57cec5SDimitry Andric   // At this point, we're going to decide whether or not the function uses a
17840b57cec5SDimitry Andric   // redzone. In most cases, the function doesn't have a redzone so let's
17850b57cec5SDimitry Andric   // assume that's false and set it to true in the case that there's a redzone.
17860b57cec5SDimitry Andric   AFI->setHasRedZone(false);
17870b57cec5SDimitry Andric 
17880b57cec5SDimitry Andric   // Debug location must be unknown since the first debug location is used
17890b57cec5SDimitry Andric   // to determine the end of the prologue.
17900b57cec5SDimitry Andric   DebugLoc DL;
17910b57cec5SDimitry Andric 
1792e8d8bef9SDimitry Andric   const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
17935f757f3fSDimitry Andric   if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
179481ad6265SDimitry Andric     emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
1795bdd1243dSDimitry Andric                                 MFnI.needsDwarfUnwindInfo(MF));
1796fe6060f1SDimitry Andric 
1797bdd1243dSDimitry Andric   if (MFnI.shouldSignReturnAddress(MF)) {
17985f757f3fSDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
17990b57cec5SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
18005f757f3fSDimitry Andric     if (NeedsWinCFI)
18015f757f3fSDimitry Andric       HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
18020b57cec5SDimitry Andric   }
18030b57cec5SDimitry Andric 
180481ad6265SDimitry Andric   if (EmitCFI && MFnI.isMTETagged()) {
180581ad6265SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED))
180681ad6265SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
180781ad6265SDimitry Andric   }
18080b57cec5SDimitry Andric 
1809fe6060f1SDimitry Andric   // We signal the presence of a Swift extended frame to external tools by
1810fe6060f1SDimitry Andric   // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
1811fe6060f1SDimitry Andric   // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
1812fe6060f1SDimitry Andric   // bits so that is still true.
1813fe6060f1SDimitry Andric   if (HasFP && AFI->hasSwiftAsyncContext()) {
1814349cc55cSDimitry Andric     switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1815349cc55cSDimitry Andric     case SwiftAsyncFramePointerMode::DeploymentBased:
1816349cc55cSDimitry Andric       if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
1817349cc55cSDimitry Andric         // The special symbol below is absolute and has a *value* that can be
1818349cc55cSDimitry Andric         // combined with the frame pointer to signal an extended frame.
1819349cc55cSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
1820349cc55cSDimitry Andric             .addExternalSymbol("swift_async_extendedFramePointerFlags",
1821349cc55cSDimitry Andric                                AArch64II::MO_GOT);
18225f757f3fSDimitry Andric         if (NeedsWinCFI) {
18235f757f3fSDimitry Andric           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
18245f757f3fSDimitry Andric               .setMIFlags(MachineInstr::FrameSetup);
18255f757f3fSDimitry Andric           HasWinCFI = true;
18265f757f3fSDimitry Andric         }
1827349cc55cSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
1828349cc55cSDimitry Andric             .addUse(AArch64::FP)
1829349cc55cSDimitry Andric             .addUse(AArch64::X16)
1830349cc55cSDimitry Andric             .addImm(Subtarget.isTargetILP32() ? 32 : 0);
18315f757f3fSDimitry Andric         if (NeedsWinCFI) {
18325f757f3fSDimitry Andric           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
18335f757f3fSDimitry Andric               .setMIFlags(MachineInstr::FrameSetup);
18345f757f3fSDimitry Andric           HasWinCFI = true;
18355f757f3fSDimitry Andric         }
1836349cc55cSDimitry Andric         break;
1837349cc55cSDimitry Andric       }
1838bdd1243dSDimitry Andric       [[fallthrough]];
1839349cc55cSDimitry Andric 
1840349cc55cSDimitry Andric     case SwiftAsyncFramePointerMode::Always:
1841fe6060f1SDimitry Andric       // ORR x29, x29, #0x1000_0000_0000_0000
1842fe6060f1SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
1843fe6060f1SDimitry Andric           .addUse(AArch64::FP)
1844fe6060f1SDimitry Andric           .addImm(0x1100)
1845fe6060f1SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
18465f757f3fSDimitry Andric       if (NeedsWinCFI) {
18475f757f3fSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
18485f757f3fSDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
18495f757f3fSDimitry Andric         HasWinCFI = true;
18505f757f3fSDimitry Andric       }
1851349cc55cSDimitry Andric       break;
1852349cc55cSDimitry Andric 
1853349cc55cSDimitry Andric     case SwiftAsyncFramePointerMode::Never:
1854349cc55cSDimitry Andric       break;
1855349cc55cSDimitry Andric     }
1856fe6060f1SDimitry Andric   }
1857fe6060f1SDimitry Andric 
18580b57cec5SDimitry Andric   // All calls are tail calls in GHC calling conv, and functions have no
18590b57cec5SDimitry Andric   // prologue/epilogue.
18600b57cec5SDimitry Andric   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
18610b57cec5SDimitry Andric     return;
18620b57cec5SDimitry Andric 
1863e8d8bef9SDimitry Andric   // Set tagged base pointer to the requested stack slot.
18640b57cec5SDimitry Andric   // Ideally it should match SP value after prologue.
1865bdd1243dSDimitry Andric   std::optional<int> TBPI = AFI->getTaggedBasePointerIndex();
1866e8d8bef9SDimitry Andric   if (TBPI)
1867e8d8bef9SDimitry Andric     AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
1868e8d8bef9SDimitry Andric   else
18690b57cec5SDimitry Andric     AFI->setTaggedBasePointerOffset(MFI.getStackSize());
18700b57cec5SDimitry Andric 
18718bcb0991SDimitry Andric   const StackOffset &SVEStackSize = getSVEStackSize(MF);
18728bcb0991SDimitry Andric 
18730b57cec5SDimitry Andric   // getStackSize() includes all the locals in its size calculation. We don't
18740b57cec5SDimitry Andric   // include these locals when computing the stack size of a funclet, as they
18750b57cec5SDimitry Andric   // are allocated in the parent's stack frame and accessed via the frame
18760b57cec5SDimitry Andric   // pointer from the funclet.  We only save the callee saved registers in the
18770b57cec5SDimitry Andric   // funclet, which are really the callee saved registers of the parent
18780b57cec5SDimitry Andric   // function, including the funclet.
18790fca6ea1SDimitry Andric   int64_t NumBytes =
18800fca6ea1SDimitry Andric       IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
18810b57cec5SDimitry Andric   if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
18820b57cec5SDimitry Andric     assert(!HasFP && "unexpected function without stack frame but with FP");
18838bcb0991SDimitry Andric     assert(!SVEStackSize &&
18848bcb0991SDimitry Andric            "unexpected function without stack frame but with SVE objects");
18850b57cec5SDimitry Andric     // All of the stack allocation is for locals.
18860b57cec5SDimitry Andric     AFI->setLocalStackSize(NumBytes);
18870b57cec5SDimitry Andric     if (!NumBytes)
18880b57cec5SDimitry Andric       return;
18890b57cec5SDimitry Andric     // REDZONE: If the stack size is less than 128 bytes, we don't need
18900b57cec5SDimitry Andric     // to actually allocate.
18910b57cec5SDimitry Andric     if (canUseRedZone(MF)) {
18920b57cec5SDimitry Andric       AFI->setHasRedZone(true);
18930b57cec5SDimitry Andric       ++NumRedZoneFunctions;
18940b57cec5SDimitry Andric     } else {
18958bcb0991SDimitry Andric       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
1896e8d8bef9SDimitry Andric                       StackOffset::getFixed(-NumBytes), TII,
1897e8d8bef9SDimitry Andric                       MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
189881ad6265SDimitry Andric       if (EmitCFI) {
18990b57cec5SDimitry Andric         // Label used to tie together the PROLOG_LABEL and the MachineMoves.
19000fca6ea1SDimitry Andric         MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
19010b57cec5SDimitry Andric         // Encode the stack size of the leaf function.
19020b57cec5SDimitry Andric         unsigned CFIIndex = MF.addFrameInst(
19035ffd83dbSDimitry Andric             MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes));
19040b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
19050b57cec5SDimitry Andric             .addCFIIndex(CFIIndex)
19060b57cec5SDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
19070b57cec5SDimitry Andric       }
19080b57cec5SDimitry Andric     }
19090b57cec5SDimitry Andric 
19100b57cec5SDimitry Andric     if (NeedsWinCFI) {
19110b57cec5SDimitry Andric       HasWinCFI = true;
19120b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
19130b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
19140b57cec5SDimitry Andric     }
19150b57cec5SDimitry Andric 
19160b57cec5SDimitry Andric     return;
19170b57cec5SDimitry Andric   }
19180b57cec5SDimitry Andric 
19190fca6ea1SDimitry Andric   bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
192062cfcf62SDimitry Andric   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
19210b57cec5SDimitry Andric 
19220b57cec5SDimitry Andric   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
19230b57cec5SDimitry Andric   // All of the remaining stack allocations are for locals.
19240b57cec5SDimitry Andric   AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
19250b57cec5SDimitry Andric   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1926fe6060f1SDimitry Andric   bool HomPrologEpilog = homogeneousPrologEpilog(MF);
19270b57cec5SDimitry Andric   if (CombineSPBump) {
19288bcb0991SDimitry Andric     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
19298bcb0991SDimitry Andric     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
1930e8d8bef9SDimitry Andric                     StackOffset::getFixed(-NumBytes), TII,
193181ad6265SDimitry Andric                     MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
193206c3fb27SDimitry Andric                     EmitAsyncCFI);
19330b57cec5SDimitry Andric     NumBytes = 0;
1934fe6060f1SDimitry Andric   } else if (HomPrologEpilog) {
1935fe6060f1SDimitry Andric     // Stack has been already adjusted.
1936fe6060f1SDimitry Andric     NumBytes -= PrologueSaveSize;
19370b57cec5SDimitry Andric   } else if (PrologueSaveSize != 0) {
19380b57cec5SDimitry Andric     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
193981ad6265SDimitry Andric         MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
194006c3fb27SDimitry Andric         EmitAsyncCFI);
19410b57cec5SDimitry Andric     NumBytes -= PrologueSaveSize;
19420b57cec5SDimitry Andric   }
19430b57cec5SDimitry Andric   assert(NumBytes >= 0 && "Negative stack allocation size!?");
19440b57cec5SDimitry Andric 
19450b57cec5SDimitry Andric   // Move past the saves of the callee-saved registers, fixing up the offsets
19460b57cec5SDimitry Andric   // and pre-inc if we decided to combine the callee-save and local stack
19470b57cec5SDimitry Andric   // pointer bump above.
1948480093f4SDimitry Andric   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
1949480093f4SDimitry Andric          !IsSVECalleeSave(MBBI)) {
1950*71ac745dSDimitry Andric     if (CombineSPBump &&
1951*71ac745dSDimitry Andric         // Only fix-up frame-setup load/store instructions.
1952*71ac745dSDimitry Andric         (!requiresSaveVG(MF) || !isVGInstruction(MBBI)))
19530b57cec5SDimitry Andric       fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
19540b57cec5SDimitry Andric                                         NeedsWinCFI, &HasWinCFI);
19550b57cec5SDimitry Andric     ++MBBI;
19560b57cec5SDimitry Andric   }
19570b57cec5SDimitry Andric 
195862cfcf62SDimitry Andric   // For funclets the FP belongs to the containing function.
195962cfcf62SDimitry Andric   if (!IsFunclet && HasFP) {
19608bcb0991SDimitry Andric     // Only set up FP if we actually need to.
1961e8d8bef9SDimitry Andric     int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
19628bcb0991SDimitry Andric 
19630b57cec5SDimitry Andric     if (CombineSPBump)
19640b57cec5SDimitry Andric       FPOffset += AFI->getLocalStackSize();
19650b57cec5SDimitry Andric 
1966fe6060f1SDimitry Andric     if (AFI->hasSwiftAsyncContext()) {
1967fe6060f1SDimitry Andric       // Before we update the live FP we have to ensure there's a valid (or
1968fe6060f1SDimitry Andric       // null) asynchronous context in its slot just before FP in the frame
1969fe6060f1SDimitry Andric       // record, so store it now.
1970fe6060f1SDimitry Andric       const auto &Attrs = MF.getFunction().getAttributes();
1971fe6060f1SDimitry Andric       bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
1972fe6060f1SDimitry Andric       if (HaveInitialContext)
1973fe6060f1SDimitry Andric         MBB.addLiveIn(AArch64::X22);
19745f757f3fSDimitry Andric       Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
1975fe6060f1SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
19765f757f3fSDimitry Andric           .addUse(Reg)
1977fe6060f1SDimitry Andric           .addUse(AArch64::SP)
1978fe6060f1SDimitry Andric           .addImm(FPOffset - 8)
1979fe6060f1SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
19805f757f3fSDimitry Andric       if (NeedsWinCFI) {
19815f757f3fSDimitry Andric         // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
19825f757f3fSDimitry Andric         // to multiple instructions, should be mutually-exclusive.
19835f757f3fSDimitry Andric         assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
19845f757f3fSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
19855f757f3fSDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
19865f757f3fSDimitry Andric         HasWinCFI = true;
19875f757f3fSDimitry Andric       }
1988fe6060f1SDimitry Andric     }
1989fe6060f1SDimitry Andric 
1990fe6060f1SDimitry Andric     if (HomPrologEpilog) {
1991fe6060f1SDimitry Andric       auto Prolog = MBBI;
1992fe6060f1SDimitry Andric       --Prolog;
1993fe6060f1SDimitry Andric       assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1994fe6060f1SDimitry Andric       Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1995fe6060f1SDimitry Andric     } else {
19960b57cec5SDimitry Andric       // Issue    sub fp, sp, FPOffset or
19970b57cec5SDimitry Andric       //          mov fp,sp          when FPOffset is zero.
19980b57cec5SDimitry Andric       // Note: All stores of callee-saved registers are marked as "FrameSetup".
19990b57cec5SDimitry Andric       // This code marks the instruction(s) that set the FP also.
20008bcb0991SDimitry Andric       emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
2001e8d8bef9SDimitry Andric                       StackOffset::getFixed(FPOffset), TII,
2002e8d8bef9SDimitry Andric                       MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
2003bdd1243dSDimitry Andric       if (NeedsWinCFI && HasWinCFI) {
2004bdd1243dSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
2005bdd1243dSDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
2006bdd1243dSDimitry Andric         // After setting up the FP, the rest of the prolog doesn't need to be
2007bdd1243dSDimitry Andric         // included in the SEH unwind info.
2008bdd1243dSDimitry Andric         NeedsWinCFI = false;
2009bdd1243dSDimitry Andric       }
20100b57cec5SDimitry Andric     }
201106c3fb27SDimitry Andric     if (EmitAsyncCFI)
201206c3fb27SDimitry Andric       emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
201381ad6265SDimitry Andric   }
201481ad6265SDimitry Andric 
201581ad6265SDimitry Andric   // Now emit the moves for whatever callee saved regs we have (including FP,
201681ad6265SDimitry Andric   // LR if those are saved). Frame instructions for SVE register are emitted
201781ad6265SDimitry Andric   // later, after the instruction which actually save SVE regs.
201806c3fb27SDimitry Andric   if (EmitAsyncCFI)
201981ad6265SDimitry Andric     emitCalleeSavedGPRLocations(MBB, MBBI);
20200b57cec5SDimitry Andric 
2021bdd1243dSDimitry Andric   // Alignment is required for the parent frame, not the funclet
2022bdd1243dSDimitry Andric   const bool NeedsRealignment =
2023bdd1243dSDimitry Andric       NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
20245f757f3fSDimitry Andric   const int64_t RealignmentPadding =
2025bdd1243dSDimitry Andric       (NeedsRealignment && MFI.getMaxAlign() > Align(16))
2026bdd1243dSDimitry Andric           ? MFI.getMaxAlign().value() - 16
2027bdd1243dSDimitry Andric           : 0;
2028bdd1243dSDimitry Andric 
2029bdd1243dSDimitry Andric   if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
2030bdd1243dSDimitry Andric     uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
20310b57cec5SDimitry Andric     if (NeedsWinCFI) {
20320b57cec5SDimitry Andric       HasWinCFI = true;
20330b57cec5SDimitry Andric       // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
20340b57cec5SDimitry Andric       // exceed this amount.  We need to move at most 2^24 - 1 into x15.
20350b57cec5SDimitry Andric       // This is at most two instructions, MOVZ followed by MOVK.
20360b57cec5SDimitry Andric       // TODO: Fix to use multiple stack alloc unwind codes for stacks
20370b57cec5SDimitry Andric       // exceeding 256MB in size.
20380b57cec5SDimitry Andric       if (NumBytes >= (1 << 28))
20390b57cec5SDimitry Andric         report_fatal_error("Stack size cannot exceed 256MB for stack "
20400b57cec5SDimitry Andric                            "unwinding purposes");
20410b57cec5SDimitry Andric 
20420b57cec5SDimitry Andric       uint32_t LowNumWords = NumWords & 0xFFFF;
20430b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
20440b57cec5SDimitry Andric           .addImm(LowNumWords)
20450b57cec5SDimitry Andric           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
20460b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
20470b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
20480b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
20490b57cec5SDimitry Andric       if ((NumWords & 0xFFFF0000) != 0) {
20500b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
20510b57cec5SDimitry Andric             .addReg(AArch64::X15)
20520b57cec5SDimitry Andric             .addImm((NumWords & 0xFFFF0000) >> 16) // High half
20530b57cec5SDimitry Andric             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
20540b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
20550b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
20560b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
20570b57cec5SDimitry Andric       }
20580b57cec5SDimitry Andric     } else {
20590b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
20600b57cec5SDimitry Andric           .addImm(NumWords)
20610b57cec5SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
20620b57cec5SDimitry Andric     }
20630b57cec5SDimitry Andric 
2064bdd1243dSDimitry Andric     const char *ChkStk = Subtarget.getChkStkName();
20650b57cec5SDimitry Andric     switch (MF.getTarget().getCodeModel()) {
20660b57cec5SDimitry Andric     case CodeModel::Tiny:
20670b57cec5SDimitry Andric     case CodeModel::Small:
20680b57cec5SDimitry Andric     case CodeModel::Medium:
20690b57cec5SDimitry Andric     case CodeModel::Kernel:
20700b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
2071bdd1243dSDimitry Andric           .addExternalSymbol(ChkStk)
20720b57cec5SDimitry Andric           .addReg(AArch64::X15, RegState::Implicit)
20730b57cec5SDimitry Andric           .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
20740b57cec5SDimitry Andric           .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
20750b57cec5SDimitry Andric           .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
20760b57cec5SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
20770b57cec5SDimitry Andric       if (NeedsWinCFI) {
20780b57cec5SDimitry Andric         HasWinCFI = true;
20790b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
20800b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
20810b57cec5SDimitry Andric       }
20820b57cec5SDimitry Andric       break;
20830b57cec5SDimitry Andric     case CodeModel::Large:
20840b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
20850b57cec5SDimitry Andric           .addReg(AArch64::X16, RegState::Define)
2086bdd1243dSDimitry Andric           .addExternalSymbol(ChkStk)
2087bdd1243dSDimitry Andric           .addExternalSymbol(ChkStk)
20880b57cec5SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
20890b57cec5SDimitry Andric       if (NeedsWinCFI) {
20900b57cec5SDimitry Andric         HasWinCFI = true;
20910b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
20920b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
20930b57cec5SDimitry Andric       }
20940b57cec5SDimitry Andric 
20955ffd83dbSDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
20960b57cec5SDimitry Andric           .addReg(AArch64::X16, RegState::Kill)
20970b57cec5SDimitry Andric           .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
20980b57cec5SDimitry Andric           .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
20990b57cec5SDimitry Andric           .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
21000b57cec5SDimitry Andric           .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
21010b57cec5SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
21020b57cec5SDimitry Andric       if (NeedsWinCFI) {
21030b57cec5SDimitry Andric         HasWinCFI = true;
21040b57cec5SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
21050b57cec5SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
21060b57cec5SDimitry Andric       }
21070b57cec5SDimitry Andric       break;
21080b57cec5SDimitry Andric     }
21090b57cec5SDimitry Andric 
21100b57cec5SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
21110b57cec5SDimitry Andric         .addReg(AArch64::SP, RegState::Kill)
21120b57cec5SDimitry Andric         .addReg(AArch64::X15, RegState::Kill)
21130b57cec5SDimitry Andric         .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
21140b57cec5SDimitry Andric         .setMIFlags(MachineInstr::FrameSetup);
21150b57cec5SDimitry Andric     if (NeedsWinCFI) {
21160b57cec5SDimitry Andric       HasWinCFI = true;
21170b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
21180b57cec5SDimitry Andric           .addImm(NumBytes)
21190b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
21200b57cec5SDimitry Andric     }
21210b57cec5SDimitry Andric     NumBytes = 0;
2122bdd1243dSDimitry Andric 
2123bdd1243dSDimitry Andric     if (RealignmentPadding > 0) {
212406c3fb27SDimitry Andric       if (RealignmentPadding >= 4096) {
212506c3fb27SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
212606c3fb27SDimitry Andric             .addReg(AArch64::X16, RegState::Define)
212706c3fb27SDimitry Andric             .addImm(RealignmentPadding)
212806c3fb27SDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
212906c3fb27SDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
213006c3fb27SDimitry Andric             .addReg(AArch64::SP)
213106c3fb27SDimitry Andric             .addReg(AArch64::X16, RegState::Kill)
213206c3fb27SDimitry Andric             .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
213306c3fb27SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
213406c3fb27SDimitry Andric       } else {
2135bdd1243dSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
2136bdd1243dSDimitry Andric             .addReg(AArch64::SP)
2137bdd1243dSDimitry Andric             .addImm(RealignmentPadding)
213806c3fb27SDimitry Andric             .addImm(0)
213906c3fb27SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
214006c3fb27SDimitry Andric       }
2141bdd1243dSDimitry Andric 
2142bdd1243dSDimitry Andric       uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
2143bdd1243dSDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
2144bdd1243dSDimitry Andric           .addReg(AArch64::X15, RegState::Kill)
2145bdd1243dSDimitry Andric           .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
2146bdd1243dSDimitry Andric       AFI->setStackRealigned(true);
2147bdd1243dSDimitry Andric 
2148bdd1243dSDimitry Andric       // No need for SEH instructions here; if we're realigning the stack,
2149bdd1243dSDimitry Andric       // we've set a frame pointer and already finished the SEH prologue.
2150bdd1243dSDimitry Andric       assert(!NeedsWinCFI);
2151bdd1243dSDimitry Andric     }
21520b57cec5SDimitry Andric   }
21530b57cec5SDimitry Andric 
21545f757f3fSDimitry Andric   StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
2155480093f4SDimitry Andric   MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
2156480093f4SDimitry Andric 
2157480093f4SDimitry Andric   // Process the SVE callee-saves to determine what space needs to be
2158480093f4SDimitry Andric   // allocated.
2159979e22ffSDimitry Andric   if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
21605f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
21615f757f3fSDimitry Andric                       << "\n");
2162480093f4SDimitry Andric     // Find callee save instructions in frame.
2163480093f4SDimitry Andric     CalleeSavesBegin = MBBI;
2164480093f4SDimitry Andric     assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
2165480093f4SDimitry Andric     while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
2166480093f4SDimitry Andric       ++MBBI;
2167480093f4SDimitry Andric     CalleeSavesEnd = MBBI;
2168480093f4SDimitry Andric 
21695f757f3fSDimitry Andric     SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
21705f757f3fSDimitry Andric     SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
2171480093f4SDimitry Andric   }
2172480093f4SDimitry Andric 
2173480093f4SDimitry Andric   // Allocate space for the callee saves (if any).
21745f757f3fSDimitry Andric   StackOffset CFAOffset =
21755f757f3fSDimitry Andric       StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
21765f757f3fSDimitry Andric   StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
21775f757f3fSDimitry Andric   allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
21785f757f3fSDimitry Andric                      nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
21795f757f3fSDimitry Andric                      MFI.hasVarSizedObjects() || LocalsSize);
21805f757f3fSDimitry Andric   CFAOffset += SVECalleeSavesSize;
218181ad6265SDimitry Andric 
218206c3fb27SDimitry Andric   if (EmitAsyncCFI)
218381ad6265SDimitry Andric     emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
2184480093f4SDimitry Andric 
21855f757f3fSDimitry Andric   // Allocate space for the rest of the frame including SVE locals. Align the
21865f757f3fSDimitry Andric   // stack as necessary.
21875f757f3fSDimitry Andric   assert(!(canUseRedZone(MF) && NeedsRealignment) &&
21885f757f3fSDimitry Andric          "Cannot use redzone with stack realignment");
218981ad6265SDimitry Andric   if (!canUseRedZone(MF)) {
21900b57cec5SDimitry Andric     // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
21910b57cec5SDimitry Andric     // the correct value here, as NumBytes also includes padding bytes,
21920b57cec5SDimitry Andric     // which shouldn't be counted here.
21935f757f3fSDimitry Andric     allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
21945f757f3fSDimitry Andric                        SVELocalsSize + StackOffset::getFixed(NumBytes),
21955f757f3fSDimitry Andric                        NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
21965f757f3fSDimitry Andric                        CFAOffset, MFI.hasVarSizedObjects());
21970b57cec5SDimitry Andric   }
21980b57cec5SDimitry Andric 
21990b57cec5SDimitry Andric   // If we need a base pointer, set it up here. It's whatever the value of the
22000b57cec5SDimitry Andric   // stack pointer is at this point. Any variable size objects will be allocated
22010b57cec5SDimitry Andric   // after this, so we can still use the base pointer to reference locals.
22020b57cec5SDimitry Andric   //
22030b57cec5SDimitry Andric   // FIXME: Clarify FrameSetup flags here.
22040b57cec5SDimitry Andric   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
22050b57cec5SDimitry Andric   // needed.
220662cfcf62SDimitry Andric   // For funclets the BP belongs to the containing function.
220762cfcf62SDimitry Andric   if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
22080b57cec5SDimitry Andric     TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
22090b57cec5SDimitry Andric                      false);
22100b57cec5SDimitry Andric     if (NeedsWinCFI) {
22110b57cec5SDimitry Andric       HasWinCFI = true;
22120b57cec5SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
22130b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
22140b57cec5SDimitry Andric     }
22150b57cec5SDimitry Andric   }
22160b57cec5SDimitry Andric 
22170b57cec5SDimitry Andric   // The very last FrameSetup instruction indicates the end of prologue. Emit a
22180b57cec5SDimitry Andric   // SEH opcode indicating the prologue end.
22190b57cec5SDimitry Andric   if (NeedsWinCFI && HasWinCFI) {
22200b57cec5SDimitry Andric     BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
22210b57cec5SDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
22220b57cec5SDimitry Andric   }
22230b57cec5SDimitry Andric 
222462cfcf62SDimitry Andric   // SEH funclets are passed the frame pointer in X1.  If the parent
222562cfcf62SDimitry Andric   // function uses the base register, then the base register is used
222662cfcf62SDimitry Andric   // directly, and is not retrieved from X1.
222762cfcf62SDimitry Andric   if (IsFunclet && F.hasPersonalityFn()) {
222862cfcf62SDimitry Andric     EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
222962cfcf62SDimitry Andric     if (isAsynchronousEHPersonality(Per)) {
223062cfcf62SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
223162cfcf62SDimitry Andric           .addReg(AArch64::X1)
223262cfcf62SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
223362cfcf62SDimitry Andric       MBB.addLiveIn(AArch64::X1);
223462cfcf62SDimitry Andric     }
223562cfcf62SDimitry Andric   }
223606c3fb27SDimitry Andric 
223706c3fb27SDimitry Andric   if (EmitCFI && !EmitAsyncCFI) {
223806c3fb27SDimitry Andric     if (HasFP) {
223906c3fb27SDimitry Andric       emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
224006c3fb27SDimitry Andric     } else {
224106c3fb27SDimitry Andric       StackOffset TotalSize =
224206c3fb27SDimitry Andric           SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
224306c3fb27SDimitry Andric       unsigned CFIIndex = MF.addFrameInst(createDefCFA(
224406c3fb27SDimitry Andric           *RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, TotalSize,
224506c3fb27SDimitry Andric           /*LastAdjustmentWasScalable=*/false));
224606c3fb27SDimitry Andric       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
224706c3fb27SDimitry Andric           .addCFIIndex(CFIIndex)
224806c3fb27SDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
224906c3fb27SDimitry Andric     }
225006c3fb27SDimitry Andric     emitCalleeSavedGPRLocations(MBB, MBBI);
225106c3fb27SDimitry Andric     emitCalleeSavedSVELocations(MBB, MBBI);
225206c3fb27SDimitry Andric   }
22530b57cec5SDimitry Andric }
22540b57cec5SDimitry Andric 
22550b57cec5SDimitry Andric static bool isFuncletReturnInstr(const MachineInstr &MI) {
22560b57cec5SDimitry Andric   switch (MI.getOpcode()) {
22570b57cec5SDimitry Andric   default:
22580b57cec5SDimitry Andric     return false;
22590b57cec5SDimitry Andric   case AArch64::CATCHRET:
22600b57cec5SDimitry Andric   case AArch64::CLEANUPRET:
22610b57cec5SDimitry Andric     return true;
22620b57cec5SDimitry Andric   }
22630b57cec5SDimitry Andric }
22640b57cec5SDimitry Andric 
22650b57cec5SDimitry Andric void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
22660b57cec5SDimitry Andric                                         MachineBasicBlock &MBB) const {
22670b57cec5SDimitry Andric   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
22680b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
22695f757f3fSDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
22700b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
22710b57cec5SDimitry Andric   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
22720b57cec5SDimitry Andric   DebugLoc DL;
22730b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
22745f757f3fSDimitry Andric   bool EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
22750b57cec5SDimitry Andric   bool HasWinCFI = false;
22760b57cec5SDimitry Andric   bool IsFunclet = false;
22770b57cec5SDimitry Andric 
22780b57cec5SDimitry Andric   if (MBB.end() != MBBI) {
22790b57cec5SDimitry Andric     DL = MBBI->getDebugLoc();
22800b57cec5SDimitry Andric     IsFunclet = isFuncletReturnInstr(*MBBI);
22810b57cec5SDimitry Andric   }
22820b57cec5SDimitry Andric 
22835f757f3fSDimitry Andric   MachineBasicBlock::iterator EpilogStartI = MBB.end();
22845f757f3fSDimitry Andric 
228581ad6265SDimitry Andric   auto FinishingTouches = make_scope_exit([&]() {
22865f757f3fSDimitry Andric     if (AFI->shouldSignReturnAddress(MF)) {
22875f757f3fSDimitry Andric       BuildMI(MBB, MBB.getFirstTerminator(), DL,
22885f757f3fSDimitry Andric               TII->get(AArch64::PAUTH_EPILOGUE))
22895f757f3fSDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
22905f757f3fSDimitry Andric       if (NeedsWinCFI)
22915f757f3fSDimitry Andric         HasWinCFI = true; // AArch64PointerAuth pass will insert SEH_PACSignLR
22925f757f3fSDimitry Andric     }
22935f757f3fSDimitry Andric     if (AFI->needsShadowCallStackPrologueEpilogue(MF))
229481ad6265SDimitry Andric       emitShadowCallStackEpilogue(*TII, MF, MBB, MBB.getFirstTerminator(), DL);
229581ad6265SDimitry Andric     if (EmitCFI)
229681ad6265SDimitry Andric       emitCalleeSavedGPRRestores(MBB, MBB.getFirstTerminator());
22975f757f3fSDimitry Andric     if (HasWinCFI) {
2298bdd1243dSDimitry Andric       BuildMI(MBB, MBB.getFirstTerminator(), DL,
2299bdd1243dSDimitry Andric               TII->get(AArch64::SEH_EpilogEnd))
2300bdd1243dSDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
23015f757f3fSDimitry Andric       if (!MF.hasWinCFI())
23025f757f3fSDimitry Andric         MF.setHasWinCFI(true);
23035f757f3fSDimitry Andric     }
23045f757f3fSDimitry Andric     if (NeedsWinCFI) {
23055f757f3fSDimitry Andric       assert(EpilogStartI != MBB.end());
23065f757f3fSDimitry Andric       if (!HasWinCFI)
23075f757f3fSDimitry Andric         MBB.erase(EpilogStartI);
23085f757f3fSDimitry Andric     }
230981ad6265SDimitry Andric   });
231081ad6265SDimitry Andric 
2311480093f4SDimitry Andric   int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
23120b57cec5SDimitry Andric                                : MFI.getStackSize();
23130b57cec5SDimitry Andric 
23140b57cec5SDimitry Andric   // All calls are tail calls in GHC calling conv, and functions have no
23150b57cec5SDimitry Andric   // prologue/epilogue.
23160b57cec5SDimitry Andric   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
23170b57cec5SDimitry Andric     return;
23180b57cec5SDimitry Andric 
2319fe6060f1SDimitry Andric   // How much of the stack used by incoming arguments this function is expected
2320fe6060f1SDimitry Andric   // to restore in this particular epilogue.
2321fe6060f1SDimitry Andric   int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
23220fca6ea1SDimitry Andric   bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
23230fca6ea1SDimitry Andric                                               MF.getFunction().isVarArg());
232462cfcf62SDimitry Andric   unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
23250b57cec5SDimitry Andric 
2326fe6060f1SDimitry Andric   int64_t AfterCSRPopSize = ArgumentStackToRestore;
23270b57cec5SDimitry Andric   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
23280b57cec5SDimitry Andric   // We cannot rely on the local stack size set in emitPrologue if the function
23290b57cec5SDimitry Andric   // has funclets, as funclets have different local stack size requirements, and
23300b57cec5SDimitry Andric   // the current value set in emitPrologue may be that of the containing
23310b57cec5SDimitry Andric   // function.
23320b57cec5SDimitry Andric   if (MF.hasEHFunclets())
23330b57cec5SDimitry Andric     AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
2334fe6060f1SDimitry Andric   if (homogeneousPrologEpilog(MF, &MBB)) {
2335fe6060f1SDimitry Andric     assert(!NeedsWinCFI);
2336fe6060f1SDimitry Andric     auto LastPopI = MBB.getFirstTerminator();
2337fe6060f1SDimitry Andric     if (LastPopI != MBB.begin()) {
2338fe6060f1SDimitry Andric       auto HomogeneousEpilog = std::prev(LastPopI);
2339fe6060f1SDimitry Andric       if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
2340fe6060f1SDimitry Andric         LastPopI = HomogeneousEpilog;
2341fe6060f1SDimitry Andric     }
2342fe6060f1SDimitry Andric 
2343fe6060f1SDimitry Andric     // Adjust local stack
2344fe6060f1SDimitry Andric     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
2345fe6060f1SDimitry Andric                     StackOffset::getFixed(AFI->getLocalStackSize()), TII,
23465f757f3fSDimitry Andric                     MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
2347fe6060f1SDimitry Andric 
2348fe6060f1SDimitry Andric     // SP has been already adjusted while restoring callee save regs.
2349fe6060f1SDimitry Andric     // We've bailed-out the case with adjusting SP for arguments.
2350fe6060f1SDimitry Andric     assert(AfterCSRPopSize == 0);
2351fe6060f1SDimitry Andric     return;
2352fe6060f1SDimitry Andric   }
23535ffd83dbSDimitry Andric   bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
23540b57cec5SDimitry Andric   // Assume we can't combine the last pop with the sp restore.
23550b57cec5SDimitry Andric 
235681ad6265SDimitry Andric   bool CombineAfterCSRBump = false;
23570b57cec5SDimitry Andric   if (!CombineSPBump && PrologueSaveSize != 0) {
23580b57cec5SDimitry Andric     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
235981ad6265SDimitry Andric     while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
236081ad6265SDimitry Andric            AArch64InstrInfo::isSEHInstruction(*Pop))
23610b57cec5SDimitry Andric       Pop = std::prev(Pop);
23620b57cec5SDimitry Andric     // Converting the last ldp to a post-index ldp is valid only if the last
23630b57cec5SDimitry Andric     // ldp's offset is 0.
23640b57cec5SDimitry Andric     const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
2365fe6060f1SDimitry Andric     // If the offset is 0 and the AfterCSR pop is not actually trying to
2366fe6060f1SDimitry Andric     // allocate more stack for arguments (in space that an untimely interrupt
2367fe6060f1SDimitry Andric     // may clobber), convert it to a post-index ldp.
236881ad6265SDimitry Andric     if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
23690b57cec5SDimitry Andric       convertCalleeSaveRestoreToSPPrePostIncDec(
237081ad6265SDimitry Andric           MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
237181ad6265SDimitry Andric           MachineInstr::FrameDestroy, PrologueSaveSize);
237281ad6265SDimitry Andric     } else {
23730b57cec5SDimitry Andric       // If not, make sure to emit an add after the last ldp.
23740b57cec5SDimitry Andric       // We're doing this by transferring the size to be restored from the
23750b57cec5SDimitry Andric       // adjustment *before* the CSR pops to the adjustment *after* the CSR
23760b57cec5SDimitry Andric       // pops.
23770b57cec5SDimitry Andric       AfterCSRPopSize += PrologueSaveSize;
237881ad6265SDimitry Andric       CombineAfterCSRBump = true;
23790b57cec5SDimitry Andric     }
23800b57cec5SDimitry Andric   }
23810b57cec5SDimitry Andric 
23820b57cec5SDimitry Andric   // Move past the restores of the callee-saved registers.
23830b57cec5SDimitry Andric   // If we plan on combining the sp bump of the local stack size and the callee
23840b57cec5SDimitry Andric   // save stack size, we might need to adjust the CSR save and restore offsets.
23850b57cec5SDimitry Andric   MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
23860b57cec5SDimitry Andric   MachineBasicBlock::iterator Begin = MBB.begin();
23870b57cec5SDimitry Andric   while (LastPopI != Begin) {
23880b57cec5SDimitry Andric     --LastPopI;
2389480093f4SDimitry Andric     if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
2390480093f4SDimitry Andric         IsSVECalleeSave(LastPopI)) {
23910b57cec5SDimitry Andric       ++LastPopI;
23920b57cec5SDimitry Andric       break;
23930b57cec5SDimitry Andric     } else if (CombineSPBump)
23940b57cec5SDimitry Andric       fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
23950b57cec5SDimitry Andric                                         NeedsWinCFI, &HasWinCFI);
23960b57cec5SDimitry Andric   }
23970b57cec5SDimitry Andric 
23985f757f3fSDimitry Andric   if (NeedsWinCFI) {
23995f757f3fSDimitry Andric     // Note that there are cases where we insert SEH opcodes in the
24005f757f3fSDimitry Andric     // epilogue when we had no SEH opcodes in the prologue. For
24015f757f3fSDimitry Andric     // example, when there is no stack frame but there are stack
24025f757f3fSDimitry Andric     // arguments. Insert the SEH_EpilogStart and remove it later if
24035f757f3fSDimitry Andric     // we didn't emit any SEH opcodes to avoid generating WinCFI for
24045f757f3fSDimitry Andric     // functions that don't need it.
24050b57cec5SDimitry Andric     BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
24060b57cec5SDimitry Andric         .setMIFlag(MachineInstr::FrameDestroy);
24075f757f3fSDimitry Andric     EpilogStartI = LastPopI;
24085f757f3fSDimitry Andric     --EpilogStartI;
24090b57cec5SDimitry Andric   }
24100b57cec5SDimitry Andric 
2411fe6060f1SDimitry Andric   if (hasFP(MF) && AFI->hasSwiftAsyncContext()) {
241281ad6265SDimitry Andric     switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
241381ad6265SDimitry Andric     case SwiftAsyncFramePointerMode::DeploymentBased:
241481ad6265SDimitry Andric       // Avoid the reload as it is GOT relative, and instead fall back to the
241581ad6265SDimitry Andric       // hardcoded value below.  This allows a mismatch between the OS and
241681ad6265SDimitry Andric       // application without immediately terminating on the difference.
2417bdd1243dSDimitry Andric       [[fallthrough]];
241881ad6265SDimitry Andric     case SwiftAsyncFramePointerMode::Always:
241981ad6265SDimitry Andric       // We need to reset FP to its untagged state on return. Bit 60 is
242081ad6265SDimitry Andric       // currently used to show the presence of an extended frame.
2421fe6060f1SDimitry Andric 
2422fe6060f1SDimitry Andric       // BIC x29, x29, #0x1000_0000_0000_0000
2423fe6060f1SDimitry Andric       BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
2424fe6060f1SDimitry Andric               AArch64::FP)
2425fe6060f1SDimitry Andric           .addUse(AArch64::FP)
2426fe6060f1SDimitry Andric           .addImm(0x10fe)
2427fe6060f1SDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
24285f757f3fSDimitry Andric       if (NeedsWinCFI) {
24295f757f3fSDimitry Andric         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
24305f757f3fSDimitry Andric             .setMIFlags(MachineInstr::FrameDestroy);
24315f757f3fSDimitry Andric         HasWinCFI = true;
24325f757f3fSDimitry Andric       }
243381ad6265SDimitry Andric       break;
243481ad6265SDimitry Andric 
243581ad6265SDimitry Andric     case SwiftAsyncFramePointerMode::Never:
243681ad6265SDimitry Andric       break;
243781ad6265SDimitry Andric     }
2438fe6060f1SDimitry Andric   }
2439fe6060f1SDimitry Andric 
24408bcb0991SDimitry Andric   const StackOffset &SVEStackSize = getSVEStackSize(MF);
24418bcb0991SDimitry Andric 
24420b57cec5SDimitry Andric   // If there is a single SP update, insert it before the ret and we're done.
24430b57cec5SDimitry Andric   if (CombineSPBump) {
24448bcb0991SDimitry Andric     assert(!SVEStackSize && "Cannot combine SP bump with SVE");
244581ad6265SDimitry Andric 
244681ad6265SDimitry Andric     // When we are about to restore the CSRs, the CFA register is SP again.
244781ad6265SDimitry Andric     if (EmitCFI && hasFP(MF)) {
244881ad6265SDimitry Andric       const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
244981ad6265SDimitry Andric       unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
245081ad6265SDimitry Andric       unsigned CFIIndex =
245181ad6265SDimitry Andric           MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, NumBytes));
245281ad6265SDimitry Andric       BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
245381ad6265SDimitry Andric           .addCFIIndex(CFIIndex)
245481ad6265SDimitry Andric           .setMIFlags(MachineInstr::FrameDestroy);
245581ad6265SDimitry Andric     }
245681ad6265SDimitry Andric 
24570b57cec5SDimitry Andric     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
2458e8d8bef9SDimitry Andric                     StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
2459e8d8bef9SDimitry Andric                     TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
246081ad6265SDimitry Andric                     &HasWinCFI, EmitCFI, StackOffset::getFixed(NumBytes));
24610b57cec5SDimitry Andric     return;
24620b57cec5SDimitry Andric   }
24630b57cec5SDimitry Andric 
24640b57cec5SDimitry Andric   NumBytes -= PrologueSaveSize;
24650b57cec5SDimitry Andric   assert(NumBytes >= 0 && "Negative stack allocation size!?");
24660b57cec5SDimitry Andric 
2467480093f4SDimitry Andric   // Process the SVE callee-saves to determine what space needs to be
2468480093f4SDimitry Andric   // deallocated.
2469480093f4SDimitry Andric   StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
2470480093f4SDimitry Andric   MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
2471979e22ffSDimitry Andric   if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
247216d6b3b3SDimitry Andric     RestoreBegin = std::prev(RestoreEnd);
247316d6b3b3SDimitry Andric     while (RestoreBegin != MBB.begin() &&
247416d6b3b3SDimitry Andric            IsSVECalleeSave(std::prev(RestoreBegin)))
2475480093f4SDimitry Andric       --RestoreBegin;
2476480093f4SDimitry Andric 
2477480093f4SDimitry Andric     assert(IsSVECalleeSave(RestoreBegin) &&
2478480093f4SDimitry Andric            IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
2479480093f4SDimitry Andric 
2480e8d8bef9SDimitry Andric     StackOffset CalleeSavedSizeAsOffset =
2481e8d8bef9SDimitry Andric         StackOffset::getScalable(CalleeSavedSize);
2482979e22ffSDimitry Andric     DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
2483979e22ffSDimitry Andric     DeallocateAfter = CalleeSavedSizeAsOffset;
2484480093f4SDimitry Andric   }
2485480093f4SDimitry Andric 
24868bcb0991SDimitry Andric   // Deallocate the SVE area.
2487480093f4SDimitry Andric   if (SVEStackSize) {
248881ad6265SDimitry Andric     // If we have stack realignment or variable sized objects on the stack,
248981ad6265SDimitry Andric     // restore the stack pointer from the frame pointer prior to SVE CSR
249081ad6265SDimitry Andric     // restoration.
249181ad6265SDimitry Andric     if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) {
249281ad6265SDimitry Andric       if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
2493979e22ffSDimitry Andric         // Set SP to start of SVE callee-save area from which they can
2494979e22ffSDimitry Andric         // be reloaded. The code below will deallocate the stack space
2495480093f4SDimitry Andric         // by moving FP -> SP.
2496480093f4SDimitry Andric         emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
2497e8d8bef9SDimitry Andric                         StackOffset::getScalable(-CalleeSavedSize), TII,
2498979e22ffSDimitry Andric                         MachineInstr::FrameDestroy);
249981ad6265SDimitry Andric       }
2500480093f4SDimitry Andric     } else {
2501480093f4SDimitry Andric       if (AFI->getSVECalleeSavedStackSize()) {
2502480093f4SDimitry Andric         // Deallocate the non-SVE locals first before we can deallocate (and
2503480093f4SDimitry Andric         // restore callee saves) from the SVE area.
250481ad6265SDimitry Andric         emitFrameOffset(
250581ad6265SDimitry Andric             MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
250681ad6265SDimitry Andric             StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
250781ad6265SDimitry Andric             false, false, nullptr, EmitCFI && !hasFP(MF),
250881ad6265SDimitry Andric             SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
2509480093f4SDimitry Andric         NumBytes = 0;
2510480093f4SDimitry Andric       }
2511480093f4SDimitry Andric 
2512480093f4SDimitry Andric       emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
251381ad6265SDimitry Andric                       DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
251481ad6265SDimitry Andric                       false, nullptr, EmitCFI && !hasFP(MF),
251581ad6265SDimitry Andric                       SVEStackSize +
251681ad6265SDimitry Andric                           StackOffset::getFixed(NumBytes + PrologueSaveSize));
2517480093f4SDimitry Andric 
2518480093f4SDimitry Andric       emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
251981ad6265SDimitry Andric                       DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
252081ad6265SDimitry Andric                       false, nullptr, EmitCFI && !hasFP(MF),
252181ad6265SDimitry Andric                       DeallocateAfter +
252281ad6265SDimitry Andric                           StackOffset::getFixed(NumBytes + PrologueSaveSize));
2523480093f4SDimitry Andric     }
252481ad6265SDimitry Andric     if (EmitCFI)
252581ad6265SDimitry Andric       emitCalleeSavedSVERestores(MBB, RestoreEnd);
2526480093f4SDimitry Andric   }
25278bcb0991SDimitry Andric 
25280b57cec5SDimitry Andric   if (!hasFP(MF)) {
25290b57cec5SDimitry Andric     bool RedZone = canUseRedZone(MF);
25300b57cec5SDimitry Andric     // If this was a redzone leaf function, we don't need to restore the
25310b57cec5SDimitry Andric     // stack pointer (but we may need to pop stack args for fastcc).
25320b57cec5SDimitry Andric     if (RedZone && AfterCSRPopSize == 0)
25330b57cec5SDimitry Andric       return;
25340b57cec5SDimitry Andric 
253581ad6265SDimitry Andric     // Pop the local variables off the stack. If there are no callee-saved
253681ad6265SDimitry Andric     // registers, it means we are actually positioned at the terminator and can
253781ad6265SDimitry Andric     // combine stack increment for the locals and the stack increment for
253881ad6265SDimitry Andric     // callee-popped arguments into (possibly) a single instruction and be done.
25390b57cec5SDimitry Andric     bool NoCalleeSaveRestore = PrologueSaveSize == 0;
2540480093f4SDimitry Andric     int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
25410b57cec5SDimitry Andric     if (NoCalleeSaveRestore)
25420b57cec5SDimitry Andric       StackRestoreBytes += AfterCSRPopSize;
25430b57cec5SDimitry Andric 
254481ad6265SDimitry Andric     emitFrameOffset(
254581ad6265SDimitry Andric         MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
254681ad6265SDimitry Andric         StackOffset::getFixed(StackRestoreBytes), TII,
254781ad6265SDimitry Andric         MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
254881ad6265SDimitry Andric         StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
254981ad6265SDimitry Andric 
25500b57cec5SDimitry Andric     // If we were able to combine the local stack pop with the argument pop,
25510b57cec5SDimitry Andric     // then we're done.
255281ad6265SDimitry Andric     if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
25530b57cec5SDimitry Andric       return;
25540b57cec5SDimitry Andric     }
25550b57cec5SDimitry Andric 
25560b57cec5SDimitry Andric     NumBytes = 0;
25570b57cec5SDimitry Andric   }
25580b57cec5SDimitry Andric 
25590b57cec5SDimitry Andric   // Restore the original stack pointer.
25600b57cec5SDimitry Andric   // FIXME: Rather than doing the math here, we should instead just use
25610b57cec5SDimitry Andric   // non-post-indexed loads for the restores if we aren't actually going to
25620b57cec5SDimitry Andric   // be able to save any instructions.
25638bcb0991SDimitry Andric   if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
2564e8d8bef9SDimitry Andric     emitFrameOffset(
2565e8d8bef9SDimitry Andric         MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
2566e8d8bef9SDimitry Andric         StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
25675f757f3fSDimitry Andric         TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
25688bcb0991SDimitry Andric   } else if (NumBytes)
25698bcb0991SDimitry Andric     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
2570e8d8bef9SDimitry Andric                     StackOffset::getFixed(NumBytes), TII,
25715f757f3fSDimitry Andric                     MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
25720b57cec5SDimitry Andric 
257381ad6265SDimitry Andric   // When we are about to restore the CSRs, the CFA register is SP again.
257481ad6265SDimitry Andric   if (EmitCFI && hasFP(MF)) {
257581ad6265SDimitry Andric     const AArch64RegisterInfo &RegInfo = *Subtarget.getRegisterInfo();
257681ad6265SDimitry Andric     unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
257781ad6265SDimitry Andric     unsigned CFIIndex = MF.addFrameInst(
257881ad6265SDimitry Andric         MCCFIInstruction::cfiDefCfa(nullptr, Reg, PrologueSaveSize));
257981ad6265SDimitry Andric     BuildMI(MBB, LastPopI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
258081ad6265SDimitry Andric         .addCFIIndex(CFIIndex)
258181ad6265SDimitry Andric         .setMIFlags(MachineInstr::FrameDestroy);
258281ad6265SDimitry Andric   }
258381ad6265SDimitry Andric 
25840b57cec5SDimitry Andric   // This must be placed after the callee-save restore code because that code
25850b57cec5SDimitry Andric   // assumes the SP is at the same location as it was after the callee-save save
25860b57cec5SDimitry Andric   // code in the prologue.
25870b57cec5SDimitry Andric   if (AfterCSRPopSize) {
2588fe6060f1SDimitry Andric     assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
2589fe6060f1SDimitry Andric                                   "interrupt may have clobbered");
25900b57cec5SDimitry Andric 
259181ad6265SDimitry Andric     emitFrameOffset(
259281ad6265SDimitry Andric         MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
259381ad6265SDimitry Andric         StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
259481ad6265SDimitry Andric         false, NeedsWinCFI, &HasWinCFI, EmitCFI,
259581ad6265SDimitry Andric         StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
25960b57cec5SDimitry Andric   }
25970b57cec5SDimitry Andric }
25980b57cec5SDimitry Andric 
259906c3fb27SDimitry Andric bool AArch64FrameLowering::enableCFIFixup(MachineFunction &MF) const {
260006c3fb27SDimitry Andric   return TargetFrameLowering::enableCFIFixup(MF) &&
260106c3fb27SDimitry Andric          MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF);
260206c3fb27SDimitry Andric }
260306c3fb27SDimitry Andric 
26040b57cec5SDimitry Andric /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
26050b57cec5SDimitry Andric /// debug info.  It's the same as what we use for resolving the code-gen
26060b57cec5SDimitry Andric /// references for now.  FIXME: This can go wrong when references are
26070b57cec5SDimitry Andric /// SP-relative and simple call frames aren't used.
2608e8d8bef9SDimitry Andric StackOffset
2609e8d8bef9SDimitry Andric AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
26105ffd83dbSDimitry Andric                                              Register &FrameReg) const {
26110b57cec5SDimitry Andric   return resolveFrameIndexReference(
26120b57cec5SDimitry Andric       MF, FI, FrameReg,
26130b57cec5SDimitry Andric       /*PreferFP=*/
26140fca6ea1SDimitry Andric       MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress) ||
26150fca6ea1SDimitry Andric           MF.getFunction().hasFnAttribute(Attribute::SanitizeMemTag),
2616e8d8bef9SDimitry Andric       /*ForSimm=*/false);
26170b57cec5SDimitry Andric }
26180b57cec5SDimitry Andric 
2619e8d8bef9SDimitry Andric StackOffset
262052418fc2SDimitry Andric AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
262152418fc2SDimitry Andric                                                    int FI) const {
262252418fc2SDimitry Andric   // This function serves to provide a comparable offset from a single reference
262352418fc2SDimitry Andric   // point (the value of SP at function entry) that can be used for analysis,
262452418fc2SDimitry Andric   // e.g. the stack-frame-layout analysis pass. It is not guaranteed to be
262552418fc2SDimitry Andric   // correct for all objects in the presence of VLA-area objects or dynamic
262652418fc2SDimitry Andric   // stack re-alignment.
262752418fc2SDimitry Andric 
262852418fc2SDimitry Andric   const auto &MFI = MF.getFrameInfo();
262952418fc2SDimitry Andric 
263052418fc2SDimitry Andric   int64_t ObjectOffset = MFI.getObjectOffset(FI);
263162987288SDimitry Andric   StackOffset SVEStackSize = getSVEStackSize(MF);
263262987288SDimitry Andric 
263362987288SDimitry Andric   // For VLA-area objects, just emit an offset at the end of the stack frame.
263462987288SDimitry Andric   // Whilst not quite correct, these objects do live at the end of the frame and
263562987288SDimitry Andric   // so it is more useful for analysis for the offset to reflect this.
263662987288SDimitry Andric   if (MFI.isVariableSizedObjectIndex(FI)) {
263762987288SDimitry Andric     return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
263862987288SDimitry Andric   }
263952418fc2SDimitry Andric 
264052418fc2SDimitry Andric   // This is correct in the absence of any SVE stack objects.
264152418fc2SDimitry Andric   if (!SVEStackSize)
264252418fc2SDimitry Andric     return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
264352418fc2SDimitry Andric 
264452418fc2SDimitry Andric   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
264552418fc2SDimitry Andric   if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
264652418fc2SDimitry Andric     return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
264752418fc2SDimitry Andric                             ObjectOffset);
264852418fc2SDimitry Andric   }
264952418fc2SDimitry Andric 
265052418fc2SDimitry Andric   bool IsFixed = MFI.isFixedObjectIndex(FI);
265152418fc2SDimitry Andric   bool IsCSR =
265252418fc2SDimitry Andric       !IsFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
265352418fc2SDimitry Andric 
265452418fc2SDimitry Andric   StackOffset ScalableOffset = {};
265552418fc2SDimitry Andric   if (!IsFixed && !IsCSR)
265652418fc2SDimitry Andric     ScalableOffset = -SVEStackSize;
265752418fc2SDimitry Andric 
265852418fc2SDimitry Andric   return StackOffset::getFixed(ObjectOffset) + ScalableOffset;
265952418fc2SDimitry Andric }
266052418fc2SDimitry Andric 
266152418fc2SDimitry Andric StackOffset
2662e8d8bef9SDimitry Andric AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
2663e8d8bef9SDimitry Andric                                                      int FI) const {
2664e8d8bef9SDimitry Andric   return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI));
26650b57cec5SDimitry Andric }
26660b57cec5SDimitry Andric 
2667e8d8bef9SDimitry Andric static StackOffset getFPOffset(const MachineFunction &MF,
2668e8d8bef9SDimitry Andric                                int64_t ObjectOffset) {
26690b57cec5SDimitry Andric   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
26700b57cec5SDimitry Andric   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
26710fca6ea1SDimitry Andric   const Function &F = MF.getFunction();
26720fca6ea1SDimitry Andric   bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
267362cfcf62SDimitry Andric   unsigned FixedObject =
267462cfcf62SDimitry Andric       getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
2675e8d8bef9SDimitry Andric   int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
2676e8d8bef9SDimitry Andric   int64_t FPAdjust =
2677e8d8bef9SDimitry Andric       CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
2678e8d8bef9SDimitry Andric   return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
26790b57cec5SDimitry Andric }
26800b57cec5SDimitry Andric 
2681e8d8bef9SDimitry Andric static StackOffset getStackOffset(const MachineFunction &MF,
2682e8d8bef9SDimitry Andric                                   int64_t ObjectOffset) {
26830b57cec5SDimitry Andric   const auto &MFI = MF.getFrameInfo();
2684e8d8bef9SDimitry Andric   return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
26850b57cec5SDimitry Andric }
26860b57cec5SDimitry Andric 
2687e8d8bef9SDimitry Andric // TODO: This function currently does not work for scalable vectors.
26880b57cec5SDimitry Andric int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
26890b57cec5SDimitry Andric                                                  int FI) const {
26900b57cec5SDimitry Andric   const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
26910b57cec5SDimitry Andric       MF.getSubtarget().getRegisterInfo());
26920b57cec5SDimitry Andric   int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
26930b57cec5SDimitry Andric   return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
2694e8d8bef9SDimitry Andric              ? getFPOffset(MF, ObjectOffset).getFixed()
2695e8d8bef9SDimitry Andric              : getStackOffset(MF, ObjectOffset).getFixed();
26960b57cec5SDimitry Andric }
26970b57cec5SDimitry Andric 
26988bcb0991SDimitry Andric StackOffset AArch64FrameLowering::resolveFrameIndexReference(
26995ffd83dbSDimitry Andric     const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
27000b57cec5SDimitry Andric     bool ForSimm) const {
27010b57cec5SDimitry Andric   const auto &MFI = MF.getFrameInfo();
2702480093f4SDimitry Andric   int64_t ObjectOffset = MFI.getObjectOffset(FI);
27030b57cec5SDimitry Andric   bool isFixed = MFI.isFixedObjectIndex(FI);
2704e8d8bef9SDimitry Andric   bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
27058bcb0991SDimitry Andric   return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
27060b57cec5SDimitry Andric                                      PreferFP, ForSimm);
27070b57cec5SDimitry Andric }
27080b57cec5SDimitry Andric 
/// Resolve a frame object, described by its offset and kind rather than by a
/// frame index, to a base register plus an offset from that register.
///
/// \param ObjectOffset  Offset of the object; resolveFrameIndexReference()
///                      passes MachineFrameInfo::getObjectOffset(FI) here.
/// \param isFixed       True for fixed objects (isFixedObjectIndex).
/// \param isSVE         True when the object has the ScalableVector stack ID.
/// \param FrameReg      [out] Receives the chosen base: the frame register,
///                      the base pointer, or SP.
/// \param PreferFP      Hint to favour the FP. For non-SVE objects in functions
///                      with a stack frame it is cleared when the SVE stack is
///                      non-empty, and may be raised locally when the FP is the
///                      closer base.
/// \param ForSimm       When true, an FP offset below -256 is treated as not
///                      fitting the immediate.
/// \returns The offset (fixed and scalable parts) relative to \p FrameReg.
27098bcb0991SDimitry Andric StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
2710480093f4SDimitry Andric     const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
27115ffd83dbSDimitry Andric     Register &FrameReg, bool PreferFP, bool ForSimm) const {
27120b57cec5SDimitry Andric   const auto &MFI = MF.getFrameInfo();
27130b57cec5SDimitry Andric   const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
27140b57cec5SDimitry Andric       MF.getSubtarget().getRegisterInfo());
27150b57cec5SDimitry Andric   const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
27160b57cec5SDimitry Andric   const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
27170b57cec5SDimitry Andric 
  // Candidate fixed offsets: FPOffset is used when the FP is chosen as base,
  // Offset when SP or the base pointer is chosen.
2718e8d8bef9SDimitry Andric   int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
2719e8d8bef9SDimitry Andric   int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
  // A non-fixed object whose offset is at or above -CalleeSavedStackSize is
  // treated as a callee-save slot.
27200b57cec5SDimitry Andric   bool isCSR =
2721480093f4SDimitry Andric       !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
27220b57cec5SDimitry Andric 
27238bcb0991SDimitry Andric   const StackOffset &SVEStackSize = getSVEStackSize(MF);
27248bcb0991SDimitry Andric 
27250b57cec5SDimitry Andric   // Use frame pointer to reference fixed objects. Use it for locals if
27260b57cec5SDimitry Andric   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
27270b57cec5SDimitry Andric   // reliable as a base). Make sure useFPForScavengingIndex() does the
27280b57cec5SDimitry Andric   // right thing for the emergency spill slot.
27290b57cec5SDimitry Andric   bool UseFP = false;
27308bcb0991SDimitry Andric   if (AFI->hasStackFrame() && !isSVE) {
273181ad6265SDimitry Andric     // We shouldn't prefer using the FP to access fixed-sized stack objects when
273281ad6265SDimitry Andric     // there are scalable (SVE) objects in between the FP and the fixed-sized
273381ad6265SDimitry Andric     // objects.
27348bcb0991SDimitry Andric     PreferFP &= !SVEStackSize;
27358bcb0991SDimitry Andric 
27360b57cec5SDimitry Andric     // Note: Keeping the following as multiple 'if' statements rather than
27370b57cec5SDimitry Andric     // merging to a single expression for readability.
27380b57cec5SDimitry Andric     //
27390b57cec5SDimitry Andric     // Argument access should always use the FP.
27400b57cec5SDimitry Andric     if (isFixed) {
27410b57cec5SDimitry Andric       UseFP = hasFP(MF);
2742fe6060f1SDimitry Andric     } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
27430b57cec5SDimitry Andric       // References to the CSR area must use FP if we're re-aligning the stack
27440b57cec5SDimitry Andric       // since the dynamically-sized alignment padding is between the SP/BP and
27450b57cec5SDimitry Andric       // the CSR area.
27460b57cec5SDimitry Andric       assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
27470b57cec5SDimitry Andric       UseFP = true;
2748fe6060f1SDimitry Andric     } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
27490b57cec5SDimitry Andric       // If the FPOffset is negative and we're producing a signed immediate, we
27500b57cec5SDimitry Andric       // have to keep in mind that the available offset range for negative
27510b57cec5SDimitry Andric       // offsets is smaller than for positive ones. If an offset is available
27520b57cec5SDimitry Andric       // via the FP and the SP, use whichever is closest.
27530b57cec5SDimitry Andric       bool FPOffsetFits = !ForSimm || FPOffset >= -256;
275481ad6265SDimitry Andric       PreferFP |= Offset > -FPOffset && !SVEStackSize;
27550b57cec5SDimitry Andric 
27560b57cec5SDimitry Andric       if (MFI.hasVarSizedObjects()) {
27570b57cec5SDimitry Andric         // If we have variable sized objects, we can use either FP or BP, as the
27580b57cec5SDimitry Andric         // SP offset is unknown. We can use the base pointer if we have one and
27590b57cec5SDimitry Andric         // FP is not preferred. If not, we're stuck with using FP.
27600b57cec5SDimitry Andric         bool CanUseBP = RegInfo->hasBasePointer(MF);
27610b57cec5SDimitry Andric         if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
27620b57cec5SDimitry Andric           UseFP = PreferFP;
27635ffd83dbSDimitry Andric         else if (!CanUseBP) // Can't use BP. Forced to use FP.
27640b57cec5SDimitry Andric           UseFP = true;
27650b57cec5SDimitry Andric         // else we can use BP and FP, but the offset from FP won't fit.
27660b57cec5SDimitry Andric         // That will make us scavenge registers which we can probably avoid by
27670b57cec5SDimitry Andric         // using BP. If it won't fit for BP either, we'll scavenge anyway.
27680b57cec5SDimitry Andric       } else if (FPOffset >= 0) {
27690b57cec5SDimitry Andric         // Use SP or FP, whichever gives us the best chance of the offset
27700b57cec5SDimitry Andric         // being in range for direct access. If the FPOffset is positive,
27710b57cec5SDimitry Andric         // that'll always be best, as the SP will be even further away.
27720b57cec5SDimitry Andric         UseFP = true;
27730b57cec5SDimitry Andric       } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
27740b57cec5SDimitry Andric         // Funclets access the locals contained in the parent's stack frame
27750b57cec5SDimitry Andric         // via the frame pointer, so we have to use the FP in the parent
27760b57cec5SDimitry Andric         // function.
        // Subtarget is otherwise referenced only inside the assert below.
27770b57cec5SDimitry Andric         (void) Subtarget;
27780fca6ea1SDimitry Andric         assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
27790fca6ea1SDimitry Andric                                             MF.getFunction().isVarArg()) &&
27800b57cec5SDimitry Andric                "Funclets should only be present on Win64");
27810b57cec5SDimitry Andric         UseFP = true;
27820b57cec5SDimitry Andric       } else {
27830b57cec5SDimitry Andric         // We have the choice between FP and (SP or BP).
27840b57cec5SDimitry Andric         if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
27850b57cec5SDimitry Andric           UseFP = true;
27860b57cec5SDimitry Andric       }
27870b57cec5SDimitry Andric     }
27880b57cec5SDimitry Andric   }
27890b57cec5SDimitry Andric 
2790fe6060f1SDimitry Andric   assert(
2791fe6060f1SDimitry Andric       ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
27920b57cec5SDimitry Andric       "In the presence of dynamic stack pointer realignment, "
27930b57cec5SDimitry Andric       "non-argument/CSR objects cannot be accessed through the frame pointer");
27940b57cec5SDimitry Andric 
  // SVE objects are resolved separately: UseFP is never set for them above,
  // and both candidate offsets carry ObjectOffset as their scalable part.
27958bcb0991SDimitry Andric   if (isSVE) {
    // NOTE: this StackOffset FPOffset shadows the int64_t FPOffset declared at
    // the top of the function.
2796e8d8bef9SDimitry Andric     StackOffset FPOffset =
2797e8d8bef9SDimitry Andric         StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
2798e8d8bef9SDimitry Andric     StackOffset SPOffset =
2799e8d8bef9SDimitry Andric         SVEStackSize +
2800e8d8bef9SDimitry Andric         StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
2801e8d8bef9SDimitry Andric                          ObjectOffset);
28028bcb0991SDimitry Andric     // Always use the FP for SVE spills if available and beneficial.
2803fe6060f1SDimitry Andric     if (hasFP(MF) && (SPOffset.getFixed() ||
2804e8d8bef9SDimitry Andric                       FPOffset.getScalable() < SPOffset.getScalable() ||
2805fe6060f1SDimitry Andric                       RegInfo->hasStackRealignment(MF))) {
28060b57cec5SDimitry Andric       FrameReg = RegInfo->getFrameRegister(MF);
28070b57cec5SDimitry Andric       return FPOffset;
28080b57cec5SDimitry Andric     }
28090b57cec5SDimitry Andric 
28108bcb0991SDimitry Andric     FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
28118bcb0991SDimitry Andric                                            : (unsigned)AArch64::SP;
28128bcb0991SDimitry Andric     return SPOffset;
28138bcb0991SDimitry Andric   }
28148bcb0991SDimitry Andric 
  // Account for the SVE area when the chosen base and the object lie on
  // opposite sides of it: FP-based access to a local subtracts the SVE size,
  // SP/BP-based access to a fixed or CSR object adds it.
28158bcb0991SDimitry Andric   StackOffset ScalableOffset = {};
28168bcb0991SDimitry Andric   if (UseFP && !(isFixed || isCSR))
28178bcb0991SDimitry Andric     ScalableOffset = -SVEStackSize;
28188bcb0991SDimitry Andric   if (!UseFP && (isFixed || isCSR))
28198bcb0991SDimitry Andric     ScalableOffset = SVEStackSize;
28208bcb0991SDimitry Andric 
28218bcb0991SDimitry Andric   if (UseFP) {
28228bcb0991SDimitry Andric     FrameReg = RegInfo->getFrameRegister(MF);
2823e8d8bef9SDimitry Andric     return StackOffset::getFixed(FPOffset) + ScalableOffset;
28248bcb0991SDimitry Andric   }
28258bcb0991SDimitry Andric 
28260b57cec5SDimitry Andric   // Use the base pointer if we have one.
28270b57cec5SDimitry Andric   if (RegInfo->hasBasePointer(MF))
28280b57cec5SDimitry Andric     FrameReg = RegInfo->getBaseRegister();
28290b57cec5SDimitry Andric   else {
28300b57cec5SDimitry Andric     assert(!MFI.hasVarSizedObjects() &&
28310b57cec5SDimitry Andric            "Can't use SP when we have var sized objects.");
28320b57cec5SDimitry Andric     FrameReg = AArch64::SP;
28330b57cec5SDimitry Andric     // If we're using the red zone for this function, the SP won't actually
28340b57cec5SDimitry Andric     // be adjusted, so the offsets will be negative. They're also all
28350b57cec5SDimitry Andric     // within range of the signed 9-bit immediate instructions.
28360b57cec5SDimitry Andric     if (canUseRedZone(MF))
28370b57cec5SDimitry Andric       Offset -= AFI->getLocalStackSize();
28380b57cec5SDimitry Andric   }
28390b57cec5SDimitry Andric 
2840e8d8bef9SDimitry Andric   return StackOffset::getFixed(Offset) + ScalableOffset;
28410b57cec5SDimitry Andric }
28420b57cec5SDimitry Andric 
28430b57cec5SDimitry Andric static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
28440b57cec5SDimitry Andric   // Do not set a kill flag on values that are also marked as live-in. This
28450b57cec5SDimitry Andric   // happens with the @llvm-returnaddress intrinsic and with arguments passed in
28460b57cec5SDimitry Andric   // callee saved registers.
28470b57cec5SDimitry Andric   // Omitting the kill flags is conservatively correct even if the live-in
28480b57cec5SDimitry Andric   // is not used after all.
28490b57cec5SDimitry Andric   bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
28500b57cec5SDimitry Andric   return getKillRegState(!IsLiveIn);
28510b57cec5SDimitry Andric }
28520b57cec5SDimitry Andric 
28530b57cec5SDimitry Andric static bool produceCompactUnwindFrame(MachineFunction &MF) {
28540b57cec5SDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
28550b57cec5SDimitry Andric   AttributeList Attrs = MF.getFunction().getAttributes();
28560b57cec5SDimitry Andric   return Subtarget.isTargetMachO() &&
28570b57cec5SDimitry Andric          !(Subtarget.getTargetLowering()->supportSwiftError() &&
2858fe6060f1SDimitry Andric            Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
2859*71ac745dSDimitry Andric          MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
2860*71ac745dSDimitry Andric          !requiresSaveVG(MF);
28610b57cec5SDimitry Andric }
28620b57cec5SDimitry Andric 
28630b57cec5SDimitry Andric static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
2864bdd1243dSDimitry Andric                                              bool NeedsWinCFI, bool IsFirst,
2865bdd1243dSDimitry Andric                                              const TargetRegisterInfo *TRI) {
28660b57cec5SDimitry Andric   // If we are generating register pairs for a Windows function that requires
28670b57cec5SDimitry Andric   // EH support, then pair consecutive registers only.  There are no unwind
28680b57cec5SDimitry Andric   // opcodes for saves/restores of non-consectuve register pairs.
2869e8d8bef9SDimitry Andric   // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x,
2870e8d8bef9SDimitry Andric   // save_lrpair.
28710b57cec5SDimitry Andric   // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
28720b57cec5SDimitry Andric 
2873480093f4SDimitry Andric   if (Reg2 == AArch64::FP)
2874480093f4SDimitry Andric     return true;
28750b57cec5SDimitry Andric   if (!NeedsWinCFI)
28760b57cec5SDimitry Andric     return false;
2877bdd1243dSDimitry Andric   if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
28780b57cec5SDimitry Andric     return false;
2879e8d8bef9SDimitry Andric   // If pairing a GPR with LR, the pair can be described by the save_lrpair
2880e8d8bef9SDimitry Andric   // opcode. If this is the first register pair, it would end up with a
2881e8d8bef9SDimitry Andric   // predecrement, but there's no save_lrpair_x opcode, so we can only do this
2882e8d8bef9SDimitry Andric   // if LR is paired with something else than the first register.
2883e8d8bef9SDimitry Andric   // The save_lrpair opcode requires the first register to be an odd one.
2884e8d8bef9SDimitry Andric   if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
2885e8d8bef9SDimitry Andric       (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
2886e8d8bef9SDimitry Andric     return false;
28870b57cec5SDimitry Andric   return true;
28880b57cec5SDimitry Andric }
28890b57cec5SDimitry Andric 
28908bcb0991SDimitry Andric /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
28918bcb0991SDimitry Andric /// WindowsCFI requires that only consecutive registers can be paired.
28928bcb0991SDimitry Andric /// LR and FP need to be allocated together when the frame needs to save
28938bcb0991SDimitry Andric /// the frame-record. This means any other register pairing with LR is invalid.
28948bcb0991SDimitry Andric static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
2895e8d8bef9SDimitry Andric                                       bool UsesWinAAPCS, bool NeedsWinCFI,
2896bdd1243dSDimitry Andric                                       bool NeedsFrameRecord, bool IsFirst,
2897bdd1243dSDimitry Andric                                       const TargetRegisterInfo *TRI) {
2898480093f4SDimitry Andric   if (UsesWinAAPCS)
2899bdd1243dSDimitry Andric     return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst,
2900bdd1243dSDimitry Andric                                             TRI);
29018bcb0991SDimitry Andric 
29028bcb0991SDimitry Andric   // If we need to store the frame record, don't pair any register
29038bcb0991SDimitry Andric   // with LR other than FP.
29048bcb0991SDimitry Andric   if (NeedsFrameRecord)
29058bcb0991SDimitry Andric     return Reg2 == AArch64::LR;
29068bcb0991SDimitry Andric 
29078bcb0991SDimitry Andric   return false;
29088bcb0991SDimitry Andric }
29098bcb0991SDimitry Andric 
29100b57cec5SDimitry Andric namespace {
29110b57cec5SDimitry Andric 
29120b57cec5SDimitry Andric struct RegPairInfo {
29130b57cec5SDimitry Andric   unsigned Reg1 = AArch64::NoRegister;
29140b57cec5SDimitry Andric   unsigned Reg2 = AArch64::NoRegister;
29150b57cec5SDimitry Andric   int FrameIdx;
29160b57cec5SDimitry Andric   int Offset;
29170fca6ea1SDimitry Andric   enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;
29180b57cec5SDimitry Andric 
29190b57cec5SDimitry Andric   RegPairInfo() = default;
29200b57cec5SDimitry Andric 
29210b57cec5SDimitry Andric   bool isPaired() const { return Reg2 != AArch64::NoRegister; }
2922480093f4SDimitry Andric 
2923480093f4SDimitry Andric   unsigned getScale() const {
2924480093f4SDimitry Andric     switch (Type) {
2925480093f4SDimitry Andric     case PPR:
2926480093f4SDimitry Andric       return 2;
2927480093f4SDimitry Andric     case GPR:
2928480093f4SDimitry Andric     case FPR64:
29290fca6ea1SDimitry Andric     case VG:
2930480093f4SDimitry Andric       return 8;
2931480093f4SDimitry Andric     case ZPR:
2932480093f4SDimitry Andric     case FPR128:
2933480093f4SDimitry Andric       return 16;
2934480093f4SDimitry Andric     }
2935480093f4SDimitry Andric     llvm_unreachable("Unsupported type");
2936480093f4SDimitry Andric   }
2937480093f4SDimitry Andric 
2938480093f4SDimitry Andric   bool isScalable() const { return Type == PPR || Type == ZPR; }
29390b57cec5SDimitry Andric };
29400b57cec5SDimitry Andric 
29410b57cec5SDimitry Andric } // end anonymous namespace
29420b57cec5SDimitry Andric 
/// Turns the callee-saved register list \p CSI into a list of RegPairInfo
/// entries appended to \p RegPairs.
///
/// Each entry holds one register, or two neighbouring registers of the same
/// category when the pairing rules allow it, together with a frame index and
/// an offset expressed in multiples of RegPairInfo::getScale().
///
/// Side effects visible in this function:
///  - may raise the alignment of one callee-save stack object to 16 in the
///    MachineFrameInfo to create an alignment gap;
///  - records the offset of the FP/LR frame record in AArch64FunctionInfo when
///    \p NeedsFrameRecord is set and the FP/LR pair is found.
///
/// Returns immediately, leaving \p RegPairs untouched, when \p CSI is empty.
///
/// \param NeedsFrameRecord  Whether a frame record (FP and LR saved together)
///                          is required; affects pairing and offsets.
29430b57cec5SDimitry Andric static void computeCalleeSaveRegisterPairs(
29445ffd83dbSDimitry Andric     MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
29450b57cec5SDimitry Andric     const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
294681ad6265SDimitry Andric     bool NeedsFrameRecord) {
29470b57cec5SDimitry Andric 
29480b57cec5SDimitry Andric   if (CSI.empty())
29490b57cec5SDimitry Andric     return;
29500b57cec5SDimitry Andric 
2951480093f4SDimitry Andric   bool IsWindows = isTargetWindows(MF);
29520b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
29530b57cec5SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
29540b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
29550b57cec5SDimitry Andric   CallingConv::ID CC = MF.getFunction().getCallingConv();
29560b57cec5SDimitry Andric   unsigned Count = CSI.size();
  // CC is only read inside assert()s below; the cast keeps it "used" when
  // asserts are compiled out.
29570b57cec5SDimitry Andric   (void)CC;
29580b57cec5SDimitry Andric   // MachO's compact unwind format relies on all registers being stored in
29590b57cec5SDimitry Andric   // pairs.
2960bdd1243dSDimitry Andric   assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
296106c3fb27SDimitry Andric           CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
296206c3fb27SDimitry Andric           CC == CallingConv::Win64 || (Count & 1) == 0) &&
29630b57cec5SDimitry Andric          "Odd number of callee-saved regs to spill!");
  // Default: start at the top of the callee-save area and walk CSI forwards,
  // moving the running byte offset downwards.
2964480093f4SDimitry Andric   int ByteOffset = AFI->getCalleeSavedStackSize();
2965e8d8bef9SDimitry Andric   int StackFillDir = -1;
2966e8d8bef9SDimitry Andric   int RegInc = 1;
2967e8d8bef9SDimitry Andric   unsigned FirstReg = 0;
2968e8d8bef9SDimitry Andric   if (NeedsWinCFI) {
2969e8d8bef9SDimitry Andric     // For WinCFI, fill the stack from the bottom up.
2970e8d8bef9SDimitry Andric     ByteOffset = 0;
2971e8d8bef9SDimitry Andric     StackFillDir = 1;
2972e8d8bef9SDimitry Andric     // As the CSI array is reversed to match PrologEpilogInserter, iterate
2973e8d8bef9SDimitry Andric     // backwards, to pair up registers starting from lower numbered registers.
2974e8d8bef9SDimitry Andric     RegInc = -1;
2975e8d8bef9SDimitry Andric     FirstReg = Count - 1;
2976e8d8bef9SDimitry Andric   }
  // Scalable (PPR/ZPR) saves are tracked with their own running offset.
2977480093f4SDimitry Andric   int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
2978fe6060f1SDimitry Andric   bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
  // Register handled in the previous iteration (0 before the first one); used
  // to detect the transition into FP/NEON registers for the hazard padding.
29790fca6ea1SDimitry Andric   Register LastReg = 0;
298075b4d546SDimitry Andric 
2981e8d8bef9SDimitry Andric   // When iterating backwards, the loop condition relies on unsigned wraparound.
2982e8d8bef9SDimitry Andric   for (unsigned i = FirstReg; i < Count; i += RegInc) {
29830b57cec5SDimitry Andric     RegPairInfo RPI;
29840b57cec5SDimitry Andric     RPI.Reg1 = CSI[i].getReg();
29850b57cec5SDimitry Andric 
    // Classify the register; anything outside the handled classes is a bug.
29860b57cec5SDimitry Andric     if (AArch64::GPR64RegClass.contains(RPI.Reg1))
29870b57cec5SDimitry Andric       RPI.Type = RegPairInfo::GPR;
29880b57cec5SDimitry Andric     else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
29890b57cec5SDimitry Andric       RPI.Type = RegPairInfo::FPR64;
29900b57cec5SDimitry Andric     else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
29910b57cec5SDimitry Andric       RPI.Type = RegPairInfo::FPR128;
2992480093f4SDimitry Andric     else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
2993480093f4SDimitry Andric       RPI.Type = RegPairInfo::ZPR;
2994480093f4SDimitry Andric     else if (AArch64::PPRRegClass.contains(RPI.Reg1))
2995480093f4SDimitry Andric       RPI.Type = RegPairInfo::PPR;
29960fca6ea1SDimitry Andric     else if (RPI.Reg1 == AArch64::VG)
29970fca6ea1SDimitry Andric       RPI.Type = RegPairInfo::VG;
29980b57cec5SDimitry Andric     else
29990b57cec5SDimitry Andric       llvm_unreachable("Unsupported register class.");
30000b57cec5SDimitry Andric 
30010fca6ea1SDimitry Andric     // Add the stack hazard size as we transition from GPR->FPR CSRs.
30020fca6ea1SDimitry Andric     if (AFI->hasStackHazardSlotIndex() &&
30030fca6ea1SDimitry Andric         (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
30040fca6ea1SDimitry Andric         AArch64InstrInfo::isFpOrNEON(RPI.Reg1))
30050fca6ea1SDimitry Andric       ByteOffset += StackFillDir * StackHazardSize;
30060fca6ea1SDimitry Andric     LastReg = RPI.Reg1;
30070fca6ea1SDimitry Andric 
30080b57cec5SDimitry Andric     // Add the next reg to the pair if it is in the same register class.
    // No pairing is attempted at all when the function has a stack hazard
    // slot. The unsigned cast makes the bounds check work in both iteration
    // directions (stepping below index 0 wraps to a value >= Count).
30090fca6ea1SDimitry Andric     if (unsigned(i + RegInc) < Count && !AFI->hasStackHazardSlotIndex()) {
301004eeddc0SDimitry Andric       Register NextReg = CSI[i + RegInc].getReg();
3011e8d8bef9SDimitry Andric       bool IsFirst = i == FirstReg;
30120b57cec5SDimitry Andric       switch (RPI.Type) {
30130b57cec5SDimitry Andric       case RegPairInfo::GPR:
30140b57cec5SDimitry Andric         if (AArch64::GPR64RegClass.contains(NextReg) &&
3015e8d8bef9SDimitry Andric             !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
3016bdd1243dSDimitry Andric                                        NeedsWinCFI, NeedsFrameRecord, IsFirst,
3017bdd1243dSDimitry Andric                                        TRI))
30180b57cec5SDimitry Andric           RPI.Reg2 = NextReg;
30190b57cec5SDimitry Andric         break;
30200b57cec5SDimitry Andric       case RegPairInfo::FPR64:
30210b57cec5SDimitry Andric         if (AArch64::FPR64RegClass.contains(NextReg) &&
3022e8d8bef9SDimitry Andric             !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
3023bdd1243dSDimitry Andric                                               IsFirst, TRI))
30240b57cec5SDimitry Andric           RPI.Reg2 = NextReg;
30250b57cec5SDimitry Andric         break;
30260b57cec5SDimitry Andric       case RegPairInfo::FPR128:
30270b57cec5SDimitry Andric         if (AArch64::FPR128RegClass.contains(NextReg))
30280b57cec5SDimitry Andric           RPI.Reg2 = NextReg;
30290b57cec5SDimitry Andric         break;
      // Predicate registers are never paired.
3030480093f4SDimitry Andric       case RegPairInfo::PPR:
30310fca6ea1SDimitry Andric         break;
      // ZPRs are paired only when a predicate register for fill/spill is set,
      // Reg1 is an even offset from Z0 and NextReg immediately follows it.
3032480093f4SDimitry Andric       case RegPairInfo::ZPR:
30330fca6ea1SDimitry Andric         if (AFI->getPredicateRegForFillSpill() != 0)
30340fca6ea1SDimitry Andric           if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1))
30350fca6ea1SDimitry Andric             RPI.Reg2 = NextReg;
30360fca6ea1SDimitry Andric         break;
      // VG is never paired.
30370fca6ea1SDimitry Andric       case RegPairInfo::VG:
3038480093f4SDimitry Andric         break;
30390b57cec5SDimitry Andric       }
30400b57cec5SDimitry Andric     }
30410b57cec5SDimitry Andric 
30420b57cec5SDimitry Andric     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
30430b57cec5SDimitry Andric     // list to come in sorted by frame index so that we can issue the store
30440b57cec5SDimitry Andric     // pair instructions directly. Assert if we see anything otherwise.
30450b57cec5SDimitry Andric     //
30460b57cec5SDimitry Andric     // The order of the registers in the list is controlled by
30470b57cec5SDimitry Andric     // getCalleeSavedRegs(), so they will always be in-order, as well.
30480b57cec5SDimitry Andric     assert((!RPI.isPaired() ||
3049e8d8bef9SDimitry Andric             (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
30500b57cec5SDimitry Andric            "Out of order callee saved regs!");
30510b57cec5SDimitry Andric 
30528bcb0991SDimitry Andric     assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
30538bcb0991SDimitry Andric             RPI.Reg1 == AArch64::LR) &&
30548bcb0991SDimitry Andric            "FrameRecord must be allocated together with LR");
30558bcb0991SDimitry Andric 
3056480093f4SDimitry Andric     // Windows AAPCS has FP and LR reversed.
3057480093f4SDimitry Andric     assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
3058480093f4SDimitry Andric             RPI.Reg2 == AArch64::LR) &&
3059480093f4SDimitry Andric            "FrameRecord must be allocated together with LR");
3060480093f4SDimitry Andric 
30610b57cec5SDimitry Andric     // MachO's compact unwind format relies on all registers being stored in
30620b57cec5SDimitry Andric     // adjacent register pairs.
3063bdd1243dSDimitry Andric     assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
306406c3fb27SDimitry Andric             CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
306506c3fb27SDimitry Andric             CC == CallingConv::Win64 ||
30660b57cec5SDimitry Andric             (RPI.isPaired() &&
30670b57cec5SDimitry Andric              ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
30680b57cec5SDimitry Andric               RPI.Reg1 + 1 == RPI.Reg2))) &&
30690b57cec5SDimitry Andric            "Callee-save registers not saved as adjacent register pair!");
30700b57cec5SDimitry Andric 
30710b57cec5SDimitry Andric     RPI.FrameIdx = CSI[i].getFrameIdx();
3072e8d8bef9SDimitry Andric     if (NeedsWinCFI &&
3073e8d8bef9SDimitry Andric         RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
3074e8d8bef9SDimitry Andric       RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
3075480093f4SDimitry Andric     int Scale = RPI.getScale();
3076e8d8bef9SDimitry Andric 
    // Snapshot the running offset before this entry is accounted for; the
    // WinCFI (bottom-up) layout uses this value as the entry's offset.
3077e8d8bef9SDimitry Andric     int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
3078e8d8bef9SDimitry Andric     assert(OffsetPre % Scale == 0);
3079e8d8bef9SDimitry Andric 
    // Advance the matching running offset by one or two slots of Scale bytes.
3080480093f4SDimitry Andric     if (RPI.isScalable())
30810fca6ea1SDimitry Andric       ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
3082480093f4SDimitry Andric     else
3083e8d8bef9SDimitry Andric       ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
3084480093f4SDimitry Andric 
3085fe6060f1SDimitry Andric     // Swift's async context is directly before FP, so allocate an extra
3086fe6060f1SDimitry Andric     // 8 bytes for it.
3087fe6060f1SDimitry Andric     if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
30885f757f3fSDimitry Andric         ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
30895f757f3fSDimitry Andric          (IsWindows && RPI.Reg2 == AArch64::LR)))
3090fe6060f1SDimitry Andric       ByteOffset += StackFillDir * 8;
3091fe6060f1SDimitry Andric 
30920b57cec5SDimitry Andric     // Round up size of non-pair to pair size if we need to pad the
30930b57cec5SDimitry Andric     // callee-save area to ensure 16-byte alignment.
30940fca6ea1SDimitry Andric     if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
30950fca6ea1SDimitry Andric         RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
30960fca6ea1SDimitry Andric         ByteOffset % 16 != 0) {
3097e8d8bef9SDimitry Andric       ByteOffset += 8 * StackFillDir;
30985ffd83dbSDimitry Andric       assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
3099e8d8bef9SDimitry Andric       // A stack frame with a gap looks like this, bottom up:
3100e8d8bef9SDimitry Andric       // d9, d8. x21, gap, x20, x19.
3101fe6060f1SDimitry Andric       // Set extra alignment on the x21 object to create the gap above it.
31025ffd83dbSDimitry Andric       MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
      // Only one gap is ever inserted.
3103fe6060f1SDimitry Andric       NeedGapToAlignStack = false;
31040b57cec5SDimitry Andric     }
31050b57cec5SDimitry Andric 
3106e8d8bef9SDimitry Andric     int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
3107e8d8bef9SDimitry Andric     assert(OffsetPost % Scale == 0);
3108e8d8bef9SDimitry Andric     // If filling top down (default), we want the offset after incrementing it.
31095f757f3fSDimitry Andric     // If filling bottom up (WinCFI) we need the original offset.
3110e8d8bef9SDimitry Andric     int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
3111fe6060f1SDimitry Andric 
3112fe6060f1SDimitry Andric     // The FP, LR pair goes 8 bytes into our expanded 24-byte slot so that the
3113fe6060f1SDimitry Andric     // Swift context can directly precede FP.
3114fe6060f1SDimitry Andric     if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
31155f757f3fSDimitry Andric         ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
31165f757f3fSDimitry Andric          (IsWindows && RPI.Reg2 == AArch64::LR)))
3117fe6060f1SDimitry Andric       Offset += 8;
    // The stored offset is in units of Scale, not bytes.
31180b57cec5SDimitry Andric     RPI.Offset = Offset / Scale;
3119480093f4SDimitry Andric 
31200fca6ea1SDimitry Andric     assert((!RPI.isPaired() ||
31210fca6ea1SDimitry Andric             (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
3122480093f4SDimitry Andric             (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
31230b57cec5SDimitry Andric            "Offset out of bounds for LDP/STP immediate");
31240b57cec5SDimitry Andric 
3125e8d8bef9SDimitry Andric     // Save the offset to frame record so that the FP register can point to the
3126e8d8bef9SDimitry Andric     // innermost frame record (spilled FP and LR registers).
31270fca6ea1SDimitry Andric     if (NeedsFrameRecord &&
31280fca6ea1SDimitry Andric         ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
31290fca6ea1SDimitry Andric          (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR)))
3130e8d8bef9SDimitry Andric       AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
3131e8d8bef9SDimitry Andric 
31320b57cec5SDimitry Andric     RegPairs.push_back(RPI);
    // A pair consumed two CSI entries, so step over the second one as well.
31330b57cec5SDimitry Andric     if (RPI.isPaired())
3134e8d8bef9SDimitry Andric       i += RegInc;
3135e8d8bef9SDimitry Andric   }
3136e8d8bef9SDimitry Andric   if (NeedsWinCFI) {
3137e8d8bef9SDimitry Andric     // If we need an alignment gap in the stack, align the topmost stack
3138e8d8bef9SDimitry Andric     // object. A stack frame with a gap looks like this, bottom up:
3139e8d8bef9SDimitry Andric     // x19, d8. d9, gap.
3140e8d8bef9SDimitry Andric     // Set extra alignment on the topmost stack object (the first element in
3141e8d8bef9SDimitry Andric     // CSI, which goes top down), to create the gap above it.
3142e8d8bef9SDimitry Andric     if (AFI->hasCalleeSaveStackFreeSpace())
3143e8d8bef9SDimitry Andric       MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
3144e8d8bef9SDimitry Andric     // We iterated bottom up over the registers; flip RegPairs back to top
3145e8d8bef9SDimitry Andric     // down order.
3146e8d8bef9SDimitry Andric     std::reverse(RegPairs.begin(), RegPairs.end());
31470b57cec5SDimitry Andric   }
31480b57cec5SDimitry Andric }
31490b57cec5SDimitry Andric 
31500b57cec5SDimitry Andric bool AArch64FrameLowering::spillCalleeSavedRegisters(
31510b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
31525ffd83dbSDimitry Andric     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
31530b57cec5SDimitry Andric   MachineFunction &MF = *MBB.getParent();
31540b57cec5SDimitry Andric   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
31550fca6ea1SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
31560b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
31570b57cec5SDimitry Andric   DebugLoc DL;
31580b57cec5SDimitry Andric   SmallVector<RegPairInfo, 8> RegPairs;
31590b57cec5SDimitry Andric 
316081ad6265SDimitry Andric   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
316181ad6265SDimitry Andric 
31620fca6ea1SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
31630fca6ea1SDimitry Andric   // Refresh the reserved regs in case there are any potential changes since the
31640fca6ea1SDimitry Andric   // last freeze.
31650fca6ea1SDimitry Andric   MRI.freezeReservedRegs();
31660fca6ea1SDimitry Andric 
3167fe6060f1SDimitry Andric   if (homogeneousPrologEpilog(MF)) {
3168fe6060f1SDimitry Andric     auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
3169fe6060f1SDimitry Andric                    .setMIFlag(MachineInstr::FrameSetup);
3170fe6060f1SDimitry Andric 
3171fe6060f1SDimitry Andric     for (auto &RPI : RegPairs) {
3172fe6060f1SDimitry Andric       MIB.addReg(RPI.Reg1);
3173fe6060f1SDimitry Andric       MIB.addReg(RPI.Reg2);
3174fe6060f1SDimitry Andric 
3175fe6060f1SDimitry Andric       // Update register live in.
3176fe6060f1SDimitry Andric       if (!MRI.isReserved(RPI.Reg1))
3177fe6060f1SDimitry Andric         MBB.addLiveIn(RPI.Reg1);
31785f757f3fSDimitry Andric       if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
3179fe6060f1SDimitry Andric         MBB.addLiveIn(RPI.Reg2);
3180fe6060f1SDimitry Andric     }
3181fe6060f1SDimitry Andric     return true;
3182fe6060f1SDimitry Andric   }
31830fca6ea1SDimitry Andric   bool PTrueCreated = false;
3184349cc55cSDimitry Andric   for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
31850b57cec5SDimitry Andric     unsigned Reg1 = RPI.Reg1;
31860b57cec5SDimitry Andric     unsigned Reg2 = RPI.Reg2;
31870b57cec5SDimitry Andric     unsigned StrOpc;
31880b57cec5SDimitry Andric 
31890b57cec5SDimitry Andric     // Issue sequence of spills for cs regs.  The first spill may be converted
31900b57cec5SDimitry Andric     // to a pre-decrement store later by emitPrologue if the callee-save stack
31910b57cec5SDimitry Andric     // area allocation can't be combined with the local stack area allocation.
31920b57cec5SDimitry Andric     // For example:
31930b57cec5SDimitry Andric     //    stp     x22, x21, [sp, #0]     // addImm(+0)
31940b57cec5SDimitry Andric     //    stp     x20, x19, [sp, #16]    // addImm(+2)
31950b57cec5SDimitry Andric     //    stp     fp, lr, [sp, #32]      // addImm(+4)
31960b57cec5SDimitry Andric     // Rationale: This sequence saves uop updates compared to a sequence of
31970b57cec5SDimitry Andric     // pre-increment spills like stp xi,xj,[sp,#-16]!
31980b57cec5SDimitry Andric     // Note: Similar rationale and sequence for restores in epilog.
31995ffd83dbSDimitry Andric     unsigned Size;
32005ffd83dbSDimitry Andric     Align Alignment;
32010b57cec5SDimitry Andric     switch (RPI.Type) {
32020b57cec5SDimitry Andric     case RegPairInfo::GPR:
32030b57cec5SDimitry Andric       StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
32040b57cec5SDimitry Andric       Size = 8;
32055ffd83dbSDimitry Andric       Alignment = Align(8);
32060b57cec5SDimitry Andric       break;
32070b57cec5SDimitry Andric     case RegPairInfo::FPR64:
32080b57cec5SDimitry Andric       StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
32090b57cec5SDimitry Andric       Size = 8;
32105ffd83dbSDimitry Andric       Alignment = Align(8);
32110b57cec5SDimitry Andric       break;
32120b57cec5SDimitry Andric     case RegPairInfo::FPR128:
32130b57cec5SDimitry Andric       StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
32140b57cec5SDimitry Andric       Size = 16;
32155ffd83dbSDimitry Andric       Alignment = Align(16);
32160b57cec5SDimitry Andric       break;
3217480093f4SDimitry Andric     case RegPairInfo::ZPR:
32180fca6ea1SDimitry Andric       StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
3219480093f4SDimitry Andric       Size = 16;
32205ffd83dbSDimitry Andric       Alignment = Align(16);
3221480093f4SDimitry Andric       break;
3222480093f4SDimitry Andric     case RegPairInfo::PPR:
3223480093f4SDimitry Andric       StrOpc = AArch64::STR_PXI;
3224480093f4SDimitry Andric       Size = 2;
32255ffd83dbSDimitry Andric       Alignment = Align(2);
3226480093f4SDimitry Andric       break;
32270fca6ea1SDimitry Andric     case RegPairInfo::VG:
32280fca6ea1SDimitry Andric       StrOpc = AArch64::STRXui;
32290fca6ea1SDimitry Andric       Size = 8;
32300fca6ea1SDimitry Andric       Alignment = Align(8);
32310fca6ea1SDimitry Andric       break;
32320b57cec5SDimitry Andric     }
32330fca6ea1SDimitry Andric 
32340fca6ea1SDimitry Andric     unsigned X0Scratch = AArch64::NoRegister;
32350fca6ea1SDimitry Andric     if (Reg1 == AArch64::VG) {
32360fca6ea1SDimitry Andric       // Find an available register to store value of VG to.
32370fca6ea1SDimitry Andric       Reg1 = findScratchNonCalleeSaveRegister(&MBB);
32380fca6ea1SDimitry Andric       assert(Reg1 != AArch64::NoRegister);
32390fca6ea1SDimitry Andric       SMEAttrs Attrs(MF.getFunction());
32400fca6ea1SDimitry Andric 
32410fca6ea1SDimitry Andric       if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&
32420fca6ea1SDimitry Andric           AFI->getStreamingVGIdx() == std::numeric_limits<int>::max()) {
32430fca6ea1SDimitry Andric         // For locally-streaming functions, we need to store both the streaming
32440fca6ea1SDimitry Andric         // & non-streaming VG. Spill the streaming value first.
32450fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::RDSVLI_XI), Reg1)
32460fca6ea1SDimitry Andric             .addImm(1)
32470fca6ea1SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
32480fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::UBFMXri), Reg1)
32490fca6ea1SDimitry Andric             .addReg(Reg1)
32500fca6ea1SDimitry Andric             .addImm(3)
32510fca6ea1SDimitry Andric             .addImm(63)
32520fca6ea1SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
32530fca6ea1SDimitry Andric 
32540fca6ea1SDimitry Andric         AFI->setStreamingVGIdx(RPI.FrameIdx);
32550fca6ea1SDimitry Andric       } else if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
32560fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::CNTD_XPiI), Reg1)
32570fca6ea1SDimitry Andric             .addImm(31)
32580fca6ea1SDimitry Andric             .addImm(1)
32590fca6ea1SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
32600fca6ea1SDimitry Andric         AFI->setVGIdx(RPI.FrameIdx);
32610fca6ea1SDimitry Andric       } else {
32620fca6ea1SDimitry Andric         const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
32630fca6ea1SDimitry Andric         if (llvm::any_of(
32640fca6ea1SDimitry Andric                 MBB.liveins(),
32650fca6ea1SDimitry Andric                 [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
32660fca6ea1SDimitry Andric                   return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
32670fca6ea1SDimitry Andric                       AArch64::X0, LiveIn.PhysReg);
32680fca6ea1SDimitry Andric                 }))
32690fca6ea1SDimitry Andric           X0Scratch = Reg1;
32700fca6ea1SDimitry Andric 
32710fca6ea1SDimitry Andric         if (X0Scratch != AArch64::NoRegister)
32720fca6ea1SDimitry Andric           BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), Reg1)
32730fca6ea1SDimitry Andric               .addReg(AArch64::XZR)
32740fca6ea1SDimitry Andric               .addReg(AArch64::X0, RegState::Undef)
32750fca6ea1SDimitry Andric               .addReg(AArch64::X0, RegState::Implicit)
32760fca6ea1SDimitry Andric               .setMIFlag(MachineInstr::FrameSetup);
32770fca6ea1SDimitry Andric 
32780fca6ea1SDimitry Andric         const uint32_t *RegMask = TRI->getCallPreservedMask(
32790fca6ea1SDimitry Andric             MF,
32800fca6ea1SDimitry Andric             CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1);
32810fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::BL))
32820fca6ea1SDimitry Andric             .addExternalSymbol("__arm_get_current_vg")
32830fca6ea1SDimitry Andric             .addRegMask(RegMask)
32840fca6ea1SDimitry Andric             .addReg(AArch64::X0, RegState::ImplicitDefine)
32850fca6ea1SDimitry Andric             .setMIFlag(MachineInstr::FrameSetup);
32860fca6ea1SDimitry Andric         Reg1 = AArch64::X0;
32870fca6ea1SDimitry Andric         AFI->setVGIdx(RPI.FrameIdx);
32880fca6ea1SDimitry Andric       }
32890fca6ea1SDimitry Andric     }
32900fca6ea1SDimitry Andric 
32910b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
32920b57cec5SDimitry Andric                if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
32930b57cec5SDimitry Andric                dbgs() << ") -> fi#(" << RPI.FrameIdx;
32940b57cec5SDimitry Andric                if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
32950b57cec5SDimitry Andric                dbgs() << ")\n");
32960b57cec5SDimitry Andric 
32970b57cec5SDimitry Andric     assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
32980b57cec5SDimitry Andric            "Windows unwdinding requires a consecutive (FP,LR) pair");
32990b57cec5SDimitry Andric     // Windows unwind codes require consecutive registers if registers are
33000b57cec5SDimitry Andric     // paired.  Make the switch here, so that the code below will save (x,x+1)
33010b57cec5SDimitry Andric     // and not (x+1,x).
33020b57cec5SDimitry Andric     unsigned FrameIdxReg1 = RPI.FrameIdx;
33030b57cec5SDimitry Andric     unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
33040b57cec5SDimitry Andric     if (NeedsWinCFI && RPI.isPaired()) {
33050b57cec5SDimitry Andric       std::swap(Reg1, Reg2);
33060b57cec5SDimitry Andric       std::swap(FrameIdxReg1, FrameIdxReg2);
33070b57cec5SDimitry Andric     }
33080fca6ea1SDimitry Andric 
33090fca6ea1SDimitry Andric     if (RPI.isPaired() && RPI.isScalable()) {
33100fca6ea1SDimitry Andric       [[maybe_unused]] const AArch64Subtarget &Subtarget =
33110fca6ea1SDimitry Andric                               MF.getSubtarget<AArch64Subtarget>();
33120fca6ea1SDimitry Andric       AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
33130fca6ea1SDimitry Andric       unsigned PnReg = AFI->getPredicateRegForFillSpill();
33140fca6ea1SDimitry Andric       assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
33150fca6ea1SDimitry Andric              "Expects SVE2.1 or SME2 target and a predicate register");
33160fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS
33170fca6ea1SDimitry Andric       auto IsPPR = [](const RegPairInfo &c) {
33180fca6ea1SDimitry Andric         return c.Reg1 == RegPairInfo::PPR;
33190fca6ea1SDimitry Andric       };
33200fca6ea1SDimitry Andric       auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
33210fca6ea1SDimitry Andric       auto IsZPR = [](const RegPairInfo &c) {
33220fca6ea1SDimitry Andric         return c.Type == RegPairInfo::ZPR;
33230fca6ea1SDimitry Andric       };
33240fca6ea1SDimitry Andric       auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
33250fca6ea1SDimitry Andric       assert(!(PPRBegin < ZPRBegin) &&
33260fca6ea1SDimitry Andric              "Expected callee save predicate to be handled first");
33270fca6ea1SDimitry Andric #endif
33280fca6ea1SDimitry Andric       if (!PTrueCreated) {
33290fca6ea1SDimitry Andric         PTrueCreated = true;
33300fca6ea1SDimitry Andric         BuildMI(MBB, MI, DL, TII.get(AArch64::PTRUE_C_B), PnReg)
33310fca6ea1SDimitry Andric             .setMIFlags(MachineInstr::FrameSetup);
33320fca6ea1SDimitry Andric       }
33330fca6ea1SDimitry Andric       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
33340fca6ea1SDimitry Andric       if (!MRI.isReserved(Reg1))
33350fca6ea1SDimitry Andric         MBB.addLiveIn(Reg1);
33360fca6ea1SDimitry Andric       if (!MRI.isReserved(Reg2))
33370fca6ea1SDimitry Andric         MBB.addLiveIn(Reg2);
33380fca6ea1SDimitry Andric       MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
33390fca6ea1SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
33400fca6ea1SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
33410fca6ea1SDimitry Andric           MachineMemOperand::MOStore, Size, Alignment));
33420fca6ea1SDimitry Andric       MIB.addReg(PnReg);
33430fca6ea1SDimitry Andric       MIB.addReg(AArch64::SP)
33440fca6ea1SDimitry Andric           .addImm(RPI.Offset) // [sp, #offset*scale],
33450fca6ea1SDimitry Andric                               // where factor*scale is implicit
33460fca6ea1SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
33470fca6ea1SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
33480fca6ea1SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
33490fca6ea1SDimitry Andric           MachineMemOperand::MOStore, Size, Alignment));
33500fca6ea1SDimitry Andric       if (NeedsWinCFI)
33510fca6ea1SDimitry Andric         InsertSEH(MIB, TII, MachineInstr::FrameSetup);
33520fca6ea1SDimitry Andric     } else { // The code when the pair of ZReg is not present
33530b57cec5SDimitry Andric       MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
33540b57cec5SDimitry Andric       if (!MRI.isReserved(Reg1))
33550b57cec5SDimitry Andric         MBB.addLiveIn(Reg1);
33560b57cec5SDimitry Andric       if (RPI.isPaired()) {
33570b57cec5SDimitry Andric         if (!MRI.isReserved(Reg2))
33580b57cec5SDimitry Andric           MBB.addLiveIn(Reg2);
33590b57cec5SDimitry Andric         MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
33600b57cec5SDimitry Andric         MIB.addMemOperand(MF.getMachineMemOperand(
33610b57cec5SDimitry Andric             MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
33625ffd83dbSDimitry Andric             MachineMemOperand::MOStore, Size, Alignment));
33630b57cec5SDimitry Andric       }
33640b57cec5SDimitry Andric       MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
33650b57cec5SDimitry Andric           .addReg(AArch64::SP)
33660b57cec5SDimitry Andric           .addImm(RPI.Offset) // [sp, #offset*scale],
33670b57cec5SDimitry Andric                               // where factor*scale is implicit
33680b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
33690b57cec5SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
33700b57cec5SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
33715ffd83dbSDimitry Andric           MachineMemOperand::MOStore, Size, Alignment));
33720b57cec5SDimitry Andric       if (NeedsWinCFI)
33730b57cec5SDimitry Andric         InsertSEH(MIB, TII, MachineInstr::FrameSetup);
33740fca6ea1SDimitry Andric     }
3375480093f4SDimitry Andric     // Update the StackIDs of the SVE stack slots.
3376480093f4SDimitry Andric     MachineFrameInfo &MFI = MF.getFrameInfo();
33770fca6ea1SDimitry Andric     if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {
33780fca6ea1SDimitry Andric       MFI.setStackID(FrameIdxReg1, TargetStackID::ScalableVector);
33790fca6ea1SDimitry Andric       if (RPI.isPaired())
33800fca6ea1SDimitry Andric         MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector);
33810fca6ea1SDimitry Andric     }
3382480093f4SDimitry Andric 
33830fca6ea1SDimitry Andric     if (X0Scratch != AArch64::NoRegister)
33840fca6ea1SDimitry Andric       BuildMI(MBB, MI, DL, TII.get(AArch64::ORRXrr), AArch64::X0)
33850fca6ea1SDimitry Andric           .addReg(AArch64::XZR)
33860fca6ea1SDimitry Andric           .addReg(X0Scratch, RegState::Undef)
33870fca6ea1SDimitry Andric           .addReg(X0Scratch, RegState::Implicit)
33880fca6ea1SDimitry Andric           .setMIFlag(MachineInstr::FrameSetup);
33890b57cec5SDimitry Andric   }
33900b57cec5SDimitry Andric   return true;
33910b57cec5SDimitry Andric }
33920b57cec5SDimitry Andric 
/// Restore the callee-saved registers listed in \p CSI by emitting reloads in
/// front of \p MBBI in \p MBB.
///
/// The registers are first grouped by computeCalleeSaveRegisterPairs(). If
/// homogeneousPrologEpilog() holds for this block, a single HOM_Epilog
/// instruction that defines Reg1 and Reg2 of every pair is emitted instead of
/// individual loads. Otherwise one SP-relative load (or paired load) is emitted
/// per RegPairInfo and flagged FrameDestroy; InsertSEH() is invoked on each of
/// them when needsWinCFI() is true. Entries of type VG are skipped.
///
/// \returns true on every path.
33930b57cec5SDimitry Andric bool AArch64FrameLowering::restoreCalleeSavedRegisters(
339481ad6265SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
33955ffd83dbSDimitry Andric     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
33960b57cec5SDimitry Andric   MachineFunction &MF = *MBB.getParent();
33970b57cec5SDimitry Andric   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
33980b57cec5SDimitry Andric   DebugLoc DL;
33990b57cec5SDimitry Andric   SmallVector<RegPairInfo, 8> RegPairs;
34000b57cec5SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
34010b57cec5SDimitry Andric 
  // Reuse the debug location of the insertion point when there is one;
  // otherwise DL stays default-constructed.
340281ad6265SDimitry Andric   if (MBBI != MBB.end())
340381ad6265SDimitry Andric     DL = MBBI->getDebugLoc();
34040b57cec5SDimitry Andric 
340581ad6265SDimitry Andric   computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF));
  // Homogeneous epilogue: one HOM_Epilog instruction defines both registers of
  // every pair, and no individual loads are emitted.
34060fca6ea1SDimitry Andric   if (homogeneousPrologEpilog(MF, &MBB)) {
34070fca6ea1SDimitry Andric     auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog))
34080fca6ea1SDimitry Andric                    .setMIFlag(MachineInstr::FrameDestroy);
34090fca6ea1SDimitry Andric     for (auto &RPI : RegPairs) {
34100fca6ea1SDimitry Andric       MIB.addReg(RPI.Reg1, RegState::Define);
34110fca6ea1SDimitry Andric       MIB.addReg(RPI.Reg2, RegState::Define);
34120fca6ea1SDimitry Andric     }
34130fca6ea1SDimitry Andric     return true;
34140fca6ea1SDimitry Andric   }
34150b57cec5SDimitry Andric 
34160fca6ea1SDimitry Andric   // For performance reasons restore SVE register in increasing order
  // This is done by reversing, in place, the first contiguous run of PPR
  // entries and the first contiguous run of ZPR entries in RegPairs.
34170fca6ea1SDimitry Andric   auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
34180fca6ea1SDimitry Andric   auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
34190fca6ea1SDimitry Andric   auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
34200fca6ea1SDimitry Andric   std::reverse(PPRBegin, PPREnd);
34210fca6ea1SDimitry Andric   auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
34220fca6ea1SDimitry Andric   auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
34230fca6ea1SDimitry Andric   auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
34240fca6ea1SDimitry Andric   std::reverse(ZPRBegin, ZPREnd);
34250fca6ea1SDimitry Andric 
  // Set once the PTRUE_C_B that initialises the predicate register used by the
  // paired scalable loads has been emitted, so it is emitted at most once.
34260fca6ea1SDimitry Andric   bool PTrueCreated = false;
34270fca6ea1SDimitry Andric   for (const RegPairInfo &RPI : RegPairs) {
34280b57cec5SDimitry Andric     unsigned Reg1 = RPI.Reg1;
34290b57cec5SDimitry Andric     unsigned Reg2 = RPI.Reg2;
34300b57cec5SDimitry Andric 
34310b57cec5SDimitry Andric     // Issue sequence of restores for cs regs. The last restore may be converted
34320b57cec5SDimitry Andric     // to a post-increment load later by emitEpilogue if the callee-save stack
34330b57cec5SDimitry Andric     // area allocation can't be combined with the local stack area allocation.
34340b57cec5SDimitry Andric     // For example:
34350b57cec5SDimitry Andric     //    ldp     fp, lr, [sp, #32]       // addImm(+4)
34360b57cec5SDimitry Andric     //    ldp     x20, x19, [sp, #16]     // addImm(+2)
34370b57cec5SDimitry Andric     //    ldp     x22, x21, [sp, #0]      // addImm(+0)
34380b57cec5SDimitry Andric     // Note: see comment in spillCalleeSavedRegisters()
    // Pick the load opcode plus the size and alignment recorded in the memory
    // operands, based on the register kind and on whether the entry is paired.
34390b57cec5SDimitry Andric     unsigned LdrOpc;
34405ffd83dbSDimitry Andric     unsigned Size;
34415ffd83dbSDimitry Andric     Align Alignment;
34420b57cec5SDimitry Andric     switch (RPI.Type) {
34430b57cec5SDimitry Andric     case RegPairInfo::GPR:
34440b57cec5SDimitry Andric       LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
34450b57cec5SDimitry Andric       Size = 8;
34465ffd83dbSDimitry Andric       Alignment = Align(8);
34470b57cec5SDimitry Andric       break;
34480b57cec5SDimitry Andric     case RegPairInfo::FPR64:
34490b57cec5SDimitry Andric       LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
34500b57cec5SDimitry Andric       Size = 8;
34515ffd83dbSDimitry Andric       Alignment = Align(8);
34520b57cec5SDimitry Andric       break;
34530b57cec5SDimitry Andric     case RegPairInfo::FPR128:
34540b57cec5SDimitry Andric       LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
34550b57cec5SDimitry Andric       Size = 16;
34565ffd83dbSDimitry Andric       Alignment = Align(16);
34570b57cec5SDimitry Andric       break;
3458480093f4SDimitry Andric     case RegPairInfo::ZPR:
34590fca6ea1SDimitry Andric       LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
3460480093f4SDimitry Andric       Size = 16;
34615ffd83dbSDimitry Andric       Alignment = Align(16);
3462480093f4SDimitry Andric       break;
3463480093f4SDimitry Andric     case RegPairInfo::PPR:
3464480093f4SDimitry Andric       LdrOpc = AArch64::LDR_PXI;
3465480093f4SDimitry Andric       Size = 2;
34665ffd83dbSDimitry Andric       Alignment = Align(2);
3467480093f4SDimitry Andric       break;
34680fca6ea1SDimitry Andric     case RegPairInfo::VG:
      // No reload is emitted for VG entries.
34690fca6ea1SDimitry Andric       continue;
34700b57cec5SDimitry Andric     }
34710b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
34720b57cec5SDimitry Andric                if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
34730b57cec5SDimitry Andric                dbgs() << ") -> fi#(" << RPI.FrameIdx;
34740b57cec5SDimitry Andric                if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
34750b57cec5SDimitry Andric                dbgs() << ")\n");
34760b57cec5SDimitry Andric 
34770b57cec5SDimitry Andric     // Windows unwind codes require consecutive registers if registers are
34780b57cec5SDimitry Andric     // paired.  Make the switch here, so that the code below will restore (x,x+1)
34790b57cec5SDimitry Andric     // and not (x+1,x).
34800b57cec5SDimitry Andric     unsigned FrameIdxReg1 = RPI.FrameIdx;
34810b57cec5SDimitry Andric     unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
34820b57cec5SDimitry Andric     if (NeedsWinCFI && RPI.isPaired()) {
34830b57cec5SDimitry Andric       std::swap(Reg1, Reg2);
34840b57cec5SDimitry Andric       std::swap(FrameIdxReg1, FrameIdxReg2);
34850b57cec5SDimitry Andric     }
34860fca6ea1SDimitry Andric 
34870fca6ea1SDimitry Andric     AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    // Paired scalable registers: a single predicated load into a two-register
    // Z tuple, governed by the predicate register from
    // getPredicateRegForFillSpill().
34880fca6ea1SDimitry Andric     if (RPI.isPaired() && RPI.isScalable()) {
34890fca6ea1SDimitry Andric       [[maybe_unused]] const AArch64Subtarget &Subtarget =
34900fca6ea1SDimitry Andric                               MF.getSubtarget<AArch64Subtarget>();
34910fca6ea1SDimitry Andric       unsigned PnReg = AFI->getPredicateRegForFillSpill();
34920fca6ea1SDimitry Andric       assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
34930fca6ea1SDimitry Andric              "Expects SVE2.1 or SME2 target and a predicate register");
34940fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS
34950fca6ea1SDimitry Andric       assert(!(PPRBegin < ZPRBegin) &&
34960fca6ea1SDimitry Andric              "Expected callee save predicate to be handled first");
34970fca6ea1SDimitry Andric #endif
      // Emit the PTRUE_C_B that defines PnReg ahead of the first paired load.
34980fca6ea1SDimitry Andric       if (!PTrueCreated) {
34990fca6ea1SDimitry Andric         PTrueCreated = true;
35000fca6ea1SDimitry Andric         BuildMI(MBB, MBBI, DL, TII.get(AArch64::PTRUE_C_B), PnReg)
35010fca6ea1SDimitry Andric             .setMIFlags(MachineInstr::FrameDestroy);
35020fca6ea1SDimitry Andric       }
350381ad6265SDimitry Andric       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
      // The destination is the tuple register found at the same distance from
      // Z0_Z1 as RPI.Reg1 is from Z0.
      // NOTE(review): this relies on the Zn_Zn+1 tuple registers being
      // numbered contiguously in the same order as Z0..Zn - confirm against
      // the register definitions.
35040fca6ea1SDimitry Andric       MIB.addReg(/*PairRegs*/ AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
35050fca6ea1SDimitry Andric                  getDefRegState(true));
35060b57cec5SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
35070b57cec5SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
35085ffd83dbSDimitry Andric           MachineMemOperand::MOLoad, Size, Alignment));
35090fca6ea1SDimitry Andric       MIB.addReg(PnReg);
35100fca6ea1SDimitry Andric       MIB.addReg(AArch64::SP)
35110b57cec5SDimitry Andric           .addImm(RPI.Offset) // [sp, #offset*scale]
35120b57cec5SDimitry Andric                               // where factor*scale is implicit
35130b57cec5SDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
35140b57cec5SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
35150b57cec5SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
35165ffd83dbSDimitry Andric           MachineMemOperand::MOLoad, Size, Alignment));
35170b57cec5SDimitry Andric       if (NeedsWinCFI)
35180b57cec5SDimitry Andric         InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
351981ad6265SDimitry Andric     } else {
      // Every other case: a single load, or a load-pair whose first operand is
      // Reg2 and whose second operand is Reg1, each with its own fixed-stack
      // memory operand.
35200fca6ea1SDimitry Andric       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc));
35210fca6ea1SDimitry Andric       if (RPI.isPaired()) {
35220fca6ea1SDimitry Andric         MIB.addReg(Reg2, getDefRegState(true));
35230fca6ea1SDimitry Andric         MIB.addMemOperand(MF.getMachineMemOperand(
35240fca6ea1SDimitry Andric             MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
35250fca6ea1SDimitry Andric             MachineMemOperand::MOLoad, Size, Alignment));
35260fca6ea1SDimitry Andric       }
35270fca6ea1SDimitry Andric       MIB.addReg(Reg1, getDefRegState(true));
35280fca6ea1SDimitry Andric       MIB.addReg(AArch64::SP)
35290fca6ea1SDimitry Andric           .addImm(RPI.Offset) // [sp, #offset*scale]
35300fca6ea1SDimitry Andric                               // where factor*scale is implicit
35310fca6ea1SDimitry Andric           .setMIFlag(MachineInstr::FrameDestroy);
35320fca6ea1SDimitry Andric       MIB.addMemOperand(MF.getMachineMemOperand(
35330fca6ea1SDimitry Andric           MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
35340fca6ea1SDimitry Andric           MachineMemOperand::MOLoad, Size, Alignment));
35350fca6ea1SDimitry Andric       if (NeedsWinCFI)
35360fca6ea1SDimitry Andric         InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
35370fca6ea1SDimitry Andric     }
35380fca6ea1SDimitry Andric   }
35390fca6ea1SDimitry Andric   return true;
35400fca6ea1SDimitry Andric }
35410fca6ea1SDimitry Andric 
354262987288SDimitry Andric // Return the FrameID for a MMO.
354362987288SDimitry Andric static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
35440fca6ea1SDimitry Andric                                         const MachineFrameInfo &MFI) {
35450fca6ea1SDimitry Andric   auto *PSV =
35460fca6ea1SDimitry Andric       dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
35470fca6ea1SDimitry Andric   if (PSV)
35480fca6ea1SDimitry Andric     return std::optional<int>(PSV->getFrameIndex());
35490fca6ea1SDimitry Andric 
35500fca6ea1SDimitry Andric   if (MMO->getValue()) {
35510fca6ea1SDimitry Andric     if (auto *Al = dyn_cast<AllocaInst>(getUnderlyingObject(MMO->getValue()))) {
35520fca6ea1SDimitry Andric       for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd();
35530fca6ea1SDimitry Andric            FI++)
35540fca6ea1SDimitry Andric         if (MFI.getObjectAllocation(FI) == Al)
35550fca6ea1SDimitry Andric           return FI;
355681ad6265SDimitry Andric     }
35570b57cec5SDimitry Andric   }
35580b57cec5SDimitry Andric 
35590fca6ea1SDimitry Andric   return std::nullopt;
35600fca6ea1SDimitry Andric }
35610fca6ea1SDimitry Andric 
356262987288SDimitry Andric // Return the FrameID for a Load/Store instruction by looking at the first MMO.
356362987288SDimitry Andric static std::optional<int> getLdStFrameID(const MachineInstr &MI,
356462987288SDimitry Andric                                          const MachineFrameInfo &MFI) {
356562987288SDimitry Andric   if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
356662987288SDimitry Andric     return std::nullopt;
356762987288SDimitry Andric 
356862987288SDimitry Andric   return getMMOFrameID(*MI.memoperands_begin(), MFI);
356962987288SDimitry Andric }
357062987288SDimitry Andric 
35710fca6ea1SDimitry Andric // Check if a Hazard slot is needed for the current function, and if so create
35720fca6ea1SDimitry Andric // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
35730fca6ea1SDimitry Andric // which can be used to determine if any hazard padding is needed.
35740fca6ea1SDimitry Andric void AArch64FrameLowering::determineStackHazardSlot(
35750fca6ea1SDimitry Andric     MachineFunction &MF, BitVector &SavedRegs) const {
35760fca6ea1SDimitry Andric   if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
35770fca6ea1SDimitry Andric       MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
35780fca6ea1SDimitry Andric     return;
35790fca6ea1SDimitry Andric 
35800fca6ea1SDimitry Andric   // Stack hazards are only needed in streaming functions.
35810fca6ea1SDimitry Andric   SMEAttrs Attrs(MF.getFunction());
35820fca6ea1SDimitry Andric   if (!StackHazardInNonStreaming && Attrs.hasNonStreamingInterfaceAndBody())
35830fca6ea1SDimitry Andric     return;
35840fca6ea1SDimitry Andric 
35850fca6ea1SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
35860fca6ea1SDimitry Andric 
35870fca6ea1SDimitry Andric   // Add a hazard slot if there are any CSR FPR registers, or are any fp-only
35880fca6ea1SDimitry Andric   // stack objects.
35890fca6ea1SDimitry Andric   bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
35900fca6ea1SDimitry Andric     return AArch64::FPR64RegClass.contains(Reg) ||
35910fca6ea1SDimitry Andric            AArch64::FPR128RegClass.contains(Reg) ||
35920fca6ea1SDimitry Andric            AArch64::ZPRRegClass.contains(Reg) ||
35930fca6ea1SDimitry Andric            AArch64::PPRRegClass.contains(Reg);
35940fca6ea1SDimitry Andric   });
35950fca6ea1SDimitry Andric   bool HasFPRStackObjects = false;
35960fca6ea1SDimitry Andric   if (!HasFPRCSRs) {
35970fca6ea1SDimitry Andric     std::vector<unsigned> FrameObjects(MFI.getObjectIndexEnd());
35980fca6ea1SDimitry Andric     for (auto &MBB : MF) {
35990fca6ea1SDimitry Andric       for (auto &MI : MBB) {
36000fca6ea1SDimitry Andric         std::optional<int> FI = getLdStFrameID(MI, MFI);
36010fca6ea1SDimitry Andric         if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
36020fca6ea1SDimitry Andric           if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
36030fca6ea1SDimitry Andric               AArch64InstrInfo::isFpOrNEON(MI))
36040fca6ea1SDimitry Andric             FrameObjects[*FI] |= 2;
36050fca6ea1SDimitry Andric           else
36060fca6ea1SDimitry Andric             FrameObjects[*FI] |= 1;
36070fca6ea1SDimitry Andric         }
36080fca6ea1SDimitry Andric       }
36090fca6ea1SDimitry Andric     }
36100fca6ea1SDimitry Andric     HasFPRStackObjects =
36110fca6ea1SDimitry Andric         any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
36120fca6ea1SDimitry Andric   }
36130fca6ea1SDimitry Andric 
36140fca6ea1SDimitry Andric   if (HasFPRCSRs || HasFPRStackObjects) {
36150fca6ea1SDimitry Andric     int ID = MFI.CreateStackObject(StackHazardSize, Align(16), false);
36160fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "Created Hazard slot at " << ID << " size "
36170fca6ea1SDimitry Andric                       << StackHazardSize << "\n");
36180fca6ea1SDimitry Andric     MF.getInfo<AArch64FunctionInfo>()->setStackHazardSlotIndex(ID);
36190fca6ea1SDimitry Andric   }
36200b57cec5SDimitry Andric }
36210b57cec5SDimitry Andric 
36220b57cec5SDimitry Andric void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
36230b57cec5SDimitry Andric                                                 BitVector &SavedRegs,
36240b57cec5SDimitry Andric                                                 RegScavenger *RS) const {
36250b57cec5SDimitry Andric   // All calls are tail calls in GHC calling conv, and functions have no
36260b57cec5SDimitry Andric   // prologue/epilogue.
36270b57cec5SDimitry Andric   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
36280b57cec5SDimitry Andric     return;
36290b57cec5SDimitry Andric 
36300b57cec5SDimitry Andric   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
36310b57cec5SDimitry Andric   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
36320b57cec5SDimitry Andric       MF.getSubtarget().getRegisterInfo());
36335ffd83dbSDimitry Andric   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
36340b57cec5SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
36350b57cec5SDimitry Andric   unsigned UnspilledCSGPR = AArch64::NoRegister;
36360b57cec5SDimitry Andric   unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
36370b57cec5SDimitry Andric 
36380b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
36390b57cec5SDimitry Andric   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
36400b57cec5SDimitry Andric 
36410b57cec5SDimitry Andric   unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
36420b57cec5SDimitry Andric                                 ? RegInfo->getBaseRegister()
36430b57cec5SDimitry Andric                                 : (unsigned)AArch64::NoRegister;
36440b57cec5SDimitry Andric 
36450b57cec5SDimitry Andric   unsigned ExtraCSSpill = 0;
36465f757f3fSDimitry Andric   bool HasUnpairedGPR64 = false;
36470b57cec5SDimitry Andric   // Figure out which callee-saved registers to save/restore.
36480b57cec5SDimitry Andric   for (unsigned i = 0; CSRegs[i]; ++i) {
36490b57cec5SDimitry Andric     const unsigned Reg = CSRegs[i];
36500b57cec5SDimitry Andric 
36510b57cec5SDimitry Andric     // Add the base pointer register to SavedRegs if it is callee-save.
36520b57cec5SDimitry Andric     if (Reg == BasePointerReg)
36530b57cec5SDimitry Andric       SavedRegs.set(Reg);
36540b57cec5SDimitry Andric 
36550b57cec5SDimitry Andric     bool RegUsed = SavedRegs.test(Reg);
3656480093f4SDimitry Andric     unsigned PairedReg = AArch64::NoRegister;
36575f757f3fSDimitry Andric     const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
36585f757f3fSDimitry Andric     if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
36595f757f3fSDimitry Andric         AArch64::FPR128RegClass.contains(Reg)) {
36605f757f3fSDimitry Andric       // Compensate for odd numbers of GP CSRs.
36615f757f3fSDimitry Andric       // For now, all the known cases of odd number of CSRs are of GPRs.
36625f757f3fSDimitry Andric       if (HasUnpairedGPR64)
36635f757f3fSDimitry Andric         PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
36645f757f3fSDimitry Andric       else
3665480093f4SDimitry Andric         PairedReg = CSRegs[i ^ 1];
36665f757f3fSDimitry Andric     }
36675f757f3fSDimitry Andric 
36685f757f3fSDimitry Andric     // If the function requires all the GP registers to save (SavedRegs),
36695f757f3fSDimitry Andric     // and there are an odd number of GP CSRs at the same time (CSRegs),
36705f757f3fSDimitry Andric     // PairedReg could be in a different register class from Reg, which would
36715f757f3fSDimitry Andric     // lead to a FPR (usually D8) accidentally being marked saved.
36725f757f3fSDimitry Andric     if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
36735f757f3fSDimitry Andric       PairedReg = AArch64::NoRegister;
36745f757f3fSDimitry Andric       HasUnpairedGPR64 = true;
36755f757f3fSDimitry Andric     }
36765f757f3fSDimitry Andric     assert(PairedReg == AArch64::NoRegister ||
36775f757f3fSDimitry Andric            AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
36785f757f3fSDimitry Andric            AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
36795f757f3fSDimitry Andric            AArch64::FPR128RegClass.contains(Reg, PairedReg));
3680480093f4SDimitry Andric 
36810b57cec5SDimitry Andric     if (!RegUsed) {
36820b57cec5SDimitry Andric       if (AArch64::GPR64RegClass.contains(Reg) &&
36830b57cec5SDimitry Andric           !RegInfo->isReservedReg(MF, Reg)) {
36840b57cec5SDimitry Andric         UnspilledCSGPR = Reg;
36850b57cec5SDimitry Andric         UnspilledCSGPRPaired = PairedReg;
36860b57cec5SDimitry Andric       }
36870b57cec5SDimitry Andric       continue;
36880b57cec5SDimitry Andric     }
36890b57cec5SDimitry Andric 
36900b57cec5SDimitry Andric     // MachO's compact unwind format relies on all registers being stored in
36910b57cec5SDimitry Andric     // pairs.
36920b57cec5SDimitry Andric     // FIXME: the usual format is actually better if unwinding isn't needed.
3693fe6060f1SDimitry Andric     if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
36940b57cec5SDimitry Andric         !SavedRegs.test(PairedReg)) {
36950b57cec5SDimitry Andric       SavedRegs.set(PairedReg);
36960b57cec5SDimitry Andric       if (AArch64::GPR64RegClass.contains(PairedReg) &&
36970b57cec5SDimitry Andric           !RegInfo->isReservedReg(MF, PairedReg))
36980b57cec5SDimitry Andric         ExtraCSSpill = PairedReg;
36990b57cec5SDimitry Andric     }
37000b57cec5SDimitry Andric   }
37010b57cec5SDimitry Andric 
37025ffd83dbSDimitry Andric   if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&
37035ffd83dbSDimitry Andric       !Subtarget.isTargetWindows()) {
37045ffd83dbSDimitry Andric     // For Windows calling convention on a non-windows OS, where X18 is treated
37055ffd83dbSDimitry Andric     // as reserved, back up X18 when entering non-windows code (marked with the
37065ffd83dbSDimitry Andric     // Windows calling convention) and restore when returning regardless of
37075ffd83dbSDimitry Andric     // whether the individual function uses it - it might call other functions
37085ffd83dbSDimitry Andric     // that clobber it.
37095ffd83dbSDimitry Andric     SavedRegs.set(AArch64::X18);
37105ffd83dbSDimitry Andric   }
37115ffd83dbSDimitry Andric 
37120b57cec5SDimitry Andric   // Calculates the callee saved stack size.
37130b57cec5SDimitry Andric   unsigned CSStackSize = 0;
3714480093f4SDimitry Andric   unsigned SVECSStackSize = 0;
37150b57cec5SDimitry Andric   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
37160b57cec5SDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
3717480093f4SDimitry Andric   for (unsigned Reg : SavedRegs.set_bits()) {
3718480093f4SDimitry Andric     auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
3719480093f4SDimitry Andric     if (AArch64::PPRRegClass.contains(Reg) ||
3720480093f4SDimitry Andric         AArch64::ZPRRegClass.contains(Reg))
3721480093f4SDimitry Andric       SVECSStackSize += RegSize;
3722480093f4SDimitry Andric     else
3723480093f4SDimitry Andric       CSStackSize += RegSize;
3724480093f4SDimitry Andric   }
37250b57cec5SDimitry Andric 
37260fca6ea1SDimitry Andric   // Increase the callee-saved stack size if the function has streaming mode
37270fca6ea1SDimitry Andric   // changes, as we will need to spill the value of the VG register.
37280fca6ea1SDimitry Andric   // For locally streaming functions, we spill both the streaming and
37290fca6ea1SDimitry Andric   // non-streaming VG value.
37300fca6ea1SDimitry Andric   const Function &F = MF.getFunction();
37310fca6ea1SDimitry Andric   SMEAttrs Attrs(F);
3732*71ac745dSDimitry Andric   if (requiresSaveVG(MF)) {
37330fca6ea1SDimitry Andric     if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
37340fca6ea1SDimitry Andric       CSStackSize += 16;
37350fca6ea1SDimitry Andric     else
37360fca6ea1SDimitry Andric       CSStackSize += 8;
37370fca6ea1SDimitry Andric   }
37380fca6ea1SDimitry Andric 
37390fca6ea1SDimitry Andric   // Determine if a Hazard slot should be used, and increase the CSStackSize by
37400fca6ea1SDimitry Andric   // StackHazardSize if so.
37410fca6ea1SDimitry Andric   determineStackHazardSlot(MF, SavedRegs);
37420fca6ea1SDimitry Andric   if (AFI->hasStackHazardSlotIndex())
37430fca6ea1SDimitry Andric     CSStackSize += StackHazardSize;
37440fca6ea1SDimitry Andric 
37450b57cec5SDimitry Andric   // Save number of saved regs, so we can easily update CSStackSize later.
37460b57cec5SDimitry Andric   unsigned NumSavedRegs = SavedRegs.count();
37470b57cec5SDimitry Andric 
37480b57cec5SDimitry Andric   // The frame record needs to be created by saving the appropriate registers
3749480093f4SDimitry Andric   uint64_t EstimatedStackSize = MFI.estimateStackSize(MF);
37500b57cec5SDimitry Andric   if (hasFP(MF) ||
37510b57cec5SDimitry Andric       windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
37520b57cec5SDimitry Andric     SavedRegs.set(AArch64::FP);
37530b57cec5SDimitry Andric     SavedRegs.set(AArch64::LR);
37540b57cec5SDimitry Andric   }
37550b57cec5SDimitry Andric 
37560fca6ea1SDimitry Andric   LLVM_DEBUG({
37570fca6ea1SDimitry Andric     dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
37580fca6ea1SDimitry Andric     for (unsigned Reg : SavedRegs.set_bits())
37590fca6ea1SDimitry Andric       dbgs() << ' ' << printReg(Reg, RegInfo);
37600fca6ea1SDimitry Andric     dbgs() << "\n";
37610fca6ea1SDimitry Andric   });
37620b57cec5SDimitry Andric 
37630b57cec5SDimitry Andric   // If any callee-saved registers are used, the frame cannot be eliminated.
37648bcb0991SDimitry Andric   int64_t SVEStackSize =
3765480093f4SDimitry Andric       alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
37668bcb0991SDimitry Andric   bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
37670b57cec5SDimitry Andric 
37680b57cec5SDimitry Andric   // The CSR spill slots have not been allocated yet, so estimateStackSize
37690b57cec5SDimitry Andric   // won't include them.
37700b57cec5SDimitry Andric   unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
37718bcb0991SDimitry Andric 
377206c3fb27SDimitry Andric   // We may address some of the stack above the canonical frame address, either
377306c3fb27SDimitry Andric   // for our own arguments or during a call. Include that in calculating whether
377406c3fb27SDimitry Andric   // we have complicated addressing concerns.
377506c3fb27SDimitry Andric   int64_t CalleeStackUsed = 0;
377606c3fb27SDimitry Andric   for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
377706c3fb27SDimitry Andric     int64_t FixedOff = MFI.getObjectOffset(I);
37780fca6ea1SDimitry Andric     if (FixedOff > CalleeStackUsed)
37790fca6ea1SDimitry Andric       CalleeStackUsed = FixedOff;
378006c3fb27SDimitry Andric   }
378106c3fb27SDimitry Andric 
37828bcb0991SDimitry Andric   // Conservatively always assume BigStack when there are SVE spills.
378306c3fb27SDimitry Andric   bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
378406c3fb27SDimitry Andric                                    CalleeStackUsed) > EstimatedStackSizeLimit;
37850b57cec5SDimitry Andric   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
37860b57cec5SDimitry Andric     AFI->setHasStackFrame(true);
37870b57cec5SDimitry Andric 
37880b57cec5SDimitry Andric   // Estimate if we might need to scavenge a register at some point in order
37890b57cec5SDimitry Andric   // to materialize a stack offset. If so, either spill one additional
37900b57cec5SDimitry Andric   // callee-saved register or reserve a special spill slot to facilitate
37910b57cec5SDimitry Andric   // register scavenging. If we already spilled an extra callee-saved register
37920b57cec5SDimitry Andric   // above to keep the number of spills even, we don't need to do anything else
37930b57cec5SDimitry Andric   // here.
37940b57cec5SDimitry Andric   if (BigStack) {
37950b57cec5SDimitry Andric     if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
37960b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
37970b57cec5SDimitry Andric                         << " to get a scratch register.\n");
37980b57cec5SDimitry Andric       SavedRegs.set(UnspilledCSGPR);
37995f757f3fSDimitry Andric       ExtraCSSpill = UnspilledCSGPR;
38005f757f3fSDimitry Andric 
38010b57cec5SDimitry Andric       // MachO's compact unwind format relies on all registers being stored in
38020b57cec5SDimitry Andric       // pairs, so if we need to spill one extra for BigStack, then we need to
38030b57cec5SDimitry Andric       // store the pair.
38045f757f3fSDimitry Andric       if (producePairRegisters(MF)) {
38055f757f3fSDimitry Andric         if (UnspilledCSGPRPaired == AArch64::NoRegister) {
38065f757f3fSDimitry Andric           // Failed to make a pair for compact unwind format, revert spilling.
38075f757f3fSDimitry Andric           if (produceCompactUnwindFrame(MF)) {
38085f757f3fSDimitry Andric             SavedRegs.reset(UnspilledCSGPR);
38095f757f3fSDimitry Andric             ExtraCSSpill = AArch64::NoRegister;
38105f757f3fSDimitry Andric           }
38115f757f3fSDimitry Andric         } else
38120b57cec5SDimitry Andric           SavedRegs.set(UnspilledCSGPRPaired);
38135f757f3fSDimitry Andric       }
38140b57cec5SDimitry Andric     }
38150b57cec5SDimitry Andric 
38160b57cec5SDimitry Andric     // If we didn't find an extra callee-saved register to spill, create
38170b57cec5SDimitry Andric     // an emergency spill slot.
38180b57cec5SDimitry Andric     if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
38190b57cec5SDimitry Andric       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
38200b57cec5SDimitry Andric       const TargetRegisterClass &RC = AArch64::GPR64RegClass;
38210b57cec5SDimitry Andric       unsigned Size = TRI->getSpillSize(RC);
38225ffd83dbSDimitry Andric       Align Alignment = TRI->getSpillAlign(RC);
38235ffd83dbSDimitry Andric       int FI = MFI.CreateStackObject(Size, Alignment, false);
38240b57cec5SDimitry Andric       RS->addScavengingFrameIndex(FI);
38250b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
38260b57cec5SDimitry Andric                         << " as the emergency spill slot.\n");
38270b57cec5SDimitry Andric     }
38280b57cec5SDimitry Andric   }
38290b57cec5SDimitry Andric 
38300b57cec5SDimitry Andric   // Adding the size of additional 64bit GPR saves.
38310b57cec5SDimitry Andric   CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
3832fe6060f1SDimitry Andric 
3833fe6060f1SDimitry Andric   // A Swift asynchronous context extends the frame record with a pointer
3834fe6060f1SDimitry Andric   // directly before FP.
3835fe6060f1SDimitry Andric   if (hasFP(MF) && AFI->hasSwiftAsyncContext())
3836fe6060f1SDimitry Andric     CSStackSize += 8;
3837fe6060f1SDimitry Andric 
3838480093f4SDimitry Andric   uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16);
38390b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
38400fca6ea1SDimitry Andric                     << EstimatedStackSize + AlignedCSStackSize << " bytes.\n");
38410b57cec5SDimitry Andric 
3842480093f4SDimitry Andric   assert((!MFI.isCalleeSavedInfoValid() ||
3843480093f4SDimitry Andric           AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
3844480093f4SDimitry Andric          "Should not invalidate callee saved info");
3845480093f4SDimitry Andric 
38460b57cec5SDimitry Andric   // Round up to register pair alignment to avoid additional SP adjustment
38470b57cec5SDimitry Andric   // instructions.
38480b57cec5SDimitry Andric   AFI->setCalleeSavedStackSize(AlignedCSStackSize);
38490b57cec5SDimitry Andric   AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
3850480093f4SDimitry Andric   AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
38510b57cec5SDimitry Andric }
38520b57cec5SDimitry Andric 
3853e8d8bef9SDimitry Andric bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
3854fe6060f1SDimitry Andric     MachineFunction &MF, const TargetRegisterInfo *RegInfo,
3855fe6060f1SDimitry Andric     std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
3856fe6060f1SDimitry Andric     unsigned &MaxCSFrameIndex) const {
3857e8d8bef9SDimitry Andric   bool NeedsWinCFI = needsWinCFI(MF);
3858e8d8bef9SDimitry Andric   // To match the canonical windows frame layout, reverse the list of
3859e8d8bef9SDimitry Andric   // callee saved registers to get them laid out by PrologEpilogInserter
3860e8d8bef9SDimitry Andric   // in the right order. (PrologEpilogInserter allocates stack objects top
3861e8d8bef9SDimitry Andric   // down. Windows canonical prologs store higher numbered registers at
3862e8d8bef9SDimitry Andric   // the top, thus have the CSI array start from the highest registers.)
3863e8d8bef9SDimitry Andric   if (NeedsWinCFI)
3864e8d8bef9SDimitry Andric     std::reverse(CSI.begin(), CSI.end());
3865fe6060f1SDimitry Andric 
3866fe6060f1SDimitry Andric   if (CSI.empty())
3867fe6060f1SDimitry Andric     return true; // Early exit if no callee saved registers are modified!
3868fe6060f1SDimitry Andric 
3869fe6060f1SDimitry Andric   // Now that we know which registers need to be saved and restored, allocate
3870fe6060f1SDimitry Andric   // stack slots for them.
3871fe6060f1SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
3872fe6060f1SDimitry Andric   auto *AFI = MF.getInfo<AArch64FunctionInfo>();
387381ad6265SDimitry Andric 
387481ad6265SDimitry Andric   bool UsesWinAAPCS = isTargetWindows(MF);
387581ad6265SDimitry Andric   if (UsesWinAAPCS && hasFP(MF) && AFI->hasSwiftAsyncContext()) {
387681ad6265SDimitry Andric     int FrameIdx = MFI.CreateStackObject(8, Align(16), true);
387781ad6265SDimitry Andric     AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
38780fca6ea1SDimitry Andric     if ((unsigned)FrameIdx < MinCSFrameIndex)
38790fca6ea1SDimitry Andric       MinCSFrameIndex = FrameIdx;
38800fca6ea1SDimitry Andric     if ((unsigned)FrameIdx > MaxCSFrameIndex)
38810fca6ea1SDimitry Andric       MaxCSFrameIndex = FrameIdx;
388281ad6265SDimitry Andric   }
388381ad6265SDimitry Andric 
38840fca6ea1SDimitry Andric   // Insert VG into the list of CSRs, immediately before LR if saved.
3885*71ac745dSDimitry Andric   if (requiresSaveVG(MF)) {
38860fca6ea1SDimitry Andric     std::vector<CalleeSavedInfo> VGSaves;
38870fca6ea1SDimitry Andric     SMEAttrs Attrs(MF.getFunction());
38880fca6ea1SDimitry Andric 
38890fca6ea1SDimitry Andric     auto VGInfo = CalleeSavedInfo(AArch64::VG);
38900fca6ea1SDimitry Andric     VGInfo.setRestored(false);
38910fca6ea1SDimitry Andric     VGSaves.push_back(VGInfo);
38920fca6ea1SDimitry Andric 
38930fca6ea1SDimitry Andric     // Add VG again if the function is locally-streaming, as we will spill two
38940fca6ea1SDimitry Andric     // values.
38950fca6ea1SDimitry Andric     if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
38960fca6ea1SDimitry Andric       VGSaves.push_back(VGInfo);
38970fca6ea1SDimitry Andric 
38980fca6ea1SDimitry Andric     bool InsertBeforeLR = false;
38990fca6ea1SDimitry Andric 
39000fca6ea1SDimitry Andric     for (unsigned I = 0; I < CSI.size(); I++)
39010fca6ea1SDimitry Andric       if (CSI[I].getReg() == AArch64::LR) {
39020fca6ea1SDimitry Andric         InsertBeforeLR = true;
39030fca6ea1SDimitry Andric         CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
39040fca6ea1SDimitry Andric         break;
39050fca6ea1SDimitry Andric       }
39060fca6ea1SDimitry Andric 
39070fca6ea1SDimitry Andric     if (!InsertBeforeLR)
39080fca6ea1SDimitry Andric       CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
39090fca6ea1SDimitry Andric   }
39100fca6ea1SDimitry Andric 
39110fca6ea1SDimitry Andric   Register LastReg = 0;
39120fca6ea1SDimitry Andric   int HazardSlotIndex = std::numeric_limits<int>::max();
3913fe6060f1SDimitry Andric   for (auto &CS : CSI) {
3914fe6060f1SDimitry Andric     Register Reg = CS.getReg();
3915fe6060f1SDimitry Andric     const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
3916fe6060f1SDimitry Andric 
39170fca6ea1SDimitry Andric     // Create a hazard slot as we switch between GPR and FPR CSRs.
39180fca6ea1SDimitry Andric     if (AFI->hasStackHazardSlotIndex() &&
39190fca6ea1SDimitry Andric         (!LastReg || !AArch64InstrInfo::isFpOrNEON(LastReg)) &&
39200fca6ea1SDimitry Andric         AArch64InstrInfo::isFpOrNEON(Reg)) {
39210fca6ea1SDimitry Andric       assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
39220fca6ea1SDimitry Andric              "Unexpected register order for hazard slot");
39230fca6ea1SDimitry Andric       HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
39240fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
39250fca6ea1SDimitry Andric                         << "\n");
39260fca6ea1SDimitry Andric       AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
39270fca6ea1SDimitry Andric       if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
39280fca6ea1SDimitry Andric         MinCSFrameIndex = HazardSlotIndex;
39290fca6ea1SDimitry Andric       if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
39300fca6ea1SDimitry Andric         MaxCSFrameIndex = HazardSlotIndex;
39310fca6ea1SDimitry Andric     }
39320fca6ea1SDimitry Andric 
3933fe6060f1SDimitry Andric     unsigned Size = RegInfo->getSpillSize(*RC);
3934fe6060f1SDimitry Andric     Align Alignment(RegInfo->getSpillAlign(*RC));
3935fe6060f1SDimitry Andric     int FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
3936fe6060f1SDimitry Andric     CS.setFrameIdx(FrameIdx);
3937fe6060f1SDimitry Andric 
39380fca6ea1SDimitry Andric     if ((unsigned)FrameIdx < MinCSFrameIndex)
39390fca6ea1SDimitry Andric       MinCSFrameIndex = FrameIdx;
39400fca6ea1SDimitry Andric     if ((unsigned)FrameIdx > MaxCSFrameIndex)
39410fca6ea1SDimitry Andric       MaxCSFrameIndex = FrameIdx;
3942fe6060f1SDimitry Andric 
3943fe6060f1SDimitry Andric     // Grab 8 bytes below FP for the extended asynchronous frame info.
394481ad6265SDimitry Andric     if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !UsesWinAAPCS &&
394581ad6265SDimitry Andric         Reg == AArch64::FP) {
3946fe6060f1SDimitry Andric       FrameIdx = MFI.CreateStackObject(8, Alignment, true);
3947fe6060f1SDimitry Andric       AFI->setSwiftAsyncContextFrameIdx(FrameIdx);
39480fca6ea1SDimitry Andric       if ((unsigned)FrameIdx < MinCSFrameIndex)
39490fca6ea1SDimitry Andric         MinCSFrameIndex = FrameIdx;
39500fca6ea1SDimitry Andric       if ((unsigned)FrameIdx > MaxCSFrameIndex)
39510fca6ea1SDimitry Andric         MaxCSFrameIndex = FrameIdx;
3952fe6060f1SDimitry Andric     }
39530fca6ea1SDimitry Andric     LastReg = Reg;
3954fe6060f1SDimitry Andric   }
39550fca6ea1SDimitry Andric 
39560fca6ea1SDimitry Andric   // Add hazard slot in the case where no FPR CSRs are present.
39570fca6ea1SDimitry Andric   if (AFI->hasStackHazardSlotIndex() &&
39580fca6ea1SDimitry Andric       HazardSlotIndex == std::numeric_limits<int>::max()) {
39590fca6ea1SDimitry Andric     HazardSlotIndex = MFI.CreateStackObject(StackHazardSize, Align(8), true);
39600fca6ea1SDimitry Andric     LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
39610fca6ea1SDimitry Andric                       << "\n");
39620fca6ea1SDimitry Andric     AFI->setStackHazardCSRSlotIndex(HazardSlotIndex);
39630fca6ea1SDimitry Andric     if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
39640fca6ea1SDimitry Andric       MinCSFrameIndex = HazardSlotIndex;
39650fca6ea1SDimitry Andric     if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
39660fca6ea1SDimitry Andric       MaxCSFrameIndex = HazardSlotIndex;
39670fca6ea1SDimitry Andric   }
39680fca6ea1SDimitry Andric 
3969fe6060f1SDimitry Andric   return true;
3970e8d8bef9SDimitry Andric }
3971e8d8bef9SDimitry Andric 
39720b57cec5SDimitry Andric bool AArch64FrameLowering::enableStackSlotScavenging(
39730b57cec5SDimitry Andric     const MachineFunction &MF) const {
39740b57cec5SDimitry Andric   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
39755f757f3fSDimitry Andric   // If the function has streaming-mode changes, don't scavenge a
39765f757f3fSDimitry Andric   // spillslot in the callee-save area, as that might require an
39775f757f3fSDimitry Andric   // 'addvl' in the streaming-mode-changing call-sequence when the
39785f757f3fSDimitry Andric   // function doesn't use a FP.
39795f757f3fSDimitry Andric   if (AFI->hasStreamingModeChanges() && !hasFP(MF))
39805f757f3fSDimitry Andric     return false;
39810fca6ea1SDimitry Andric   // Don't allow register salvaging with hazard slots, in case it moves objects
39820fca6ea1SDimitry Andric   // into the wrong place.
39830fca6ea1SDimitry Andric   if (AFI->hasStackHazardSlotIndex())
39840fca6ea1SDimitry Andric     return false;
39850b57cec5SDimitry Andric   return AFI->hasCalleeSaveStackFreeSpace();
39860b57cec5SDimitry Andric }
39870b57cec5SDimitry Andric 
3988480093f4SDimitry Andric /// returns true if there are any SVE callee saves.
3989480093f4SDimitry Andric static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
3990480093f4SDimitry Andric                                       int &Min, int &Max) {
3991480093f4SDimitry Andric   Min = std::numeric_limits<int>::max();
3992480093f4SDimitry Andric   Max = std::numeric_limits<int>::min();
3993480093f4SDimitry Andric 
3994480093f4SDimitry Andric   if (!MFI.isCalleeSavedInfoValid())
3995480093f4SDimitry Andric     return false;
3996480093f4SDimitry Andric 
3997480093f4SDimitry Andric   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
3998480093f4SDimitry Andric   for (auto &CS : CSI) {
3999480093f4SDimitry Andric     if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
4000480093f4SDimitry Andric         AArch64::PPRRegClass.contains(CS.getReg())) {
4001480093f4SDimitry Andric       assert((Max == std::numeric_limits<int>::min() ||
4002480093f4SDimitry Andric               Max + 1 == CS.getFrameIdx()) &&
4003480093f4SDimitry Andric              "SVE CalleeSaves are not consecutive");
4004480093f4SDimitry Andric 
4005480093f4SDimitry Andric       Min = std::min(Min, CS.getFrameIdx());
4006480093f4SDimitry Andric       Max = std::max(Max, CS.getFrameIdx());
4007480093f4SDimitry Andric     }
4008480093f4SDimitry Andric   }
4009480093f4SDimitry Andric   return Min != std::numeric_limits<int>::max();
4010480093f4SDimitry Andric }
4011480093f4SDimitry Andric 
4012480093f4SDimitry Andric // Process all the SVE stack objects and determine offsets for each
4013480093f4SDimitry Andric // object. If AssignOffsets is true, the offsets get assigned.
4014480093f4SDimitry Andric // Fills in the first and last callee-saved frame indices into
4015480093f4SDimitry Andric // Min/MaxCSFrameIndex, respectively.
4016480093f4SDimitry Andric // Returns the size of the stack.
4017480093f4SDimitry Andric static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
4018480093f4SDimitry Andric                                               int &MinCSFrameIndex,
4019480093f4SDimitry Andric                                               int &MaxCSFrameIndex,
4020480093f4SDimitry Andric                                               bool AssignOffsets) {
4021979e22ffSDimitry Andric #ifndef NDEBUG
4022480093f4SDimitry Andric   // First process all fixed stack objects.
40238bcb0991SDimitry Andric   for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
4024e8d8bef9SDimitry Andric     assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
4025979e22ffSDimitry Andric            "SVE vectors should never be passed on the stack by value, only by "
4026979e22ffSDimitry Andric            "reference.");
4027979e22ffSDimitry Andric #endif
40288bcb0991SDimitry Andric 
4029480093f4SDimitry Andric   auto Assign = [&MFI](int FI, int64_t Offset) {
4030480093f4SDimitry Andric     LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
4031480093f4SDimitry Andric     MFI.setObjectOffset(FI, Offset);
4032480093f4SDimitry Andric   };
4033480093f4SDimitry Andric 
4034979e22ffSDimitry Andric   int64_t Offset = 0;
4035979e22ffSDimitry Andric 
4036480093f4SDimitry Andric   // Then process all callee saved slots.
4037480093f4SDimitry Andric   if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
4038480093f4SDimitry Andric     // Assign offsets to the callee save slots.
4039480093f4SDimitry Andric     for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
4040480093f4SDimitry Andric       Offset += MFI.getObjectSize(I);
40415ffd83dbSDimitry Andric       Offset = alignTo(Offset, MFI.getObjectAlign(I));
4042480093f4SDimitry Andric       if (AssignOffsets)
4043480093f4SDimitry Andric         Assign(I, -Offset);
4044480093f4SDimitry Andric     }
4045480093f4SDimitry Andric   }
4046480093f4SDimitry Andric 
4047979e22ffSDimitry Andric   // Ensure that the Callee-save area is aligned to 16bytes.
4048979e22ffSDimitry Andric   Offset = alignTo(Offset, Align(16U));
4049979e22ffSDimitry Andric 
4050480093f4SDimitry Andric   // Create a buffer of SVE objects to allocate and sort it.
4051480093f4SDimitry Andric   SmallVector<int, 8> ObjectsToAllocate;
40520eae32dcSDimitry Andric   // If we have a stack protector, and we've previously decided that we have SVE
40530eae32dcSDimitry Andric   // objects on the stack and thus need it to go in the SVE stack area, then it
40540eae32dcSDimitry Andric   // needs to go first.
40550eae32dcSDimitry Andric   int StackProtectorFI = -1;
40560eae32dcSDimitry Andric   if (MFI.hasStackProtectorIndex()) {
40570eae32dcSDimitry Andric     StackProtectorFI = MFI.getStackProtectorIndex();
40580eae32dcSDimitry Andric     if (MFI.getStackID(StackProtectorFI) == TargetStackID::ScalableVector)
40590eae32dcSDimitry Andric       ObjectsToAllocate.push_back(StackProtectorFI);
40600eae32dcSDimitry Andric   }
4061480093f4SDimitry Andric   for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
4062480093f4SDimitry Andric     unsigned StackID = MFI.getStackID(I);
4063e8d8bef9SDimitry Andric     if (StackID != TargetStackID::ScalableVector)
4064480093f4SDimitry Andric       continue;
40650eae32dcSDimitry Andric     if (I == StackProtectorFI)
40660eae32dcSDimitry Andric       continue;
4067480093f4SDimitry Andric     if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
4068480093f4SDimitry Andric       continue;
4069480093f4SDimitry Andric     if (MFI.isDeadObjectIndex(I))
4070480093f4SDimitry Andric       continue;
4071480093f4SDimitry Andric 
4072480093f4SDimitry Andric     ObjectsToAllocate.push_back(I);
4073480093f4SDimitry Andric   }
4074480093f4SDimitry Andric 
4075480093f4SDimitry Andric   // Allocate all SVE locals and spills
4076480093f4SDimitry Andric   for (unsigned FI : ObjectsToAllocate) {
40775ffd83dbSDimitry Andric     Align Alignment = MFI.getObjectAlign(FI);
4078480093f4SDimitry Andric     // FIXME: Given that the length of SVE vectors is not necessarily a power of
4079480093f4SDimitry Andric     // two, we'd need to align every object dynamically at runtime if the
4080480093f4SDimitry Andric     // alignment is larger than 16. This is not yet supported.
40815ffd83dbSDimitry Andric     if (Alignment > Align(16))
4082480093f4SDimitry Andric       report_fatal_error(
4083480093f4SDimitry Andric           "Alignment of scalable vectors > 16 bytes is not yet supported");
4084480093f4SDimitry Andric 
40855ffd83dbSDimitry Andric     Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
4086480093f4SDimitry Andric     if (AssignOffsets)
4087480093f4SDimitry Andric       Assign(FI, -Offset);
4088480093f4SDimitry Andric   }
4089480093f4SDimitry Andric 
40908bcb0991SDimitry Andric   return Offset;
40918bcb0991SDimitry Andric }
40928bcb0991SDimitry Andric 
4093480093f4SDimitry Andric int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
4094480093f4SDimitry Andric     MachineFrameInfo &MFI) const {
4095480093f4SDimitry Andric   int MinCSFrameIndex, MaxCSFrameIndex;
4096480093f4SDimitry Andric   return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
4097480093f4SDimitry Andric }
4098480093f4SDimitry Andric 
4099480093f4SDimitry Andric int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
4100480093f4SDimitry Andric     MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
4101480093f4SDimitry Andric   return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
4102480093f4SDimitry Andric                                         true);
4103480093f4SDimitry Andric }
4104480093f4SDimitry Andric 
41050b57cec5SDimitry Andric void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
41060b57cec5SDimitry Andric     MachineFunction &MF, RegScavenger *RS) const {
41078bcb0991SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
41088bcb0991SDimitry Andric 
41098bcb0991SDimitry Andric   assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
41108bcb0991SDimitry Andric          "Upwards growing stack unsupported");
41118bcb0991SDimitry Andric 
4112480093f4SDimitry Andric   int MinCSFrameIndex, MaxCSFrameIndex;
4113480093f4SDimitry Andric   int64_t SVEStackSize =
4114480093f4SDimitry Andric       assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
41158bcb0991SDimitry Andric 
41168bcb0991SDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
4117480093f4SDimitry Andric   AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
4118480093f4SDimitry Andric   AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
41198bcb0991SDimitry Andric 
41200b57cec5SDimitry Andric   // If this function isn't doing Win64-style C++ EH, we don't need to do
41210b57cec5SDimitry Andric   // anything.
41220b57cec5SDimitry Andric   if (!MF.hasEHFunclets())
41230b57cec5SDimitry Andric     return;
41240b57cec5SDimitry Andric   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
41250b57cec5SDimitry Andric   WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
41260b57cec5SDimitry Andric 
41270b57cec5SDimitry Andric   MachineBasicBlock &MBB = MF.front();
41280b57cec5SDimitry Andric   auto MBBI = MBB.begin();
41290b57cec5SDimitry Andric   while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
41300b57cec5SDimitry Andric     ++MBBI;
41310b57cec5SDimitry Andric 
41320b57cec5SDimitry Andric   // Create an UnwindHelp object.
413362cfcf62SDimitry Andric   // The UnwindHelp object is allocated at the start of the fixed object area
413462cfcf62SDimitry Andric   int64_t FixedObject =
413562cfcf62SDimitry Andric       getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false);
413662cfcf62SDimitry Andric   int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8,
413762cfcf62SDimitry Andric                                            /*SPOffset*/ -FixedObject,
413862cfcf62SDimitry Andric                                            /*IsImmutable=*/false);
41390b57cec5SDimitry Andric   EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
414062cfcf62SDimitry Andric 
41410b57cec5SDimitry Andric   // We need to store -2 into the UnwindHelp object at the start of the
41420b57cec5SDimitry Andric   // function.
41430b57cec5SDimitry Andric   DebugLoc DL;
41440b57cec5SDimitry Andric   RS->enterBasicBlockEnd(MBB);
41455f757f3fSDimitry Andric   RS->backward(MBBI);
414604eeddc0SDimitry Andric   Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
41470b57cec5SDimitry Andric   assert(DstReg && "There must be a free register after frame setup");
41480b57cec5SDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
41490b57cec5SDimitry Andric   BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
41500b57cec5SDimitry Andric       .addReg(DstReg, getKillRegState(true))
41510b57cec5SDimitry Andric       .addFrameIndex(UnwindHelpFI)
41520b57cec5SDimitry Andric       .addImm(0);
41530b57cec5SDimitry Andric }
41540b57cec5SDimitry Andric 
41555ffd83dbSDimitry Andric namespace {
41565ffd83dbSDimitry Andric struct TagStoreInstr {
41575ffd83dbSDimitry Andric   MachineInstr *MI;
41585ffd83dbSDimitry Andric   int64_t Offset, Size;
41595ffd83dbSDimitry Andric   explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
41605ffd83dbSDimitry Andric       : MI(MI), Offset(Offset), Size(Size) {}
41615ffd83dbSDimitry Andric };
41625ffd83dbSDimitry Andric 
41635ffd83dbSDimitry Andric class TagStoreEdit {
41645ffd83dbSDimitry Andric   MachineFunction *MF;
41655ffd83dbSDimitry Andric   MachineBasicBlock *MBB;
41665ffd83dbSDimitry Andric   MachineRegisterInfo *MRI;
41675ffd83dbSDimitry Andric   // Tag store instructions that are being replaced.
41685ffd83dbSDimitry Andric   SmallVector<TagStoreInstr, 8> TagStores;
41695ffd83dbSDimitry Andric   // Combined memref arguments of the above instructions.
41705ffd83dbSDimitry Andric   SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
41715ffd83dbSDimitry Andric 
41725ffd83dbSDimitry Andric   // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
41735ffd83dbSDimitry Andric   // FrameRegOffset + Size) with the address tag of SP.
41745ffd83dbSDimitry Andric   Register FrameReg;
41755ffd83dbSDimitry Andric   StackOffset FrameRegOffset;
41765ffd83dbSDimitry Andric   int64_t Size;
417706c3fb27SDimitry Andric   // If not std::nullopt, move FrameReg to (FrameReg + FrameRegUpdate) at the
417806c3fb27SDimitry Andric   // end.
4179bdd1243dSDimitry Andric   std::optional<int64_t> FrameRegUpdate;
41805ffd83dbSDimitry Andric   // MIFlags for any FrameReg updating instructions.
41815ffd83dbSDimitry Andric   unsigned FrameRegUpdateFlags;
41825ffd83dbSDimitry Andric 
41835ffd83dbSDimitry Andric   // Use zeroing instruction variants.
41845ffd83dbSDimitry Andric   bool ZeroData;
41855ffd83dbSDimitry Andric   DebugLoc DL;
41865ffd83dbSDimitry Andric 
41875ffd83dbSDimitry Andric   void emitUnrolled(MachineBasicBlock::iterator InsertI);
41885ffd83dbSDimitry Andric   void emitLoop(MachineBasicBlock::iterator InsertI);
41895ffd83dbSDimitry Andric 
41905ffd83dbSDimitry Andric public:
41915ffd83dbSDimitry Andric   TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
41925ffd83dbSDimitry Andric       : MBB(MBB), ZeroData(ZeroData) {
41935ffd83dbSDimitry Andric     MF = MBB->getParent();
41945ffd83dbSDimitry Andric     MRI = &MF->getRegInfo();
41955ffd83dbSDimitry Andric   }
41965ffd83dbSDimitry Andric   // Add an instruction to be replaced. Instructions must be added in the
41975ffd83dbSDimitry Andric   // ascending order of Offset, and have to be adjacent.
41985ffd83dbSDimitry Andric   void addInstruction(TagStoreInstr I) {
41995ffd83dbSDimitry Andric     assert((TagStores.empty() ||
42005ffd83dbSDimitry Andric             TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
42015ffd83dbSDimitry Andric            "Non-adjacent tag store instructions.");
42025ffd83dbSDimitry Andric     TagStores.push_back(I);
42035ffd83dbSDimitry Andric   }
42045ffd83dbSDimitry Andric   void clear() { TagStores.clear(); }
42055ffd83dbSDimitry Andric   // Emit equivalent code at the given location, and erase the current set of
42065ffd83dbSDimitry Andric   // instructions. May skip if the replacement is not profitable. May invalidate
42075ffd83dbSDimitry Andric   // the input iterator and replace it with a valid one.
42085ffd83dbSDimitry Andric   void emitCode(MachineBasicBlock::iterator &InsertI,
420981ad6265SDimitry Andric                 const AArch64FrameLowering *TFI, bool TryMergeSPUpdate);
42105ffd83dbSDimitry Andric };
42115ffd83dbSDimitry Andric 
42125ffd83dbSDimitry Andric void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
42135ffd83dbSDimitry Andric   const AArch64InstrInfo *TII =
42145ffd83dbSDimitry Andric       MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
42155ffd83dbSDimitry Andric 
42165ffd83dbSDimitry Andric   const int64_t kMinOffset = -256 * 16;
42175ffd83dbSDimitry Andric   const int64_t kMaxOffset = 255 * 16;
42185ffd83dbSDimitry Andric 
42195ffd83dbSDimitry Andric   Register BaseReg = FrameReg;
4220e8d8bef9SDimitry Andric   int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
42215ffd83dbSDimitry Andric   if (BaseRegOffsetBytes < kMinOffset ||
422206c3fb27SDimitry Andric       BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
422306c3fb27SDimitry Andric       // BaseReg can be FP, which is not necessarily aligned to 16-bytes. In
422406c3fb27SDimitry Andric       // that case, BaseRegOffsetBytes will not be aligned to 16 bytes, which
422506c3fb27SDimitry Andric       // is required for the offset of ST2G.
422606c3fb27SDimitry Andric       BaseRegOffsetBytes % 16 != 0) {
42275ffd83dbSDimitry Andric     Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
42285ffd83dbSDimitry Andric     emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
4229e8d8bef9SDimitry Andric                     StackOffset::getFixed(BaseRegOffsetBytes), TII);
42305ffd83dbSDimitry Andric     BaseReg = ScratchReg;
42315ffd83dbSDimitry Andric     BaseRegOffsetBytes = 0;
42325ffd83dbSDimitry Andric   }
42335ffd83dbSDimitry Andric 
42345ffd83dbSDimitry Andric   MachineInstr *LastI = nullptr;
42355ffd83dbSDimitry Andric   while (Size) {
42365ffd83dbSDimitry Andric     int64_t InstrSize = (Size > 16) ? 32 : 16;
42375ffd83dbSDimitry Andric     unsigned Opcode =
42385ffd83dbSDimitry Andric         InstrSize == 16
423906c3fb27SDimitry Andric             ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
424006c3fb27SDimitry Andric             : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
424106c3fb27SDimitry Andric     assert(BaseRegOffsetBytes % 16 == 0);
42425ffd83dbSDimitry Andric     MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
42435ffd83dbSDimitry Andric                           .addReg(AArch64::SP)
42445ffd83dbSDimitry Andric                           .addReg(BaseReg)
42455ffd83dbSDimitry Andric                           .addImm(BaseRegOffsetBytes / 16)
42465ffd83dbSDimitry Andric                           .setMemRefs(CombinedMemRefs);
42475ffd83dbSDimitry Andric     // A store to [BaseReg, #0] should go last for an opportunity to fold the
42485ffd83dbSDimitry Andric     // final SP adjustment in the epilogue.
42495ffd83dbSDimitry Andric     if (BaseRegOffsetBytes == 0)
42505ffd83dbSDimitry Andric       LastI = I;
42515ffd83dbSDimitry Andric     BaseRegOffsetBytes += InstrSize;
42525ffd83dbSDimitry Andric     Size -= InstrSize;
42535ffd83dbSDimitry Andric   }
42545ffd83dbSDimitry Andric 
42555ffd83dbSDimitry Andric   if (LastI)
42565ffd83dbSDimitry Andric     MBB->splice(InsertI, MBB, LastI);
42575ffd83dbSDimitry Andric }
42585ffd83dbSDimitry Andric 
42595ffd83dbSDimitry Andric void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
42605ffd83dbSDimitry Andric   const AArch64InstrInfo *TII =
42615ffd83dbSDimitry Andric       MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
42625ffd83dbSDimitry Andric 
42635ffd83dbSDimitry Andric   Register BaseReg = FrameRegUpdate
42645ffd83dbSDimitry Andric                          ? FrameReg
42655ffd83dbSDimitry Andric                          : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
42665ffd83dbSDimitry Andric   Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
42675ffd83dbSDimitry Andric 
42685ffd83dbSDimitry Andric   emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
42695ffd83dbSDimitry Andric 
42705ffd83dbSDimitry Andric   int64_t LoopSize = Size;
42715ffd83dbSDimitry Andric   // If the loop size is not a multiple of 32, split off one 16-byte store at
42725ffd83dbSDimitry Andric   // the end to fold BaseReg update into.
42735ffd83dbSDimitry Andric   if (FrameRegUpdate && *FrameRegUpdate)
42745ffd83dbSDimitry Andric     LoopSize -= LoopSize % 32;
42755ffd83dbSDimitry Andric   MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
42765ffd83dbSDimitry Andric                                 TII->get(ZeroData ? AArch64::STZGloop_wback
42775ffd83dbSDimitry Andric                                                   : AArch64::STGloop_wback))
42785ffd83dbSDimitry Andric                             .addDef(SizeReg)
42795ffd83dbSDimitry Andric                             .addDef(BaseReg)
42805ffd83dbSDimitry Andric                             .addImm(LoopSize)
42815ffd83dbSDimitry Andric                             .addReg(BaseReg)
42825ffd83dbSDimitry Andric                             .setMemRefs(CombinedMemRefs);
42835ffd83dbSDimitry Andric   if (FrameRegUpdate)
42845ffd83dbSDimitry Andric     LoopI->setFlags(FrameRegUpdateFlags);
42855ffd83dbSDimitry Andric 
42865ffd83dbSDimitry Andric   int64_t ExtraBaseRegUpdate =
4287e8d8bef9SDimitry Andric       FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
42885ffd83dbSDimitry Andric   if (LoopSize < Size) {
42895ffd83dbSDimitry Andric     assert(FrameRegUpdate);
42905ffd83dbSDimitry Andric     assert(Size - LoopSize == 16);
42915ffd83dbSDimitry Andric     // Tag 16 more bytes at BaseReg and update BaseReg.
42925ffd83dbSDimitry Andric     BuildMI(*MBB, InsertI, DL,
42935ffd83dbSDimitry Andric             TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
42945ffd83dbSDimitry Andric         .addDef(BaseReg)
42955ffd83dbSDimitry Andric         .addReg(BaseReg)
42965ffd83dbSDimitry Andric         .addReg(BaseReg)
42975ffd83dbSDimitry Andric         .addImm(1 + ExtraBaseRegUpdate / 16)
42985ffd83dbSDimitry Andric         .setMemRefs(CombinedMemRefs)
42995ffd83dbSDimitry Andric         .setMIFlags(FrameRegUpdateFlags);
43005ffd83dbSDimitry Andric   } else if (ExtraBaseRegUpdate) {
43015ffd83dbSDimitry Andric     // Update BaseReg.
43025ffd83dbSDimitry Andric     BuildMI(
43035ffd83dbSDimitry Andric         *MBB, InsertI, DL,
43045ffd83dbSDimitry Andric         TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
43055ffd83dbSDimitry Andric         .addDef(BaseReg)
43065ffd83dbSDimitry Andric         .addReg(BaseReg)
43075ffd83dbSDimitry Andric         .addImm(std::abs(ExtraBaseRegUpdate))
43085ffd83dbSDimitry Andric         .addImm(0)
43095ffd83dbSDimitry Andric         .setMIFlags(FrameRegUpdateFlags);
43105ffd83dbSDimitry Andric   }
43115ffd83dbSDimitry Andric }
43125ffd83dbSDimitry Andric 
43135ffd83dbSDimitry Andric // Check if *II is a register update that can be merged into STGloop that ends
43145ffd83dbSDimitry Andric // at (Reg + Size). RemainingOffset is the required adjustment to Reg after the
43155ffd83dbSDimitry Andric // end of the loop.
43165ffd83dbSDimitry Andric bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
43175ffd83dbSDimitry Andric                        int64_t Size, int64_t *TotalOffset) {
43185ffd83dbSDimitry Andric   MachineInstr &MI = *II;
43195ffd83dbSDimitry Andric   if ((MI.getOpcode() == AArch64::ADDXri ||
43205ffd83dbSDimitry Andric        MI.getOpcode() == AArch64::SUBXri) &&
43215ffd83dbSDimitry Andric       MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
43225ffd83dbSDimitry Andric     unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
43235ffd83dbSDimitry Andric     int64_t Offset = MI.getOperand(2).getImm() << Shift;
43245ffd83dbSDimitry Andric     if (MI.getOpcode() == AArch64::SUBXri)
43255ffd83dbSDimitry Andric       Offset = -Offset;
43265ffd83dbSDimitry Andric     int64_t AbsPostOffset = std::abs(Offset - Size);
43275ffd83dbSDimitry Andric     const int64_t kMaxOffset =
43285ffd83dbSDimitry Andric         0xFFF; // Max encoding for unshifted ADDXri / SUBXri
43295ffd83dbSDimitry Andric     if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
43305ffd83dbSDimitry Andric       *TotalOffset = Offset;
43315ffd83dbSDimitry Andric       return true;
43325ffd83dbSDimitry Andric     }
43335ffd83dbSDimitry Andric   }
43345ffd83dbSDimitry Andric   return false;
43355ffd83dbSDimitry Andric }
43365ffd83dbSDimitry Andric 
43375ffd83dbSDimitry Andric void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
43385ffd83dbSDimitry Andric                   SmallVectorImpl<MachineMemOperand *> &MemRefs) {
43395ffd83dbSDimitry Andric   MemRefs.clear();
43405ffd83dbSDimitry Andric   for (auto &TS : TSE) {
43415ffd83dbSDimitry Andric     MachineInstr *MI = TS.MI;
43425ffd83dbSDimitry Andric     // An instruction without memory operands may access anything. Be
43435ffd83dbSDimitry Andric     // conservative and return an empty list.
43445ffd83dbSDimitry Andric     if (MI->memoperands_empty()) {
43455ffd83dbSDimitry Andric       MemRefs.clear();
43465ffd83dbSDimitry Andric       return;
43475ffd83dbSDimitry Andric     }
43485ffd83dbSDimitry Andric     MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
43495ffd83dbSDimitry Andric   }
43505ffd83dbSDimitry Andric }
43515ffd83dbSDimitry Andric 
43525ffd83dbSDimitry Andric void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
435381ad6265SDimitry Andric                             const AArch64FrameLowering *TFI,
435481ad6265SDimitry Andric                             bool TryMergeSPUpdate) {
43555ffd83dbSDimitry Andric   if (TagStores.empty())
43565ffd83dbSDimitry Andric     return;
43575ffd83dbSDimitry Andric   TagStoreInstr &FirstTagStore = TagStores[0];
43585ffd83dbSDimitry Andric   TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
43595ffd83dbSDimitry Andric   Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
43605ffd83dbSDimitry Andric   DL = TagStores[0].MI->getDebugLoc();
43615ffd83dbSDimitry Andric 
43625ffd83dbSDimitry Andric   Register Reg;
43635ffd83dbSDimitry Andric   FrameRegOffset = TFI->resolveFrameOffsetReference(
43645ffd83dbSDimitry Andric       *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
43655ffd83dbSDimitry Andric       /*PreferFP=*/false, /*ForSimm=*/true);
43665ffd83dbSDimitry Andric   FrameReg = Reg;
4367bdd1243dSDimitry Andric   FrameRegUpdate = std::nullopt;
43685ffd83dbSDimitry Andric 
43695ffd83dbSDimitry Andric   mergeMemRefs(TagStores, CombinedMemRefs);
43705ffd83dbSDimitry Andric 
43710fca6ea1SDimitry Andric   LLVM_DEBUG({
43720fca6ea1SDimitry Andric     dbgs() << "Replacing adjacent STG instructions:\n";
43730fca6ea1SDimitry Andric     for (const auto &Instr : TagStores) {
43740fca6ea1SDimitry Andric       dbgs() << "  " << *Instr.MI;
43750fca6ea1SDimitry Andric     }
43760fca6ea1SDimitry Andric   });
43775ffd83dbSDimitry Andric 
43785ffd83dbSDimitry Andric   // Size threshold where a loop becomes shorter than a linear sequence of
43795ffd83dbSDimitry Andric   // tagging instructions.
43805ffd83dbSDimitry Andric   const int kSetTagLoopThreshold = 176;
43815ffd83dbSDimitry Andric   if (Size < kSetTagLoopThreshold) {
43825ffd83dbSDimitry Andric     if (TagStores.size() < 2)
43835ffd83dbSDimitry Andric       return;
43845ffd83dbSDimitry Andric     emitUnrolled(InsertI);
43855ffd83dbSDimitry Andric   } else {
43865ffd83dbSDimitry Andric     MachineInstr *UpdateInstr = nullptr;
438781ad6265SDimitry Andric     int64_t TotalOffset = 0;
438881ad6265SDimitry Andric     if (TryMergeSPUpdate) {
43895ffd83dbSDimitry Andric       // See if we can merge base register update into the STGloop.
43905ffd83dbSDimitry Andric       // This is done in AArch64LoadStoreOptimizer for "normal" stores,
43915ffd83dbSDimitry Andric       // but STGloop is way too unusual for that, and also it only
43925ffd83dbSDimitry Andric       // realistically happens in function epilogue. Also, STGloop is expanded
43935ffd83dbSDimitry Andric       // before that pass.
43945ffd83dbSDimitry Andric       if (InsertI != MBB->end() &&
4395e8d8bef9SDimitry Andric           canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
43965ffd83dbSDimitry Andric                             &TotalOffset)) {
43975ffd83dbSDimitry Andric         UpdateInstr = &*InsertI++;
43985ffd83dbSDimitry Andric         LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n  "
43995ffd83dbSDimitry Andric                           << *UpdateInstr);
44005ffd83dbSDimitry Andric       }
44015ffd83dbSDimitry Andric     }
44025ffd83dbSDimitry Andric 
44035ffd83dbSDimitry Andric     if (!UpdateInstr && TagStores.size() < 2)
44045ffd83dbSDimitry Andric       return;
44055ffd83dbSDimitry Andric 
44065ffd83dbSDimitry Andric     if (UpdateInstr) {
44075ffd83dbSDimitry Andric       FrameRegUpdate = TotalOffset;
44085ffd83dbSDimitry Andric       FrameRegUpdateFlags = UpdateInstr->getFlags();
44095ffd83dbSDimitry Andric     }
44105ffd83dbSDimitry Andric     emitLoop(InsertI);
44115ffd83dbSDimitry Andric     if (UpdateInstr)
44125ffd83dbSDimitry Andric       UpdateInstr->eraseFromParent();
44135ffd83dbSDimitry Andric   }
44145ffd83dbSDimitry Andric 
44155ffd83dbSDimitry Andric   for (auto &TS : TagStores)
44165ffd83dbSDimitry Andric     TS.MI->eraseFromParent();
44175ffd83dbSDimitry Andric }
44185ffd83dbSDimitry Andric 
44195ffd83dbSDimitry Andric bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
44205ffd83dbSDimitry Andric                                         int64_t &Size, bool &ZeroData) {
44215ffd83dbSDimitry Andric   MachineFunction &MF = *MI.getParent()->getParent();
44225ffd83dbSDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
44235ffd83dbSDimitry Andric 
44245ffd83dbSDimitry Andric   unsigned Opcode = MI.getOpcode();
442506c3fb27SDimitry Andric   ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
442606c3fb27SDimitry Andric               Opcode == AArch64::STZ2Gi);
44275ffd83dbSDimitry Andric 
44285ffd83dbSDimitry Andric   if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
44295ffd83dbSDimitry Andric     if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
44305ffd83dbSDimitry Andric       return false;
44315ffd83dbSDimitry Andric     if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
44325ffd83dbSDimitry Andric       return false;
44335ffd83dbSDimitry Andric     Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
44345ffd83dbSDimitry Andric     Size = MI.getOperand(2).getImm();
44355ffd83dbSDimitry Andric     return true;
44365ffd83dbSDimitry Andric   }
44375ffd83dbSDimitry Andric 
443806c3fb27SDimitry Andric   if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
44395ffd83dbSDimitry Andric     Size = 16;
444006c3fb27SDimitry Andric   else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
44415ffd83dbSDimitry Andric     Size = 32;
44425ffd83dbSDimitry Andric   else
44435ffd83dbSDimitry Andric     return false;
44445ffd83dbSDimitry Andric 
44455ffd83dbSDimitry Andric   if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
44465ffd83dbSDimitry Andric     return false;
44475ffd83dbSDimitry Andric 
44485ffd83dbSDimitry Andric   Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
44495ffd83dbSDimitry Andric            16 * MI.getOperand(2).getImm();
44505ffd83dbSDimitry Andric   return true;
44515ffd83dbSDimitry Andric }
44525ffd83dbSDimitry Andric 
44535ffd83dbSDimitry Andric // Detect a run of memory tagging instructions for adjacent stack frame slots,
44545ffd83dbSDimitry Andric // and replace them with a shorter instruction sequence:
44555ffd83dbSDimitry Andric // * replace STG + STG with ST2G
44565ffd83dbSDimitry Andric // * replace STGloop + STGloop with STGloop
44575ffd83dbSDimitry Andric // This code needs to run when stack slot offsets are already known, but before
44585ffd83dbSDimitry Andric // FrameIndex operands in STG instructions are eliminated.
44595ffd83dbSDimitry Andric MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
44605ffd83dbSDimitry Andric                                                 const AArch64FrameLowering *TFI,
44615ffd83dbSDimitry Andric                                                 RegScavenger *RS) {
44625ffd83dbSDimitry Andric   bool FirstZeroData;
44635ffd83dbSDimitry Andric   int64_t Size, Offset;
44645ffd83dbSDimitry Andric   MachineInstr &MI = *II;
44655ffd83dbSDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
44665ffd83dbSDimitry Andric   MachineBasicBlock::iterator NextI = ++II;
44675ffd83dbSDimitry Andric   if (&MI == &MBB->instr_back())
44685ffd83dbSDimitry Andric     return II;
44695ffd83dbSDimitry Andric   if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
44705ffd83dbSDimitry Andric     return II;
44715ffd83dbSDimitry Andric 
44725ffd83dbSDimitry Andric   SmallVector<TagStoreInstr, 4> Instrs;
44735ffd83dbSDimitry Andric   Instrs.emplace_back(&MI, Offset, Size);
44745ffd83dbSDimitry Andric 
44755ffd83dbSDimitry Andric   constexpr int kScanLimit = 10;
44765ffd83dbSDimitry Andric   int Count = 0;
44775ffd83dbSDimitry Andric   for (MachineBasicBlock::iterator E = MBB->end();
44785ffd83dbSDimitry Andric        NextI != E && Count < kScanLimit; ++NextI) {
44795ffd83dbSDimitry Andric     MachineInstr &MI = *NextI;
44805ffd83dbSDimitry Andric     bool ZeroData;
44815ffd83dbSDimitry Andric     int64_t Size, Offset;
44825ffd83dbSDimitry Andric     // Collect instructions that update memory tags with a FrameIndex operand
44835ffd83dbSDimitry Andric     // and (when applicable) constant size, and whose output registers are dead
44845ffd83dbSDimitry Andric     // (the latter is almost always the case in practice). Since these
44855ffd83dbSDimitry Andric     // instructions effectively have no inputs or outputs, we are free to skip
44865ffd83dbSDimitry Andric     // any non-aliasing instructions in between without tracking used registers.
44875ffd83dbSDimitry Andric     if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
44885ffd83dbSDimitry Andric       if (ZeroData != FirstZeroData)
44895ffd83dbSDimitry Andric         break;
44905ffd83dbSDimitry Andric       Instrs.emplace_back(&MI, Offset, Size);
44915ffd83dbSDimitry Andric       continue;
44925ffd83dbSDimitry Andric     }
44935ffd83dbSDimitry Andric 
44945ffd83dbSDimitry Andric     // Only count non-transient, non-tagging instructions toward the scan
44955ffd83dbSDimitry Andric     // limit.
44965ffd83dbSDimitry Andric     if (!MI.isTransient())
44975ffd83dbSDimitry Andric       ++Count;
44985ffd83dbSDimitry Andric 
44995ffd83dbSDimitry Andric     // Just in case, stop before the epilogue code starts.
45005ffd83dbSDimitry Andric     if (MI.getFlag(MachineInstr::FrameSetup) ||
45015ffd83dbSDimitry Andric         MI.getFlag(MachineInstr::FrameDestroy))
45025ffd83dbSDimitry Andric       break;
45035ffd83dbSDimitry Andric 
45045ffd83dbSDimitry Andric     // Reject anything that may alias the collected instructions.
45055ffd83dbSDimitry Andric     if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
45065ffd83dbSDimitry Andric       break;
45075ffd83dbSDimitry Andric   }
45085ffd83dbSDimitry Andric 
45095ffd83dbSDimitry Andric   // New code will be inserted after the last tagging instruction we've found.
45105ffd83dbSDimitry Andric   MachineBasicBlock::iterator InsertI = Instrs.back().MI;
45115f757f3fSDimitry Andric 
45125f757f3fSDimitry Andric   // All the gathered stack tag instructions are merged and placed after
45135f757f3fSDimitry Andric   // last tag store in the list. The check should be made if the nzcv
45145f757f3fSDimitry Andric   // flag is live at the point where we are trying to insert. Otherwise
45155f757f3fSDimitry Andric   // the nzcv flag might get clobbered if any stg loops are present.
45165f757f3fSDimitry Andric 
45175f757f3fSDimitry Andric   // FIXME : This approach of bailing out from merge is conservative in
45185f757f3fSDimitry Andric   // some ways like even if stg loops are not present after merge the
45195f757f3fSDimitry Andric   // insert list, this liveness check is done (which is not needed).
45205f757f3fSDimitry Andric   LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
45215f757f3fSDimitry Andric   LiveRegs.addLiveOuts(*MBB);
45225f757f3fSDimitry Andric   for (auto I = MBB->rbegin();; ++I) {
45235f757f3fSDimitry Andric     MachineInstr &MI = *I;
45245f757f3fSDimitry Andric     if (MI == InsertI)
45255f757f3fSDimitry Andric       break;
45265f757f3fSDimitry Andric     LiveRegs.stepBackward(*I);
45275f757f3fSDimitry Andric   }
45285ffd83dbSDimitry Andric   InsertI++;
45295f757f3fSDimitry Andric   if (LiveRegs.contains(AArch64::NZCV))
45305f757f3fSDimitry Andric     return InsertI;
45315ffd83dbSDimitry Andric 
45325ffd83dbSDimitry Andric   llvm::stable_sort(Instrs,
45335ffd83dbSDimitry Andric                     [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
45345ffd83dbSDimitry Andric                       return Left.Offset < Right.Offset;
45355ffd83dbSDimitry Andric                     });
45365ffd83dbSDimitry Andric 
45375ffd83dbSDimitry Andric   // Make sure that we don't have any overlapping stores.
45385ffd83dbSDimitry Andric   int64_t CurOffset = Instrs[0].Offset;
45395ffd83dbSDimitry Andric   for (auto &Instr : Instrs) {
45405ffd83dbSDimitry Andric     if (CurOffset > Instr.Offset)
45415ffd83dbSDimitry Andric       return NextI;
45425ffd83dbSDimitry Andric     CurOffset = Instr.Offset + Instr.Size;
45435ffd83dbSDimitry Andric   }
45445ffd83dbSDimitry Andric 
45455ffd83dbSDimitry Andric   // Find contiguous runs of tagged memory and emit shorter instruction
45465ffd83dbSDimitry Andric   // sequencies for them when possible.
45475ffd83dbSDimitry Andric   TagStoreEdit TSE(MBB, FirstZeroData);
4548bdd1243dSDimitry Andric   std::optional<int64_t> EndOffset;
45495ffd83dbSDimitry Andric   for (auto &Instr : Instrs) {
45505ffd83dbSDimitry Andric     if (EndOffset && *EndOffset != Instr.Offset) {
45515ffd83dbSDimitry Andric       // Found a gap.
455281ad6265SDimitry Andric       TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate = */ false);
45535ffd83dbSDimitry Andric       TSE.clear();
45545ffd83dbSDimitry Andric     }
45555ffd83dbSDimitry Andric 
45565ffd83dbSDimitry Andric     TSE.addInstruction(Instr);
45575ffd83dbSDimitry Andric     EndOffset = Instr.Offset + Instr.Size;
45585ffd83dbSDimitry Andric   }
45595ffd83dbSDimitry Andric 
4560bdd1243dSDimitry Andric   const MachineFunction *MF = MBB->getParent();
456181ad6265SDimitry Andric   // Multiple FP/SP updates in a loop cannot be described by CFI instructions.
4562bdd1243dSDimitry Andric   TSE.emitCode(
4563bdd1243dSDimitry Andric       InsertI, TFI, /*TryMergeSPUpdate = */
4564bdd1243dSDimitry Andric       !MF->getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(*MF));
45655ffd83dbSDimitry Andric 
45665ffd83dbSDimitry Andric   return InsertI;
45675ffd83dbSDimitry Andric }
45685ffd83dbSDimitry Andric } // namespace
45695ffd83dbSDimitry Andric 
45700fca6ea1SDimitry Andric MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
45710fca6ea1SDimitry Andric                                               const AArch64FrameLowering *TFI) {
45720fca6ea1SDimitry Andric   MachineInstr &MI = *II;
45730fca6ea1SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
45740fca6ea1SDimitry Andric   MachineFunction *MF = MBB->getParent();
45750fca6ea1SDimitry Andric 
45760fca6ea1SDimitry Andric   if (MI.getOpcode() != AArch64::VGSavePseudo &&
45770fca6ea1SDimitry Andric       MI.getOpcode() != AArch64::VGRestorePseudo)
45780fca6ea1SDimitry Andric     return II;
45790fca6ea1SDimitry Andric 
45800fca6ea1SDimitry Andric   SMEAttrs FuncAttrs(MF->getFunction());
45810fca6ea1SDimitry Andric   bool LocallyStreaming =
45820fca6ea1SDimitry Andric       FuncAttrs.hasStreamingBody() && !FuncAttrs.hasStreamingInterface();
45830fca6ea1SDimitry Andric   const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
45840fca6ea1SDimitry Andric   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
45850fca6ea1SDimitry Andric   const AArch64InstrInfo *TII =
45860fca6ea1SDimitry Andric       MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
45870fca6ea1SDimitry Andric 
45880fca6ea1SDimitry Andric   int64_t VGFrameIdx =
45890fca6ea1SDimitry Andric       LocallyStreaming ? AFI->getStreamingVGIdx() : AFI->getVGIdx();
45900fca6ea1SDimitry Andric   assert(VGFrameIdx != std::numeric_limits<int>::max() &&
45910fca6ea1SDimitry Andric          "Expected FrameIdx for VG");
45920fca6ea1SDimitry Andric 
45930fca6ea1SDimitry Andric   unsigned CFIIndex;
45940fca6ea1SDimitry Andric   if (MI.getOpcode() == AArch64::VGSavePseudo) {
45950fca6ea1SDimitry Andric     const MachineFrameInfo &MFI = MF->getFrameInfo();
45960fca6ea1SDimitry Andric     int64_t Offset =
45970fca6ea1SDimitry Andric         MFI.getObjectOffset(VGFrameIdx) - TFI->getOffsetOfLocalArea();
45980fca6ea1SDimitry Andric     CFIIndex = MF->addFrameInst(MCCFIInstruction::createOffset(
45990fca6ea1SDimitry Andric         nullptr, TRI->getDwarfRegNum(AArch64::VG, true), Offset));
46000fca6ea1SDimitry Andric   } else
46010fca6ea1SDimitry Andric     CFIIndex = MF->addFrameInst(MCCFIInstruction::createRestore(
46020fca6ea1SDimitry Andric         nullptr, TRI->getDwarfRegNum(AArch64::VG, true)));
46030fca6ea1SDimitry Andric 
46040fca6ea1SDimitry Andric   MachineInstr *UnwindInst = BuildMI(*MBB, II, II->getDebugLoc(),
46050fca6ea1SDimitry Andric                                      TII->get(TargetOpcode::CFI_INSTRUCTION))
46060fca6ea1SDimitry Andric                                  .addCFIIndex(CFIIndex);
46070fca6ea1SDimitry Andric 
46080fca6ea1SDimitry Andric   MI.eraseFromParent();
46090fca6ea1SDimitry Andric   return UnwindInst->getIterator();
46100fca6ea1SDimitry Andric }
46110fca6ea1SDimitry Andric 
46125ffd83dbSDimitry Andric void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
46135ffd83dbSDimitry Andric     MachineFunction &MF, RegScavenger *RS = nullptr) const {
46145ffd83dbSDimitry Andric   for (auto &BB : MF)
46150fca6ea1SDimitry Andric     for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
4616*71ac745dSDimitry Andric       if (requiresSaveVG(MF))
46170fca6ea1SDimitry Andric         II = emitVGSaveRestore(II, this);
46180fca6ea1SDimitry Andric       if (StackTaggingMergeSetTag)
46195ffd83dbSDimitry Andric         II = tryMergeAdjacentSTG(II, this, RS);
46205ffd83dbSDimitry Andric     }
46210fca6ea1SDimitry Andric }
46225ffd83dbSDimitry Andric 
46235ffd83dbSDimitry Andric /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
46245ffd83dbSDimitry Andric /// before the update.  This is easily retrieved as it is exactly the offset
46255ffd83dbSDimitry Andric /// that is set in processFunctionBeforeFrameFinalized.
4626e8d8bef9SDimitry Andric StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
46275ffd83dbSDimitry Andric     const MachineFunction &MF, int FI, Register &FrameReg,
46280b57cec5SDimitry Andric     bool IgnoreSPUpdates) const {
46290b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
463062cfcf62SDimitry Andric   if (IgnoreSPUpdates) {
46310b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
46320b57cec5SDimitry Andric                       << MFI.getObjectOffset(FI) << "\n");
46330b57cec5SDimitry Andric     FrameReg = AArch64::SP;
4634e8d8bef9SDimitry Andric     return StackOffset::getFixed(MFI.getObjectOffset(FI));
46350b57cec5SDimitry Andric   }
46360b57cec5SDimitry Andric 
4637349cc55cSDimitry Andric   // Go to common code if we cannot provide sp + offset.
4638349cc55cSDimitry Andric   if (MFI.hasVarSizedObjects() ||
4639349cc55cSDimitry Andric       MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
4640349cc55cSDimitry Andric       MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
464162cfcf62SDimitry Andric     return getFrameIndexReference(MF, FI, FrameReg);
4642349cc55cSDimitry Andric 
4643349cc55cSDimitry Andric   FrameReg = AArch64::SP;
4644349cc55cSDimitry Andric   return getStackOffset(MF, MFI.getObjectOffset(FI));
464562cfcf62SDimitry Andric }
464662cfcf62SDimitry Andric 
46470b57cec5SDimitry Andric /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
46480b57cec5SDimitry Andric /// the parent's frame pointer
46490b57cec5SDimitry Andric unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
46500b57cec5SDimitry Andric     const MachineFunction &MF) const {
46510b57cec5SDimitry Andric   return 0;
46520b57cec5SDimitry Andric }
46530b57cec5SDimitry Andric 
46540b57cec5SDimitry Andric /// Funclets only need to account for space for the callee saved registers,
46550b57cec5SDimitry Andric /// as the locals are accounted for in the parent's stack frame.
46560b57cec5SDimitry Andric unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
46570b57cec5SDimitry Andric     const MachineFunction &MF) const {
46580b57cec5SDimitry Andric   // This is the size of the pushed CSRs.
46590b57cec5SDimitry Andric   unsigned CSSize =
46600b57cec5SDimitry Andric       MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
46610b57cec5SDimitry Andric   // This is the amount of stack a funclet needs to allocate.
46620b57cec5SDimitry Andric   return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
46635ffd83dbSDimitry Andric                  getStackAlign());
46640b57cec5SDimitry Andric }
4665e8d8bef9SDimitry Andric 
4666e8d8bef9SDimitry Andric namespace {
// Per-stack-object sort record consumed by FrameObjectCompare.
struct FrameObject {
  // Bit values for Accesses. Chosen so that they sort FPR < Hazard < GPR and
  // so that they can be or'd together.
  enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };

  // False until the slot is marked as taking part in the ordering.
  bool IsValid = false;
  // Index of the object in MFI.
  int ObjectIndex = 0;
  // ID of the group this object belongs to; -1 by default.
  int GroupIndex = -1;
  // Set when this object should be placed first (closest to SP).
  bool ObjectFirst = false;
  // Set when this object's group (which always contains the object with
  // ObjectFirst==true) should be placed first.
  bool GroupFirst = false;

  // Or'd Access* bits, used to distinguish between FP and GPR accesses.
  unsigned Accesses = 0;
};
4684e8d8bef9SDimitry Andric 
4685e8d8bef9SDimitry Andric class GroupBuilder {
4686e8d8bef9SDimitry Andric   SmallVector<int, 8> CurrentMembers;
4687e8d8bef9SDimitry Andric   int NextGroupIndex = 0;
4688e8d8bef9SDimitry Andric   std::vector<FrameObject> &Objects;
4689e8d8bef9SDimitry Andric 
4690e8d8bef9SDimitry Andric public:
4691e8d8bef9SDimitry Andric   GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
4692e8d8bef9SDimitry Andric   void AddMember(int Index) { CurrentMembers.push_back(Index); }
4693e8d8bef9SDimitry Andric   void EndCurrentGroup() {
4694e8d8bef9SDimitry Andric     if (CurrentMembers.size() > 1) {
4695e8d8bef9SDimitry Andric       // Create a new group with the current member list. This might remove them
4696e8d8bef9SDimitry Andric       // from their pre-existing groups. That's OK, dealing with overlapping
4697e8d8bef9SDimitry Andric       // groups is too hard and unlikely to make a difference.
4698e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "group:");
4699e8d8bef9SDimitry Andric       for (int Index : CurrentMembers) {
4700e8d8bef9SDimitry Andric         Objects[Index].GroupIndex = NextGroupIndex;
4701e8d8bef9SDimitry Andric         LLVM_DEBUG(dbgs() << " " << Index);
4702e8d8bef9SDimitry Andric       }
4703e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "\n");
4704e8d8bef9SDimitry Andric       NextGroupIndex++;
4705e8d8bef9SDimitry Andric     }
4706e8d8bef9SDimitry Andric     CurrentMembers.clear();
4707e8d8bef9SDimitry Andric   }
4708e8d8bef9SDimitry Andric };
4709e8d8bef9SDimitry Andric 
4710e8d8bef9SDimitry Andric bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
4711e8d8bef9SDimitry Andric   // Objects at a lower index are closer to FP; objects at a higher index are
4712e8d8bef9SDimitry Andric   // closer to SP.
4713e8d8bef9SDimitry Andric   //
4714e8d8bef9SDimitry Andric   // For consistency in our comparison, all invalid objects are placed
4715e8d8bef9SDimitry Andric   // at the end. This also allows us to stop walking when we hit the
4716e8d8bef9SDimitry Andric   // first invalid item after it's all sorted.
4717e8d8bef9SDimitry Andric   //
47180fca6ea1SDimitry Andric   // If we want to include a stack hazard region, order FPR accesses < the
47190fca6ea1SDimitry Andric   // hazard object < GPRs accesses in order to create a separation between the
47200fca6ea1SDimitry Andric   // two. For the Accesses field 1 = FPR, 2 = Hazard Object, 4 = GPR.
47210fca6ea1SDimitry Andric   //
47220fca6ea1SDimitry Andric   // Otherwise the "first" object goes first (closest to SP), followed by the
47230fca6ea1SDimitry Andric   // members of the "first" group.
4724e8d8bef9SDimitry Andric   //
4725e8d8bef9SDimitry Andric   // The rest are sorted by the group index to keep the groups together.
4726e8d8bef9SDimitry Andric   // Higher numbered groups are more likely to be around longer (i.e. untagged
4727e8d8bef9SDimitry Andric   // in the function epilogue and not at some earlier point). Place them closer
4728e8d8bef9SDimitry Andric   // to SP.
4729e8d8bef9SDimitry Andric   //
4730e8d8bef9SDimitry Andric   // If all else equal, sort by the object index to keep the objects in the
4731e8d8bef9SDimitry Andric   // original order.
47320fca6ea1SDimitry Andric   return std::make_tuple(!A.IsValid, A.Accesses, A.ObjectFirst, A.GroupFirst,
47330fca6ea1SDimitry Andric                          A.GroupIndex, A.ObjectIndex) <
47340fca6ea1SDimitry Andric          std::make_tuple(!B.IsValid, B.Accesses, B.ObjectFirst, B.GroupFirst,
47350fca6ea1SDimitry Andric                          B.GroupIndex, B.ObjectIndex);
4736e8d8bef9SDimitry Andric }
4737e8d8bef9SDimitry Andric } // namespace
4738e8d8bef9SDimitry Andric 
/// Reorder the frame indices in ObjectsToAllocate in place.
///
/// Does nothing when OrderFrameObjects is false or the list is empty.
/// Otherwise a FrameObject record is created for every object index of the
/// function, the records named in ObjectsToAllocate are marked valid, the
/// records are annotated from a scan over all non-debug instructions, and
/// they are stable-sorted with FrameObjectCompare. The valid records are then
/// written back over ObjectsToAllocate in sorted order.
4739e8d8bef9SDimitry Andric void AArch64FrameLowering::orderFrameObjects(
4740e8d8bef9SDimitry Andric     const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4741e8d8bef9SDimitry Andric   if (!OrderFrameObjects || ObjectsToAllocate.empty())
4742e8d8bef9SDimitry Andric     return;
4743e8d8bef9SDimitry Andric 
47440fca6ea1SDimitry Andric   const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
4745e8d8bef9SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
  // One record per object index; the vector is indexed directly by frame
  // index. Only the objects that are actually being allocated become valid.
4746e8d8bef9SDimitry Andric   std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
4747e8d8bef9SDimitry Andric   for (auto &Obj : ObjectsToAllocate) {
4748e8d8bef9SDimitry Andric     FrameObjects[Obj].IsValid = true;
4749e8d8bef9SDimitry Andric     FrameObjects[Obj].ObjectIndex = Obj;
4750e8d8bef9SDimitry Andric   }
4751e8d8bef9SDimitry Andric 
47520fca6ea1SDimitry Andric   // Identify FPR vs GPR slots for hazards, and stack slots that are tagged at
47530fca6ea1SDimitry Andric   // the same time.
4754e8d8bef9SDimitry Andric   GroupBuilder GB(FrameObjects);
4755e8d8bef9SDimitry Andric   for (auto &MBB : MF) {
4756e8d8bef9SDimitry Andric     for (auto &MI : MBB) {
4757e8d8bef9SDimitry Andric       if (MI.isDebugInstr())
4758e8d8bef9SDimitry Andric         continue;
47590fca6ea1SDimitry Andric 
      // Access classification is only collected when the function has a stack
      // hazard slot. A slot is recorded as FPR-accessed if it has the
      // scalable-vector stack ID or the accessing instruction is FP/NEON, and
      // as GPR-accessed otherwise; the bits accumulate across instructions.
47600fca6ea1SDimitry Andric       if (AFI.hasStackHazardSlotIndex()) {
47610fca6ea1SDimitry Andric         std::optional<int> FI = getLdStFrameID(MI, MFI);
47620fca6ea1SDimitry Andric         if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
47630fca6ea1SDimitry Andric           if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
47640fca6ea1SDimitry Andric               AArch64InstrInfo::isFpOrNEON(MI))
47650fca6ea1SDimitry Andric             FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
47660fca6ea1SDimitry Andric           else
47670fca6ea1SDimitry Andric             FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
47680fca6ea1SDimitry Andric         }
47690fca6ea1SDimitry Andric       }
47700fca6ea1SDimitry Andric 
      // Index of the operand that is inspected for a frame index, for the
      // stack tagging opcodes listed below; -1 for every other instruction.
4771e8d8bef9SDimitry Andric       int OpIndex;
4772e8d8bef9SDimitry Andric       switch (MI.getOpcode()) {
4773e8d8bef9SDimitry Andric       case AArch64::STGloop:
4774e8d8bef9SDimitry Andric       case AArch64::STZGloop:
4775e8d8bef9SDimitry Andric         OpIndex = 3;
4776e8d8bef9SDimitry Andric         break;
477706c3fb27SDimitry Andric       case AArch64::STGi:
477806c3fb27SDimitry Andric       case AArch64::STZGi:
477906c3fb27SDimitry Andric       case AArch64::ST2Gi:
478006c3fb27SDimitry Andric       case AArch64::STZ2Gi:
4781e8d8bef9SDimitry Andric         OpIndex = 1;
4782e8d8bef9SDimitry Andric         break;
4783e8d8bef9SDimitry Andric       default:
4784e8d8bef9SDimitry Andric         OpIndex = -1;
4785e8d8bef9SDimitry Andric       }
4786e8d8bef9SDimitry Andric 
      // TaggedFI stays -1 unless the selected operand is a frame index that
      // refers to one of the valid (to-be-allocated) objects.
4787e8d8bef9SDimitry Andric       int TaggedFI = -1;
4788e8d8bef9SDimitry Andric       if (OpIndex >= 0) {
4789e8d8bef9SDimitry Andric         const MachineOperand &MO = MI.getOperand(OpIndex);
4790e8d8bef9SDimitry Andric         if (MO.isFI()) {
4791e8d8bef9SDimitry Andric           int FI = MO.getIndex();
4792e8d8bef9SDimitry Andric           if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
4793e8d8bef9SDimitry Andric               FrameObjects[FI].IsValid)
4794e8d8bef9SDimitry Andric             TaggedFI = FI;
4795e8d8bef9SDimitry Andric         }
4796e8d8bef9SDimitry Andric       }
4797e8d8bef9SDimitry Andric 
4798e8d8bef9SDimitry Andric       // If this is a stack tagging instruction for a slot that is not part of a
4799e8d8bef9SDimitry Andric       // group yet, either start a new group or add it to the current one.
      // Any other non-debug instruction closes the group currently being
      // built.
4800e8d8bef9SDimitry Andric       if (TaggedFI >= 0)
4801e8d8bef9SDimitry Andric         GB.AddMember(TaggedFI);
4802e8d8bef9SDimitry Andric       else
4803e8d8bef9SDimitry Andric         GB.EndCurrentGroup();
4804e8d8bef9SDimitry Andric     }
4805e8d8bef9SDimitry Andric     // Groups should never span multiple basic blocks.
4806e8d8bef9SDimitry Andric     GB.EndCurrentGroup();
4807e8d8bef9SDimitry Andric   }
4808e8d8bef9SDimitry Andric 
  // With a hazard slot present, give the hazard slot its dedicated access
  // value and make sure every other record ends up as either FPR or GPR.
48090fca6ea1SDimitry Andric   if (AFI.hasStackHazardSlotIndex()) {
48100fca6ea1SDimitry Andric     FrameObjects[AFI.getStackHazardSlotIndex()].Accesses =
48110fca6ea1SDimitry Andric         FrameObject::AccessHazard;
48120fca6ea1SDimitry Andric     // If a stack object is unknown or both GPR and FPR, sort it into GPR.
48130fca6ea1SDimitry Andric     for (auto &Obj : FrameObjects)
48140fca6ea1SDimitry Andric       if (!Obj.Accesses ||
48150fca6ea1SDimitry Andric           Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
48160fca6ea1SDimitry Andric         Obj.Accesses = FrameObject::AccessGPR;
48170fca6ea1SDimitry Andric   }
48180fca6ea1SDimitry Andric 
4819e8d8bef9SDimitry Andric   // If the function's tagged base pointer is pinned to a stack slot, we want to
4820e8d8bef9SDimitry Andric   // put that slot first when possible. This will likely place it at SP + 0,
4821e8d8bef9SDimitry Andric   // and save one instruction when generating the base pointer because IRG does
4822e8d8bef9SDimitry Andric   // not allow an immediate offset.
4823bdd1243dSDimitry Andric   std::optional<int> TBPI = AFI.getTaggedBasePointerIndex();
4824e8d8bef9SDimitry Andric   if (TBPI) {
4825e8d8bef9SDimitry Andric     FrameObjects[*TBPI].ObjectFirst = true;
4826e8d8bef9SDimitry Andric     FrameObjects[*TBPI].GroupFirst = true;
    // Every other member of the same tagging group is pulled to the front
    // together with the base pointer slot.
4827e8d8bef9SDimitry Andric     int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
4828e8d8bef9SDimitry Andric     if (FirstGroupIndex >= 0)
4829e8d8bef9SDimitry Andric       for (FrameObject &Object : FrameObjects)
4830e8d8bef9SDimitry Andric         if (Object.GroupIndex == FirstGroupIndex)
4831e8d8bef9SDimitry Andric           Object.GroupFirst = true;
4832e8d8bef9SDimitry Andric   }
4833e8d8bef9SDimitry Andric 
4834e8d8bef9SDimitry Andric   llvm::stable_sort(FrameObjects, FrameObjectCompare);
4835e8d8bef9SDimitry Andric 
  // Write the sorted order back. Only entries of ObjectsToAllocate were marked
  // valid above, so the number of slots written never exceeds its size.
4836e8d8bef9SDimitry Andric   int i = 0;
4837e8d8bef9SDimitry Andric   for (auto &Obj : FrameObjects) {
4838e8d8bef9SDimitry Andric     // All invalid items are sorted at the end, so it's safe to stop.
4839e8d8bef9SDimitry Andric     if (!Obj.IsValid)
4840e8d8bef9SDimitry Andric       break;
4841e8d8bef9SDimitry Andric     ObjectsToAllocate[i++] = Obj.ObjectIndex;
4842e8d8bef9SDimitry Andric   }
4843e8d8bef9SDimitry Andric 
  // Debug-only dump of the final order with the group and "first" markers.
48440fca6ea1SDimitry Andric   LLVM_DEBUG({
48450fca6ea1SDimitry Andric     dbgs() << "Final frame order:\n";
48460fca6ea1SDimitry Andric     for (auto &Obj : FrameObjects) {
4847e8d8bef9SDimitry Andric       if (!Obj.IsValid)
4848e8d8bef9SDimitry Andric         break;
4849e8d8bef9SDimitry Andric       dbgs() << "  " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
4850e8d8bef9SDimitry Andric       if (Obj.ObjectFirst)
4851e8d8bef9SDimitry Andric         dbgs() << ", first";
4852e8d8bef9SDimitry Andric       if (Obj.GroupFirst)
4853e8d8bef9SDimitry Andric         dbgs() << ", group-first";
4854e8d8bef9SDimitry Andric       dbgs() << "\n";
48550fca6ea1SDimitry Andric     }
4856e8d8bef9SDimitry Andric   });
4857e8d8bef9SDimitry Andric }
48585f757f3fSDimitry Andric 
48595f757f3fSDimitry Andric /// Emit a loop to decrement SP until it is equal to TargetReg, with probes at
48605f757f3fSDimitry Andric /// least every ProbeSize bytes. Returns an iterator of the first instruction
48615f757f3fSDimitry Andric /// after the loop. The difference between SP and TargetReg must be an exact
48625f757f3fSDimitry Andric /// multiple of ProbeSize.
///
/// The block containing MBBI is split: two new blocks (the loop and an exit
/// block) are inserted right after it, and everything from MBBI to the end of
/// the original block is moved into the exit block.
48635f757f3fSDimitry Andric MachineBasicBlock::iterator
48645f757f3fSDimitry Andric AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
48655f757f3fSDimitry Andric     MachineBasicBlock::iterator MBBI, int64_t ProbeSize,
48665f757f3fSDimitry Andric     Register TargetReg) const {
48675f757f3fSDimitry Andric   MachineBasicBlock &MBB = *MBBI->getParent();
48685f757f3fSDimitry Andric   MachineFunction &MF = *MBB.getParent();
48695f757f3fSDimitry Andric   const AArch64InstrInfo *TII =
48705f757f3fSDimitry Andric       MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
48715f757f3fSDimitry Andric   DebugLoc DL = MBB.findDebugLoc(MBBI);
48725f757f3fSDimitry Andric 
  // Both new blocks are inserted before the block that currently follows MBB,
  // giving the layout MBB, LoopMBB, ExitMBB.
48735f757f3fSDimitry Andric   MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
48745f757f3fSDimitry Andric   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
48755f757f3fSDimitry Andric   MF.insert(MBBInsertPoint, LoopMBB);
48765f757f3fSDimitry Andric   MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
48775f757f3fSDimitry Andric   MF.insert(MBBInsertPoint, ExitMBB);
48785f757f3fSDimitry Andric 
48795f757f3fSDimitry Andric   // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not encodable
48805f757f3fSDimitry Andric   // in SUB).
48815f757f3fSDimitry Andric   emitFrameOffset(*LoopMBB, LoopMBB->end(), DL, AArch64::SP, AArch64::SP,
48825f757f3fSDimitry Andric                   StackOffset::getFixed(-ProbeSize), TII,
48835f757f3fSDimitry Andric                   MachineInstr::FrameSetup);
48845f757f3fSDimitry Andric   // STR XZR, [SP]
48855f757f3fSDimitry Andric   BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::STRXui))
48865f757f3fSDimitry Andric       .addReg(AArch64::XZR)
48875f757f3fSDimitry Andric       .addReg(AArch64::SP)
48885f757f3fSDimitry Andric       .addImm(0)
48895f757f3fSDimitry Andric       .setMIFlags(MachineInstr::FrameSetup);
48905f757f3fSDimitry Andric   // CMP SP, TargetReg (a SUBS whose result is discarded into XZR)
48915f757f3fSDimitry Andric   BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
48925f757f3fSDimitry Andric           AArch64::XZR)
48935f757f3fSDimitry Andric       .addReg(AArch64::SP)
48945f757f3fSDimitry Andric       .addReg(TargetReg)
48955f757f3fSDimitry Andric       .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
48965f757f3fSDimitry Andric       .setMIFlags(MachineInstr::FrameSetup);
48975f757f3fSDimitry Andric   // B.NE Loop (keep looping while SP has not reached TargetReg)
48985f757f3fSDimitry Andric   BuildMI(*LoopMBB, LoopMBB->end(), DL, TII->get(AArch64::Bcc))
48995f757f3fSDimitry Andric       .addImm(AArch64CC::NE)
49005f757f3fSDimitry Andric       .addMBB(LoopMBB)
49015f757f3fSDimitry Andric       .setMIFlags(MachineInstr::FrameSetup);
49025f757f3fSDimitry Andric 
  // The loop block either exits or branches back to itself.
49035f757f3fSDimitry Andric   LoopMBB->addSuccessor(ExitMBB);
49045f757f3fSDimitry Andric   LoopMBB->addSuccessor(LoopMBB);
49055f757f3fSDimitry Andric   // Synthesize the exit MBB.
  // It receives the instructions from MBBI to the end of MBB and takes over
  // MBB's successors; MBB is then given LoopMBB as a successor.
49065f757f3fSDimitry Andric   ExitMBB->splice(ExitMBB->end(), &MBB, MBBI, MBB.end());
49075f757f3fSDimitry Andric   ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
49085f757f3fSDimitry Andric   MBB.addSuccessor(LoopMBB);
49095f757f3fSDimitry Andric   // Update liveins.
49100fca6ea1SDimitry Andric   fullyRecomputeLiveIns({ExitMBB, LoopMBB});
49115f757f3fSDimitry Andric 
  // The first instruction of the exit block is the first one that was moved
  // there from MBB.
49125f757f3fSDimitry Andric   return ExitMBB->begin();
49135f757f3fSDimitry Andric }
49145f757f3fSDimitry Andric 
/// Emit, before MBBI, the allocation and probing sequence for a fixed
/// FrameSize-byte stack allocation.
///
/// FrameSize is split into NumBlocks whole blocks of the function's stack
/// probe size plus a residual. The whole blocks are allocated and probed
/// either by an unrolled sequence (when NumBlocks does not exceed
/// AArch64::StackProbeMaxLoopUnroll) or by the loop built by
/// inlineStackProbeLoopExactMultiple, with ScratchReg holding the loop's
/// target SP value. The residual is allocated last and is probed only when it
/// is larger than AArch64::StackProbeMaxUnprobedStack.
///
/// CFAOffset is the running offset passed to emitFrameOffset; CFI emission is
/// requested only when async DWARF unwind info is needed and the function has
/// no frame pointer.
49155f757f3fSDimitry Andric void AArch64FrameLowering::inlineStackProbeFixed(
49165f757f3fSDimitry Andric     MachineBasicBlock::iterator MBBI, Register ScratchReg, int64_t FrameSize,
49175f757f3fSDimitry Andric     StackOffset CFAOffset) const {
49185f757f3fSDimitry Andric   MachineBasicBlock *MBB = MBBI->getParent();
49195f757f3fSDimitry Andric   MachineFunction &MF = *MBB->getParent();
49205f757f3fSDimitry Andric   const AArch64InstrInfo *TII =
49215f757f3fSDimitry Andric       MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
49225f757f3fSDimitry Andric   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
49235f757f3fSDimitry Andric   bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
49245f757f3fSDimitry Andric   bool HasFP = hasFP(MF);
49255f757f3fSDimitry Andric 
  // DL is left default-constructed; all instructions emitted here use it.
49265f757f3fSDimitry Andric   DebugLoc DL;
  // NOTE(review): this fetches the same function info a second time; AFI
  // above could be used instead.
49275f757f3fSDimitry Andric   int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
49285f757f3fSDimitry Andric   int64_t NumBlocks = FrameSize / ProbeSize;
49295f757f3fSDimitry Andric   int64_t ResidualSize = FrameSize % ProbeSize;
49305f757f3fSDimitry Andric 
49315f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "Stack probing: total " << FrameSize << " bytes, "
49325f757f3fSDimitry Andric                     << NumBlocks << " blocks of " << ProbeSize
49335f757f3fSDimitry Andric                     << " bytes, plus " << ResidualSize << " bytes\n");
49345f757f3fSDimitry Andric 
49355f757f3fSDimitry Andric   // Decrement SP by NumBlocks * ProbeSize bytes, with either unrolled or
49365f757f3fSDimitry Andric   // ordinary loop.
49375f757f3fSDimitry Andric   if (NumBlocks <= AArch64::StackProbeMaxLoopUnroll) {
49385f757f3fSDimitry Andric     for (int i = 0; i < NumBlocks; ++i) {
49395f757f3fSDimitry Andric       // SUB SP, SP, #ProbeSize (or equivalent if ProbeSize is not
49405f757f3fSDimitry Andric       // encodable in a SUB).
49415f757f3fSDimitry Andric       emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
49425f757f3fSDimitry Andric                       StackOffset::getFixed(-ProbeSize), TII,
49435f757f3fSDimitry Andric                       MachineInstr::FrameSetup, false, false, nullptr,
49445f757f3fSDimitry Andric                       EmitAsyncCFI && !HasFP, CFAOffset);
      // Keep the running CFA offset in step with the SP adjustment.
49455f757f3fSDimitry Andric       CFAOffset += StackOffset::getFixed(ProbeSize);
49465f757f3fSDimitry Andric       // STR XZR, [SP]
49475f757f3fSDimitry Andric       BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
49485f757f3fSDimitry Andric           .addReg(AArch64::XZR)
49495f757f3fSDimitry Andric           .addReg(AArch64::SP)
49505f757f3fSDimitry Andric           .addImm(0)
49515f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
49525f757f3fSDimitry Andric     }
49535f757f3fSDimitry Andric   } else if (NumBlocks != 0) {
49545f757f3fSDimitry Andric     // SUB ScratchReg, SP, #(ProbeSize * NumBlocks) (or equivalent if that
49555f757f3fSDimitry Andric     // amount is not encodable). ScratchReg may temporarily become the CFA register.
49565f757f3fSDimitry Andric     emitFrameOffset(*MBB, MBBI, DL, ScratchReg, AArch64::SP,
49575f757f3fSDimitry Andric                     StackOffset::getFixed(-ProbeSize * NumBlocks), TII,
49585f757f3fSDimitry Andric                     MachineInstr::FrameSetup, false, false, nullptr,
49595f757f3fSDimitry Andric                     EmitAsyncCFI && !HasFP, CFAOffset);
49605f757f3fSDimitry Andric     CFAOffset += StackOffset::getFixed(ProbeSize * NumBlocks);
    // The helper splits the block, so continue emitting at the iterator it
    // returns and in that iterator's parent block.
49615f757f3fSDimitry Andric     MBBI = inlineStackProbeLoopExactMultiple(MBBI, ProbeSize, ScratchReg);
49625f757f3fSDimitry Andric     MBB = MBBI->getParent();
49635f757f3fSDimitry Andric     if (EmitAsyncCFI && !HasFP) {
49645f757f3fSDimitry Andric       // Set the CFA register back to SP.
49655f757f3fSDimitry Andric       const AArch64RegisterInfo &RegInfo =
49665f757f3fSDimitry Andric           *MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
49675f757f3fSDimitry Andric       unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
49685f757f3fSDimitry Andric       unsigned CFIIndex =
49695f757f3fSDimitry Andric           MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
49705f757f3fSDimitry Andric       BuildMI(*MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
49715f757f3fSDimitry Andric           .addCFIIndex(CFIIndex)
49725f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
49735f757f3fSDimitry Andric     }
49745f757f3fSDimitry Andric   }
49755f757f3fSDimitry Andric 
49765f757f3fSDimitry Andric   if (ResidualSize != 0) {
49775f757f3fSDimitry Andric     // SUB SP, SP, #ResidualSize (or equivalent if ResidualSize is not encodable
49785f757f3fSDimitry Andric     // in SUB).
49795f757f3fSDimitry Andric     emitFrameOffset(*MBB, MBBI, DL, AArch64::SP, AArch64::SP,
49805f757f3fSDimitry Andric                     StackOffset::getFixed(-ResidualSize), TII,
49815f757f3fSDimitry Andric                     MachineInstr::FrameSetup, false, false, nullptr,
49825f757f3fSDimitry Andric                     EmitAsyncCFI && !HasFP, CFAOffset);
    // A residual no larger than StackProbeMaxUnprobedStack is left unprobed.
49835f757f3fSDimitry Andric     if (ResidualSize > AArch64::StackProbeMaxUnprobedStack) {
49845f757f3fSDimitry Andric       // STR XZR, [SP]
49855f757f3fSDimitry Andric       BuildMI(*MBB, MBBI, DL, TII->get(AArch64::STRXui))
49865f757f3fSDimitry Andric           .addReg(AArch64::XZR)
49875f757f3fSDimitry Andric           .addReg(AArch64::SP)
49885f757f3fSDimitry Andric           .addImm(0)
49895f757f3fSDimitry Andric           .setMIFlags(MachineInstr::FrameSetup);
49905f757f3fSDimitry Andric     }
49915f757f3fSDimitry Andric   }
49925f757f3fSDimitry Andric }
49935f757f3fSDimitry Andric 
/// Replace every PROBED_STACKALLOC and PROBED_STACKALLOC_VAR pseudo in MBB
/// with an inline probing sequence and erase the pseudo.
///
/// PROBED_STACKALLOC is expanded by inlineStackProbeFixed; its operands are
/// read as: 0 = scratch register, 1 = frame size, 2 and 3 = the two
/// components handed to StackOffset::get for the CFA offset.
/// PROBED_STACKALLOC_VAR is expanded by AArch64InstrInfo::probedStackAlloc
/// with operand 0 as the target register.
49945f757f3fSDimitry Andric void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
49955f757f3fSDimitry Andric                                             MachineBasicBlock &MBB) const {
49965f757f3fSDimitry Andric   // Get the instructions that need to be replaced. We emit at most two of
49975f757f3fSDimitry Andric   // these. Remember them in order to avoid complications coming from the need
49985f757f3fSDimitry Andric   // to traverse the block while potentially creating more blocks.
49995f757f3fSDimitry Andric   SmallVector<MachineInstr *, 4> ToReplace;
50005f757f3fSDimitry Andric   for (MachineInstr &MI : MBB)
50015f757f3fSDimitry Andric     if (MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
50025f757f3fSDimitry Andric         MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
50035f757f3fSDimitry Andric       ToReplace.push_back(&MI);
50045f757f3fSDimitry Andric 
  // Each expansion is inserted at the pseudo's own position; the pseudo is
  // erased afterwards. Note that the MF parameter is not used in this body
  // (the function is reached through MI->getMF() where needed).
50055f757f3fSDimitry Andric   for (MachineInstr *MI : ToReplace) {
50065f757f3fSDimitry Andric     if (MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
50075f757f3fSDimitry Andric       Register ScratchReg = MI->getOperand(0).getReg();
50085f757f3fSDimitry Andric       int64_t FrameSize = MI->getOperand(1).getImm();
50095f757f3fSDimitry Andric       StackOffset CFAOffset = StackOffset::get(MI->getOperand(2).getImm(),
50105f757f3fSDimitry Andric                                                MI->getOperand(3).getImm());
50115f757f3fSDimitry Andric       inlineStackProbeFixed(MI->getIterator(), ScratchReg, FrameSize,
50125f757f3fSDimitry Andric                             CFAOffset);
50135f757f3fSDimitry Andric     } else {
50145f757f3fSDimitry Andric       assert(MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
50155f757f3fSDimitry Andric              "Stack probe pseudo-instruction expected");
50165f757f3fSDimitry Andric       const AArch64InstrInfo *TII =
50175f757f3fSDimitry Andric           MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
50185f757f3fSDimitry Andric       Register TargetReg = MI->getOperand(0).getReg();
      // The returned value is deliberately ignored.
50195f757f3fSDimitry Andric       (void)TII->probedStackAlloc(MI->getIterator(), TargetReg, true);
50205f757f3fSDimitry Andric     }
50215f757f3fSDimitry Andric     MI->eraseFromParent();
50225f757f3fSDimitry Andric   }
50235f757f3fSDimitry Andric }
502462987288SDimitry Andric 
/// Per-stack-object summary of which kinds of load/store instructions touch
/// the object, together with its location and size. emitRemarks fills one of
/// these per frame object and sorts them by start offset.
502562987288SDimitry Andric struct StackAccess {
  /// Bit flags describing the register kind of an accessing instruction.
502662987288SDimitry Andric   enum AccessType {
502762987288SDimitry Andric     NotAccessed = 0, // Stack object not accessed by load/store instructions.
502862987288SDimitry Andric     GPR = 1 << 0,    // A general purpose register.
502962987288SDimitry Andric     PPR = 1 << 1,    // A predicate register.
503062987288SDimitry Andric     FPR = 1 << 2,    // A floating point/Neon/SVE register.
503162987288SDimitry Andric   };
503262987288SDimitry Andric 
  // Frame index of the object (also the tie-breaker in operator<).
503362987288SDimitry Andric   int Idx;
  // Location of the object; printed relative to SP by print().
503462987288SDimitry Andric   StackOffset Offset;
  // Size of the object, used to compute end().
503562987288SDimitry Andric   int64_t Size;
  // Bitwise OR of the AccessType values seen for this object.
503662987288SDimitry Andric   unsigned AccessTypes;
503762987288SDimitry Andric 
503862987288SDimitry Andric   StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
503962987288SDimitry Andric 
  /// Order by start offset, then by frame index.
504062987288SDimitry Andric   bool operator<(const StackAccess &Rhs) const {
504162987288SDimitry Andric     return std::make_tuple(start(), Idx) <
504262987288SDimitry Andric            std::make_tuple(Rhs.start(), Rhs.Idx);
504362987288SDimitry Andric   }
504462987288SDimitry Andric 
504562987288SDimitry Andric   bool isCPU() const {
504662987288SDimitry Andric     // Predicate register load and store instructions execute on the CPU.
504762987288SDimitry Andric     return AccessTypes & (AccessType::GPR | AccessType::PPR);
504862987288SDimitry Andric   }
  // True when at least one FPR access was recorded.
504962987288SDimitry Andric   bool isSME() const { return AccessTypes & AccessType::FPR; }
  // True when the object has both an FPR access and a GPR or PPR access.
505062987288SDimitry Andric   bool isMixed() const { return isCPU() && isSME(); }
505162987288SDimitry Andric 
  // The fixed and scalable components are simply added, i.e. the scalable
  // part is counted as if vscale were 1.
505262987288SDimitry Andric   int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
505362987288SDimitry Andric   int64_t end() const { return start() + Size; }
505462987288SDimitry Andric 
  /// Name of the access kind. Any value with more than one bit set yields
  /// "Mixed"; that includes GPR|PPR, for which isMixed() returns false.
505562987288SDimitry Andric   std::string getTypeString() const {
505662987288SDimitry Andric     switch (AccessTypes) {
505762987288SDimitry Andric     case AccessType::FPR:
505862987288SDimitry Andric       return "FPR";
505962987288SDimitry Andric     case AccessType::PPR:
506062987288SDimitry Andric       return "PPR";
506162987288SDimitry Andric     case AccessType::GPR:
506262987288SDimitry Andric       return "GPR";
506362987288SDimitry Andric     case AccessType::NotAccessed:
506462987288SDimitry Andric       return "NA";
506562987288SDimitry Andric     default:
506662987288SDimitry Andric       return "Mixed";
506762987288SDimitry Andric     }
506862987288SDimitry Andric   }
506962987288SDimitry Andric 
  /// Print as "<type> stack object at [SP<fixed><scalable> * vscale]". The
  /// fixed part always carries a sign; the scalable part is omitted when zero.
507062987288SDimitry Andric   void print(raw_ostream &OS) const {
507162987288SDimitry Andric     OS << getTypeString() << " stack object at [SP"
507262987288SDimitry Andric        << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
507362987288SDimitry Andric     if (Offset.getScalable())
507462987288SDimitry Andric       OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
507562987288SDimitry Andric          << " * vscale";
507662987288SDimitry Andric     OS << "]";
507762987288SDimitry Andric   }
507862987288SDimitry Andric };
507962987288SDimitry Andric 
/// Stream a StackAccess by delegating to StackAccess::print; returns OS so
/// insertions can be chained.
508062987288SDimitry Andric static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
508162987288SDimitry Andric   SA.print(OS);
508262987288SDimitry Andric   return OS;
508362987288SDimitry Andric }
508462987288SDimitry Andric 
/// Emit "StackHazard" analysis remarks for MF.
///
/// Skipped entirely when the function has a non-streaming interface and body,
/// when the effective hazard size is zero, or when the function has no stack
/// objects. Otherwise every load/store memory operand that resolves to a live
/// frame object is classified as a GPR, PPR or FPR access. If both FPR and
/// non-FPR accesses exist, the accessed objects are sorted by start offset and
/// two kinds of remarks are produced:
///   - adjacent objects where one is FPR-accessed and the other is GPR/PPR
///     accessed and the gap between them is below the hazard size, and
///   - single objects accessed by both kinds of instruction.
/// ORE is dereferenced without a null check, so callers are expected to pass
/// a valid emitter.
508562987288SDimitry Andric void AArch64FrameLowering::emitRemarks(
508662987288SDimitry Andric     const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
508762987288SDimitry Andric 
508862987288SDimitry Andric   SMEAttrs Attrs(MF.getFunction());
508962987288SDimitry Andric   if (Attrs.hasNonStreamingInterfaceAndBody())
509062987288SDimitry Andric     return;
509162987288SDimitry Andric 
  // StackHazardSize takes precedence; StackHazardRemarkSize is the fallback
  // when it is zero.
509262987288SDimitry Andric   const uint64_t HazardSize =
509362987288SDimitry Andric       (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
509462987288SDimitry Andric 
509562987288SDimitry Andric   if (HazardSize == 0)
509662987288SDimitry Andric     return;
509762987288SDimitry Andric 
509862987288SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
509962987288SDimitry Andric   // Bail if function has no stack objects.
510062987288SDimitry Andric   if (!MFI.hasStackObjects())
510162987288SDimitry Andric     return;
510262987288SDimitry Andric 
  // One default-constructed (NotAccessed) entry per frame object.
510362987288SDimitry Andric   std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
510462987288SDimitry Andric 
  // Counted per memory operand, not per instruction.
510562987288SDimitry Andric   size_t NumFPLdSt = 0;
510662987288SDimitry Andric   size_t NumNonFPLdSt = 0;
510762987288SDimitry Andric 
510862987288SDimitry Andric   // Collect stack accesses via Load/Store instructions.
510962987288SDimitry Andric   for (const MachineBasicBlock &MBB : MF) {
511062987288SDimitry Andric     for (const MachineInstr &MI : MBB) {
511162987288SDimitry Andric       if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
511262987288SDimitry Andric         continue;
511362987288SDimitry Andric       for (MachineMemOperand *MMO : MI.memoperands()) {
511462987288SDimitry Andric         std::optional<int> FI = getMMOFrameID(MMO, MFI);
511562987288SDimitry Andric         if (FI && !MFI.isDeadObjectIndex(*FI)) {
511662987288SDimitry Andric           int FrameIdx = *FI;
511762987288SDimitry Andric 
          // The frame index is shifted by the number of fixed objects to get
          // the slot in StackAccesses (presumably because fixed objects use
          // negative frame indices -- confirm against MachineFrameInfo).
511862987288SDimitry Andric           size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
          // First access to this object: record its identity and location.
511962987288SDimitry Andric           if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
512062987288SDimitry Andric             StackAccesses[ArrIdx].Idx = FrameIdx;
512162987288SDimitry Andric             StackAccesses[ArrIdx].Offset =
512262987288SDimitry Andric                 getFrameIndexReferenceFromSP(MF, FrameIdx);
512362987288SDimitry Andric             StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
512462987288SDimitry Andric           }
512562987288SDimitry Andric 
          // Classification: GPR by default. Scalable-vector objects are PPR
          // if operand 0 is a predicate register and FPR otherwise; other
          // objects are FPR when the instruction is FP/NEON.
          // NOTE(review): assumes operand 0 of such an instruction is a
          // register operand.
512662987288SDimitry Andric           unsigned RegTy = StackAccess::AccessType::GPR;
512762987288SDimitry Andric           if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
512862987288SDimitry Andric             if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
512962987288SDimitry Andric               RegTy = StackAccess::PPR;
513062987288SDimitry Andric             else
513162987288SDimitry Andric               RegTy = StackAccess::FPR;
513262987288SDimitry Andric           } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
513362987288SDimitry Andric             RegTy = StackAccess::FPR;
513462987288SDimitry Andric           }
513562987288SDimitry Andric 
513662987288SDimitry Andric           StackAccesses[ArrIdx].AccessTypes |= RegTy;
513762987288SDimitry Andric 
513862987288SDimitry Andric           if (RegTy == StackAccess::FPR)
513962987288SDimitry Andric             ++NumFPLdSt;
514062987288SDimitry Andric           else
514162987288SDimitry Andric             ++NumNonFPLdSt;
514262987288SDimitry Andric         }
514362987288SDimitry Andric       }
514462987288SDimitry Andric     }
514562987288SDimitry Andric   }
514662987288SDimitry Andric 
  // A hazard needs both kinds of access; with only one kind there is nothing
  // to report.
514762987288SDimitry Andric   if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
514862987288SDimitry Andric     return;
514962987288SDimitry Andric 
  // Sort by start offset and drop objects that were never accessed. At least
  // one access was counted above, so the list is non-empty afterwards and
  // front() / std::prev(end()) below are valid.
515062987288SDimitry Andric   llvm::sort(StackAccesses);
515162987288SDimitry Andric   StackAccesses.erase(llvm::remove_if(StackAccesses,
515262987288SDimitry Andric                                       [](const StackAccess &S) {
515362987288SDimitry Andric                                         return S.AccessTypes ==
515462987288SDimitry Andric                                                StackAccess::NotAccessed;
515562987288SDimitry Andric                                       }),
515662987288SDimitry Andric                       StackAccesses.end());
515762987288SDimitry Andric 
  // Pointers into StackAccesses; the vector is not modified from here on.
515862987288SDimitry Andric   SmallVector<const StackAccess *> MixedObjects;
515962987288SDimitry Andric   SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
516062987288SDimitry Andric 
  // The loop below only tests the second element of each adjacent pair for
  // mixed access, so the very first element is handled here.
516162987288SDimitry Andric   if (StackAccesses.front().isMixed())
516262987288SDimitry Andric     MixedObjects.push_back(&StackAccesses.front());
516362987288SDimitry Andric 
516462987288SDimitry Andric   for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
516562987288SDimitry Andric        It != End; ++It) {
516662987288SDimitry Andric     const auto &First = *It;
516762987288SDimitry Andric     const auto &Second = *(It + 1);
516862987288SDimitry Andric 
516962987288SDimitry Andric     if (Second.isMixed())
517062987288SDimitry Andric       MixedObjects.push_back(&Second);
517162987288SDimitry Andric 
517262987288SDimitry Andric     if ((First.isSME() && Second.isCPU()) ||
517362987288SDimitry Andric         (First.isCPU() && Second.isSME())) {
      // NOTE(review): the signed difference is converted to unsigned, so a
      // negative gap (Second starting before First ends) becomes a very
      // large distance and is never recorded as a hazard pair.
517462987288SDimitry Andric       uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
517562987288SDimitry Andric       if (Distance < HazardSize)
517662987288SDimitry Andric         HazardPairs.emplace_back(&First, &Second);
517762987288SDimitry Andric     }
517862987288SDimitry Andric   }
517962987288SDimitry Andric 
  // Every remark is attached to the function's subprogram and its first
  // block, and is prefixed with the function name.
518062987288SDimitry Andric   auto EmitRemark = [&](llvm::StringRef Str) {
518162987288SDimitry Andric     ORE->emit([&]() {
518262987288SDimitry Andric       auto R = MachineOptimizationRemarkAnalysis(
518362987288SDimitry Andric           "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
518462987288SDimitry Andric       return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
518562987288SDimitry Andric     });
518662987288SDimitry Andric   };
518762987288SDimitry Andric 
518862987288SDimitry Andric   for (const auto &P : HazardPairs)
518962987288SDimitry Andric     EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
519062987288SDimitry Andric 
519162987288SDimitry Andric   for (const auto *Obj : MixedObjects)
519262987288SDimitry Andric     EmitRemark(
519362987288SDimitry Andric         formatv("{0} accessed by both GP and FP instructions", *Obj).str());
519462987288SDimitry Andric }
5195