xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp (revision 36b606ae6aa4b24061096ba18582e0a08ccd5dba)
10b57cec5SDimitry Andric //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the AArch64 specific subclass of TargetSubtarget.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "AArch64Subtarget.h"
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AArch64.h"
160b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
170b57cec5SDimitry Andric #include "AArch64PBQPRegAlloc.h"
180b57cec5SDimitry Andric #include "AArch64TargetMachine.h"
195ffd83dbSDimitry Andric #include "GISel/AArch64CallLowering.h"
205ffd83dbSDimitry Andric #include "GISel/AArch64LegalizerInfo.h"
215ffd83dbSDimitry Andric #include "GISel/AArch64RegisterBankInfo.h"
220b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
2481ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h"
260b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
270fca6ea1SDimitry Andric #include "llvm/Support/SipHash.h"
2806c3fb27SDimitry Andric #include "llvm/TargetParser/AArch64TargetParser.h"
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric using namespace llvm;
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-subtarget"
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR
350b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC
360b57cec5SDimitry Andric #include "AArch64GenSubtargetInfo.inc"
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric static cl::opt<bool>
390b57cec5SDimitry Andric EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
400b57cec5SDimitry Andric                      "converter pass"), cl::init(true), cl::Hidden);
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric // If OS supports TBI, use this flag to enable it.
430b57cec5SDimitry Andric static cl::opt<bool>
440b57cec5SDimitry Andric UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
450b57cec5SDimitry Andric                          "an address is ignored"), cl::init(false), cl::Hidden);
460b57cec5SDimitry Andric 
470fca6ea1SDimitry Andric static cl::opt<bool> MachOUseNonLazyBind(
480fca6ea1SDimitry Andric     "aarch64-macho-enable-nonlazybind",
490fca6ea1SDimitry Andric     cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
500fca6ea1SDimitry Andric     cl::Hidden);
510b57cec5SDimitry Andric 
52fe6060f1SDimitry Andric static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
53fe6060f1SDimitry Andric                            cl::desc("Enable the use of AA during codegen."));
545ffd83dbSDimitry Andric 
5581ad6265SDimitry Andric static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
5681ad6265SDimitry Andric     "aarch64-insert-extract-base-cost",
5781ad6265SDimitry Andric     cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
5881ad6265SDimitry Andric 
59bdd1243dSDimitry Andric // Reserve a list of X# registers, so they are unavailable for register
60bdd1243dSDimitry Andric // allocator, but can still be used as ABI requests, such as passing arguments
61bdd1243dSDimitry Andric // to function call.
62bdd1243dSDimitry Andric static cl::list<std::string>
63bdd1243dSDimitry Andric ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
64bdd1243dSDimitry Andric                   "registers, so they can't be used by register allocator. "
65bdd1243dSDimitry Andric                   "Should only be used for testing register allocator."),
66bdd1243dSDimitry Andric                   cl::CommaSeparated, cl::Hidden);
67bdd1243dSDimitry Andric 
685f757f3fSDimitry Andric static cl::opt<AArch64PAuth::AuthCheckMethod>
695f757f3fSDimitry Andric     AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
705f757f3fSDimitry Andric                                cl::Hidden,
715f757f3fSDimitry Andric                                cl::desc("Override the variant of check applied "
725f757f3fSDimitry Andric                                         "to authenticated LR during tail call"),
735f757f3fSDimitry Andric                                cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));
745f757f3fSDimitry Andric 
755f757f3fSDimitry Andric static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
765f757f3fSDimitry Andric     "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
775f757f3fSDimitry Andric     cl::desc("Set minimum number of entries to use a jump table on AArch64"));
785f757f3fSDimitry Andric 
7981ad6265SDimitry Andric unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
8081ad6265SDimitry Andric   if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
8181ad6265SDimitry Andric     return OverrideVectorInsertExtractBaseCost;
8281ad6265SDimitry Andric   return VectorInsertExtractBaseCost;
8381ad6265SDimitry Andric }
8481ad6265SDimitry Andric 
85349cc55cSDimitry Andric AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
865f757f3fSDimitry Andric     StringRef FS, StringRef CPUString, StringRef TuneCPUString,
875f757f3fSDimitry Andric     bool HasMinSize) {
880b57cec5SDimitry Andric   // Determine default and user-specified characteristics
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric   if (CPUString.empty())
910b57cec5SDimitry Andric     CPUString = "generic";
920b57cec5SDimitry Andric 
93349cc55cSDimitry Andric   if (TuneCPUString.empty())
94349cc55cSDimitry Andric     TuneCPUString = CPUString;
95349cc55cSDimitry Andric 
96349cc55cSDimitry Andric   ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
975f757f3fSDimitry Andric   initializeProperties(HasMinSize);
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric   return *this;
1000b57cec5SDimitry Andric }
1010b57cec5SDimitry Andric 
1025f757f3fSDimitry Andric void AArch64Subtarget::initializeProperties(bool HasMinSize) {
1030b57cec5SDimitry Andric   // Initialize CPU specific properties. We should add a tablegen feature for
1040b57cec5SDimitry Andric   // this in the future so we can specify it together with the subtarget
1050b57cec5SDimitry Andric   // features.
1060b57cec5SDimitry Andric   switch (ARMProcFamily) {
1070b57cec5SDimitry Andric   case Others:
1080b57cec5SDimitry Andric     break;
1095ffd83dbSDimitry Andric   case Carmel:
1105ffd83dbSDimitry Andric     CacheLineSize = 64;
1115ffd83dbSDimitry Andric     break;
1120b57cec5SDimitry Andric   case CortexA35:
1130b57cec5SDimitry Andric   case CortexA53:
1140b57cec5SDimitry Andric   case CortexA55:
1150fca6ea1SDimitry Andric   case CortexR82:
1160fca6ea1SDimitry Andric   case CortexR82AE:
11706c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
11806c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
11981ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
1200b57cec5SDimitry Andric     break;
1210b57cec5SDimitry Andric   case CortexA57:
1220b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
12306c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
12406c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
12581ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
1268bcb0991SDimitry Andric     break;
1278bcb0991SDimitry Andric   case CortexA65:
12806c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
1290b57cec5SDimitry Andric     break;
1300b57cec5SDimitry Andric   case CortexA72:
1310b57cec5SDimitry Andric   case CortexA73:
1320b57cec5SDimitry Andric   case CortexA75:
13306c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
13406c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
13581ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
13681ad6265SDimitry Andric     break;
1370b57cec5SDimitry Andric   case CortexA76:
1385ffd83dbSDimitry Andric   case CortexA77:
1395ffd83dbSDimitry Andric   case CortexA78:
1400fca6ea1SDimitry Andric   case CortexA78AE:
141e8d8bef9SDimitry Andric   case CortexA78C:
1425ffd83dbSDimitry Andric   case CortexX1:
14306c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
14406c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
14581ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
1460b57cec5SDimitry Andric     break;
147349cc55cSDimitry Andric   case CortexA510:
1485f757f3fSDimitry Andric   case CortexA520:
14906c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
15081ad6265SDimitry Andric     VScaleForTuning = 1;
15106c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
15281ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
15381ad6265SDimitry Andric     break;
154349cc55cSDimitry Andric   case CortexA710:
155bdd1243dSDimitry Andric   case CortexA715:
1565f757f3fSDimitry Andric   case CortexA720:
1570fca6ea1SDimitry Andric   case CortexA725:
158349cc55cSDimitry Andric   case CortexX2:
159bdd1243dSDimitry Andric   case CortexX3:
1605f757f3fSDimitry Andric   case CortexX4:
1610fca6ea1SDimitry Andric   case CortexX925:
16206c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
163349cc55cSDimitry Andric     VScaleForTuning = 1;
16406c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
16581ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
166349cc55cSDimitry Andric     break;
1675ffd83dbSDimitry Andric   case A64FX:
1685ffd83dbSDimitry Andric     CacheLineSize = 256;
16906c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
17006c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
171e8d8bef9SDimitry Andric     MaxInterleaveFactor = 4;
172e8d8bef9SDimitry Andric     PrefetchDistance = 128;
173e8d8bef9SDimitry Andric     MinPrefetchStride = 1024;
174e8d8bef9SDimitry Andric     MaxPrefetchIterationsAhead = 4;
175349cc55cSDimitry Andric     VScaleForTuning = 4;
1765ffd83dbSDimitry Andric     break;
177480093f4SDimitry Andric   case AppleA7:
178480093f4SDimitry Andric   case AppleA10:
179480093f4SDimitry Andric   case AppleA11:
180480093f4SDimitry Andric   case AppleA12:
181480093f4SDimitry Andric   case AppleA13:
182e8d8bef9SDimitry Andric   case AppleA14:
183bdd1243dSDimitry Andric   case AppleA15:
184bdd1243dSDimitry Andric   case AppleA16:
1855f757f3fSDimitry Andric   case AppleA17:
1860fca6ea1SDimitry Andric   case AppleM4:
1870b57cec5SDimitry Andric     CacheLineSize = 64;
1880b57cec5SDimitry Andric     PrefetchDistance = 280;
1890b57cec5SDimitry Andric     MinPrefetchStride = 2048;
1900b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 3;
191bdd1243dSDimitry Andric     switch (ARMProcFamily) {
192bdd1243dSDimitry Andric     case AppleA14:
193bdd1243dSDimitry Andric     case AppleA15:
194bdd1243dSDimitry Andric     case AppleA16:
1955f757f3fSDimitry Andric     case AppleA17:
1960fca6ea1SDimitry Andric     case AppleM4:
197bdd1243dSDimitry Andric       MaxInterleaveFactor = 4;
198bdd1243dSDimitry Andric       break;
199bdd1243dSDimitry Andric     default:
200bdd1243dSDimitry Andric       break;
201bdd1243dSDimitry Andric     }
2020b57cec5SDimitry Andric     break;
2030b57cec5SDimitry Andric   case ExynosM3:
2040b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2050b57cec5SDimitry Andric     MaxJumpTableSize = 20;
20606c3fb27SDimitry Andric     PrefFunctionAlignment = Align(32);
20706c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
2080b57cec5SDimitry Andric     break;
2090b57cec5SDimitry Andric   case Falkor:
2100b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2110b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2120b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2130b57cec5SDimitry Andric     CacheLineSize = 128;
2140b57cec5SDimitry Andric     PrefetchDistance = 820;
2150b57cec5SDimitry Andric     MinPrefetchStride = 2048;
2160b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 8;
2170b57cec5SDimitry Andric     break;
2180b57cec5SDimitry Andric   case Kryo:
2190b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2200b57cec5SDimitry Andric     VectorInsertExtractBaseCost = 2;
2210b57cec5SDimitry Andric     CacheLineSize = 128;
2220b57cec5SDimitry Andric     PrefetchDistance = 740;
2230b57cec5SDimitry Andric     MinPrefetchStride = 1024;
2240b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 11;
2250b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2260b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2270b57cec5SDimitry Andric     break;
2288bcb0991SDimitry Andric   case NeoverseE1:
22906c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
2308bcb0991SDimitry Andric     break;
2318bcb0991SDimitry Andric   case NeoverseN1:
23206c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
23306c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
23404eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
235349cc55cSDimitry Andric     break;
236e8d8bef9SDimitry Andric   case NeoverseN2:
2370fca6ea1SDimitry Andric   case NeoverseN3:
238bdd1243dSDimitry Andric   case NeoverseV2:
2390fca6ea1SDimitry Andric   case NeoverseV3:
24006c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
24106c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
24204eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
243349cc55cSDimitry Andric     VScaleForTuning = 1;
244349cc55cSDimitry Andric     break;
245e8d8bef9SDimitry Andric   case NeoverseV1:
24606c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
24706c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
24804eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
249349cc55cSDimitry Andric     VScaleForTuning = 2;
25006c3fb27SDimitry Andric     DefaultSVETFOpts = TailFoldingOpts::Simple;
251349cc55cSDimitry Andric     break;
252349cc55cSDimitry Andric   case Neoverse512TVB:
25306c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
254349cc55cSDimitry Andric     VScaleForTuning = 1;
255349cc55cSDimitry Andric     MaxInterleaveFactor = 4;
2568bcb0991SDimitry Andric     break;
2570b57cec5SDimitry Andric   case Saphira:
2580b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2590b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2600b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2610b57cec5SDimitry Andric     break;
2620b57cec5SDimitry Andric   case ThunderX2T99:
2630b57cec5SDimitry Andric     CacheLineSize = 64;
26406c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
26506c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
2660b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2670b57cec5SDimitry Andric     PrefetchDistance = 128;
2680b57cec5SDimitry Andric     MinPrefetchStride = 1024;
2690b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 4;
2700b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2710b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2720b57cec5SDimitry Andric     break;
2730b57cec5SDimitry Andric   case ThunderX:
2740b57cec5SDimitry Andric   case ThunderXT88:
2750b57cec5SDimitry Andric   case ThunderXT81:
2760b57cec5SDimitry Andric   case ThunderXT83:
2770b57cec5SDimitry Andric     CacheLineSize = 128;
27806c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
27906c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
2800b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2810b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2820b57cec5SDimitry Andric     break;
2830b57cec5SDimitry Andric   case TSV110:
2840b57cec5SDimitry Andric     CacheLineSize = 64;
28506c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
28606c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
2870b57cec5SDimitry Andric     break;
288e837bb5cSDimitry Andric   case ThunderX3T110:
289e837bb5cSDimitry Andric     CacheLineSize = 64;
29006c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
29106c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
292e837bb5cSDimitry Andric     MaxInterleaveFactor = 4;
293e837bb5cSDimitry Andric     PrefetchDistance = 128;
294e837bb5cSDimitry Andric     MinPrefetchStride = 1024;
295e837bb5cSDimitry Andric     MaxPrefetchIterationsAhead = 4;
296e837bb5cSDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
297e837bb5cSDimitry Andric     MinVectorRegisterBitWidth = 128;
298e837bb5cSDimitry Andric     break;
2992a66634dSDimitry Andric   case Ampere1:
300bdd1243dSDimitry Andric   case Ampere1A:
3014c2d3b02SDimitry Andric   case Ampere1B:
3022a66634dSDimitry Andric     CacheLineSize = 64;
30306c3fb27SDimitry Andric     PrefFunctionAlignment = Align(64);
30406c3fb27SDimitry Andric     PrefLoopAlignment = Align(64);
3052a66634dSDimitry Andric     MaxInterleaveFactor = 4;
3062a66634dSDimitry Andric     break;
3070fca6ea1SDimitry Andric   case Oryon:
3080fca6ea1SDimitry Andric     CacheLineSize = 64;
3090fca6ea1SDimitry Andric     PrefFunctionAlignment = Align(16);
3100fca6ea1SDimitry Andric     MaxInterleaveFactor = 4;
3110fca6ea1SDimitry Andric     PrefetchDistance = 128;
3120fca6ea1SDimitry Andric     MinPrefetchStride = 1024;
3130fca6ea1SDimitry Andric     break;
3140b57cec5SDimitry Andric   }
3155f757f3fSDimitry Andric 
3165f757f3fSDimitry Andric   if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
3175f757f3fSDimitry Andric     MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric 
320bdd1243dSDimitry Andric AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
321bdd1243dSDimitry Andric                                    StringRef TuneCPU, StringRef FS,
322fe6060f1SDimitry Andric                                    const TargetMachine &TM, bool LittleEndian,
323fe6060f1SDimitry Andric                                    unsigned MinSVEVectorSizeInBitsOverride,
324bdd1243dSDimitry Andric                                    unsigned MaxSVEVectorSizeInBitsOverride,
3250fca6ea1SDimitry Andric                                    bool IsStreaming, bool IsStreamingCompatible,
3265f757f3fSDimitry Andric                                    bool HasMinSize)
327349cc55cSDimitry Andric     : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
3280b57cec5SDimitry Andric       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
329bdd1243dSDimitry Andric       ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
3300b57cec5SDimitry Andric       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
3310fca6ea1SDimitry Andric       IsLittle(LittleEndian), IsStreaming(IsStreaming),
3320fca6ea1SDimitry Andric       IsStreamingCompatible(IsStreamingCompatible),
333fe6060f1SDimitry Andric       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
334fe6060f1SDimitry Andric       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
3355f757f3fSDimitry Andric       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
336349cc55cSDimitry Andric       TLInfo(TM, *this) {
3370b57cec5SDimitry Andric   if (AArch64::isX18ReservedByDefault(TT))
3380b57cec5SDimitry Andric     ReserveXRegister.set(18);
3390b57cec5SDimitry Andric 
3400b57cec5SDimitry Andric   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
3415ffd83dbSDimitry Andric   InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
3420b57cec5SDimitry Andric   Legalizer.reset(new AArch64LegalizerInfo(*this));
3430b57cec5SDimitry Andric 
3440b57cec5SDimitry Andric   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
3450b57cec5SDimitry Andric 
3460b57cec5SDimitry Andric   // FIXME: At this point, we can't rely on Subtarget having RBI.
3470b57cec5SDimitry Andric   // It's awkward to mix passing RBI and the Subtarget; should we pass
3480b57cec5SDimitry Andric   // TII/TRI as well?
3490b57cec5SDimitry Andric   InstSelector.reset(createAArch64InstructionSelector(
3500b57cec5SDimitry Andric       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
3510b57cec5SDimitry Andric 
3520b57cec5SDimitry Andric   RegBankInfo.reset(RBI);
353bdd1243dSDimitry Andric 
354bdd1243dSDimitry Andric   auto TRI = getRegisterInfo();
355bdd1243dSDimitry Andric   StringSet<> ReservedRegNames;
356bdd1243dSDimitry Andric   ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
357bdd1243dSDimitry Andric   for (unsigned i = 0; i < 29; ++i) {
358bdd1243dSDimitry Andric     if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
359bdd1243dSDimitry Andric       ReserveXRegisterForRA.set(i);
360bdd1243dSDimitry Andric   }
361bdd1243dSDimitry Andric   // X30 is named LR, so we can't use TRI->getName to check X30.
362bdd1243dSDimitry Andric   if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
363bdd1243dSDimitry Andric     ReserveXRegisterForRA.set(30);
364bdd1243dSDimitry Andric   // X29 is named FP, so we can't use TRI->getName to check X29.
365bdd1243dSDimitry Andric   if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
366bdd1243dSDimitry Andric     ReserveXRegisterForRA.set(29);
3675f757f3fSDimitry Andric 
3685f757f3fSDimitry Andric   AddressCheckPSV.reset(new AddressCheckPseudoSourceValue(TM));
3690b57cec5SDimitry Andric }
3700b57cec5SDimitry Andric 
3710b57cec5SDimitry Andric const CallLowering *AArch64Subtarget::getCallLowering() const {
3720b57cec5SDimitry Andric   return CallLoweringInfo.get();
3730b57cec5SDimitry Andric }
3740b57cec5SDimitry Andric 
3755ffd83dbSDimitry Andric const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
3765ffd83dbSDimitry Andric   return InlineAsmLoweringInfo.get();
3775ffd83dbSDimitry Andric }
3785ffd83dbSDimitry Andric 
3798bcb0991SDimitry Andric InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
3800b57cec5SDimitry Andric   return InstSelector.get();
3810b57cec5SDimitry Andric }
3820b57cec5SDimitry Andric 
3830b57cec5SDimitry Andric const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
3840b57cec5SDimitry Andric   return Legalizer.get();
3850b57cec5SDimitry Andric }
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
3880b57cec5SDimitry Andric   return RegBankInfo.get();
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric /// Find the target operand flags that describe how a global value should be
3920b57cec5SDimitry Andric /// referenced for the current subtarget.
3938bcb0991SDimitry Andric unsigned
3940b57cec5SDimitry Andric AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
3950b57cec5SDimitry Andric                                           const TargetMachine &TM) const {
3960b57cec5SDimitry Andric   // MachO large model always goes via a GOT, simply to get a single 8-byte
3970b57cec5SDimitry Andric   // absolute relocation on all global addresses.
3980b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
3990b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4000b57cec5SDimitry Andric 
40106c3fb27SDimitry Andric   // All globals dynamically protected by MTE must have their address tags
40206c3fb27SDimitry Andric   // synthesized. This is done by having the loader stash the tag in the GOT
40306c3fb27SDimitry Andric   // entry. Force all tagged globals (even ones with internal linkage) through
40406c3fb27SDimitry Andric   // the GOT.
40506c3fb27SDimitry Andric   if (GV->isTagged())
40606c3fb27SDimitry Andric     return AArch64II::MO_GOT;
40706c3fb27SDimitry Andric 
4080fca6ea1SDimitry Andric   if (!TM.shouldAssumeDSOLocal(GV)) {
409bdd1243dSDimitry Andric     if (GV->hasDLLImportStorageClass()) {
4100b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
411bdd1243dSDimitry Andric     }
4120b57cec5SDimitry Andric     if (getTargetTriple().isOSWindows())
4130b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
4140b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4150b57cec5SDimitry Andric   }
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric   // The small code model's direct accesses use ADRP, which cannot
4180b57cec5SDimitry Andric   // necessarily produce the value 0 (if the code is above 4GB).
4190b57cec5SDimitry Andric   // Same for the tiny code model, where we have a pc relative LDR.
4200b57cec5SDimitry Andric   if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
4210b57cec5SDimitry Andric       GV->hasExternalWeakLinkage())
4220b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4230b57cec5SDimitry Andric 
4248bcb0991SDimitry Andric   // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
4258bcb0991SDimitry Andric   // that their nominal addresses are tagged and outside of the code model. In
4268bcb0991SDimitry Andric   // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
4278bcb0991SDimitry Andric   // tag if necessary based on MO_TAGGED.
4288bcb0991SDimitry Andric   if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
4298bcb0991SDimitry Andric     return AArch64II::MO_NC | AArch64II::MO_TAGGED;
4308bcb0991SDimitry Andric 
4310b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
4320b57cec5SDimitry Andric }
4330b57cec5SDimitry Andric 
4348bcb0991SDimitry Andric unsigned AArch64Subtarget::classifyGlobalFunctionReference(
4350b57cec5SDimitry Andric     const GlobalValue *GV, const TargetMachine &TM) const {
4360b57cec5SDimitry Andric   // MachO large model always goes via a GOT, because we don't have the
4370b57cec5SDimitry Andric   // relocations available to do anything else..
4380b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
4390b57cec5SDimitry Andric       !GV->hasInternalLinkage())
4400b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4410b57cec5SDimitry Andric 
4420b57cec5SDimitry Andric   // NonLazyBind goes via GOT unless we know it's available locally.
4430b57cec5SDimitry Andric   auto *F = dyn_cast<Function>(GV);
4440fca6ea1SDimitry Andric   if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
4450fca6ea1SDimitry Andric       F->hasFnAttribute(Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
4460b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4470b57cec5SDimitry Andric 
448bdd1243dSDimitry Andric   if (getTargetTriple().isOSWindows()) {
4497a6dacacSDimitry Andric     if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
4507a6dacacSDimitry Andric       if (GV->hasDLLImportStorageClass()) {
4517a6dacacSDimitry Andric         // On Arm64EC, if we're calling a symbol from the import table
4527a6dacacSDimitry Andric         // directly, use MO_ARM64EC_CALLMANGLE.
4537a6dacacSDimitry Andric         return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
4547a6dacacSDimitry Andric                AArch64II::MO_ARM64EC_CALLMANGLE;
4557a6dacacSDimitry Andric       }
4567a6dacacSDimitry Andric       if (GV->hasExternalLinkage()) {
4577a6dacacSDimitry Andric         // If we're calling a symbol directly, use the mangled form in the
4587a6dacacSDimitry Andric         // call instruction.
4597a6dacacSDimitry Andric         return AArch64II::MO_ARM64EC_CALLMANGLE;
4607a6dacacSDimitry Andric       }
461bdd1243dSDimitry Andric     }
462bdd1243dSDimitry Andric 
463480093f4SDimitry Andric     // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
464480093f4SDimitry Andric     return ClassifyGlobalReference(GV, TM);
465bdd1243dSDimitry Andric   }
466480093f4SDimitry Andric 
4670b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
4680b57cec5SDimitry Andric }
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
4710b57cec5SDimitry Andric                                            unsigned NumRegionInstrs) const {
4720b57cec5SDimitry Andric   // LNT run (at least on Cyclone) showed reasonably significant gains for
4730b57cec5SDimitry Andric   // bi-directional scheduling. 253.perlbmk.
4740b57cec5SDimitry Andric   Policy.OnlyTopDown = false;
4750b57cec5SDimitry Andric   Policy.OnlyBottomUp = false;
4760b57cec5SDimitry Andric   // Enabling or Disabling the latency heuristic is a close call: It seems to
4770b57cec5SDimitry Andric   // help nearly no benchmark on out-of-order architectures, on the other hand
4780b57cec5SDimitry Andric   // it regresses register pressure on a few benchmarking.
4790b57cec5SDimitry Andric   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
4800b57cec5SDimitry Andric }
4810b57cec5SDimitry Andric 
4820fca6ea1SDimitry Andric void AArch64Subtarget::adjustSchedDependency(
4830fca6ea1SDimitry Andric     SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
4840fca6ea1SDimitry Andric     const TargetSchedModel *SchedModel) const {
4850fca6ea1SDimitry Andric   if (!SchedModel || Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
4860fca6ea1SDimitry Andric       !Def->isInstr() || !Use->isInstr() ||
4870fca6ea1SDimitry Andric       (Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
4880fca6ea1SDimitry Andric        Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
4890fca6ea1SDimitry Andric     return;
4900fca6ea1SDimitry Andric 
4910fca6ea1SDimitry Andric   // If the Def is a BUNDLE, find the last instruction in the bundle that defs
4920fca6ea1SDimitry Andric   // the register.
4930fca6ea1SDimitry Andric   const MachineInstr *DefMI = Def->getInstr();
4940fca6ea1SDimitry Andric   if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
4950fca6ea1SDimitry Andric     Register Reg = DefMI->getOperand(DefOpIdx).getReg();
4960fca6ea1SDimitry Andric     for (const auto &Op : const_mi_bundle_ops(*DefMI)) {
4970fca6ea1SDimitry Andric       if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
4980fca6ea1SDimitry Andric         DefMI = Op.getParent();
4990fca6ea1SDimitry Andric         DefOpIdx = Op.getOperandNo();
5000fca6ea1SDimitry Andric       }
5010fca6ea1SDimitry Andric     }
5020fca6ea1SDimitry Andric   }
5030fca6ea1SDimitry Andric 
5040fca6ea1SDimitry Andric   // If the Use is a BUNDLE, find the first instruction that uses the Reg.
5050fca6ea1SDimitry Andric   const MachineInstr *UseMI = Use->getInstr();
5060fca6ea1SDimitry Andric   if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
5070fca6ea1SDimitry Andric     Register Reg = UseMI->getOperand(UseOpIdx).getReg();
5080fca6ea1SDimitry Andric     for (const auto &Op : const_mi_bundle_ops(*UseMI)) {
5090fca6ea1SDimitry Andric       if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
5100fca6ea1SDimitry Andric         UseMI = Op.getParent();
5110fca6ea1SDimitry Andric         UseOpIdx = Op.getOperandNo();
5120fca6ea1SDimitry Andric         break;
5130fca6ea1SDimitry Andric       }
5140fca6ea1SDimitry Andric     }
5150fca6ea1SDimitry Andric   }
5160fca6ea1SDimitry Andric 
5170fca6ea1SDimitry Andric   Dep.setLatency(
5180fca6ea1SDimitry Andric       SchedModel->computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx));
5190fca6ea1SDimitry Andric }
5200fca6ea1SDimitry Andric 
5210b57cec5SDimitry Andric bool AArch64Subtarget::enableEarlyIfConversion() const {
5220b57cec5SDimitry Andric   return EnableEarlyIfConvert;
5230b57cec5SDimitry Andric }
5240b57cec5SDimitry Andric 
5250b57cec5SDimitry Andric bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
5260b57cec5SDimitry Andric   if (!UseAddressTopByteIgnored)
5270b57cec5SDimitry Andric     return false;
5280b57cec5SDimitry Andric 
52981ad6265SDimitry Andric   if (TargetTriple.isDriverKit())
53081ad6265SDimitry Andric     return true;
5310b57cec5SDimitry Andric   if (TargetTriple.isiOS()) {
5320eae32dcSDimitry Andric     return TargetTriple.getiOSVersion() >= VersionTuple(8);
5330b57cec5SDimitry Andric   }
5340b57cec5SDimitry Andric 
5350b57cec5SDimitry Andric   return false;
5360b57cec5SDimitry Andric }
5370b57cec5SDimitry Andric 
5380b57cec5SDimitry Andric std::unique_ptr<PBQPRAConstraint>
5390b57cec5SDimitry Andric AArch64Subtarget::getCustomPBQPConstraints() const {
5408bcb0991SDimitry Andric   return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
5410b57cec5SDimitry Andric }
5420b57cec5SDimitry Andric 
5430b57cec5SDimitry Andric void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
5440b57cec5SDimitry Andric   // We usually compute max call frame size after ISel. Do the computation now
5450b57cec5SDimitry Andric   // if the .mir file didn't specify it. Note that this will probably give you
5460b57cec5SDimitry Andric   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
5470b57cec5SDimitry Andric   // instructions, specify explicitly if you need it to be correct.
5480b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
5490b57cec5SDimitry Andric   if (!MFI.isMaxCallFrameSizeComputed())
5500b57cec5SDimitry Andric     MFI.computeMaxCallFrameSize(MF);
5510b57cec5SDimitry Andric }
5525ffd83dbSDimitry Andric 
553fe6060f1SDimitry Andric bool AArch64Subtarget::useAA() const { return UseAA; }
554bdd1243dSDimitry Andric 
5555f757f3fSDimitry Andric // If return address signing is enabled, tail calls are emitted as follows:
5565f757f3fSDimitry Andric //
5575f757f3fSDimitry Andric // ```
5585f757f3fSDimitry Andric //   <authenticate LR>
5595f757f3fSDimitry Andric //   <check LR>
5605f757f3fSDimitry Andric //   TCRETURN          ; the callee may sign and spill the LR in its prologue
5615f757f3fSDimitry Andric // ```
5625f757f3fSDimitry Andric //
5635f757f3fSDimitry Andric // LR may require explicit checking because if FEAT_FPAC is not implemented
5645f757f3fSDimitry Andric // and LR was tampered with, then `<authenticate LR>` will not generate an
5655f757f3fSDimitry Andric // exception on its own. Later, if the callee spills the signed LR value and
5665f757f3fSDimitry Andric // neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
5675f757f3fSDimitry Andric // the higher bits of LR thus hiding the authentication failure.
568*36b606aeSDimitry Andric AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
569*36b606aeSDimitry Andric     const MachineFunction &MF) const {
570*36b606aeSDimitry Andric   // TODO: Check subtarget for the scheme. Present variant is a default for
571*36b606aeSDimitry Andric   // pauthtest ABI.
572*36b606aeSDimitry Andric   if (MF.getFunction().hasFnAttribute("ptrauth-returns") &&
573*36b606aeSDimitry Andric       MF.getFunction().hasFnAttribute("ptrauth-auth-traps"))
574*36b606aeSDimitry Andric     return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
5755f757f3fSDimitry Andric   if (AuthenticatedLRCheckMethod.getNumOccurrences())
5765f757f3fSDimitry Andric     return AuthenticatedLRCheckMethod;
5775f757f3fSDimitry Andric 
5785f757f3fSDimitry Andric   // At now, use None by default because checks may introduce an unexpected
5795f757f3fSDimitry Andric   // performance regression or incompatibility with execute-only mappings.
5805f757f3fSDimitry Andric   return AArch64PAuth::AuthCheckMethod::None;
581bdd1243dSDimitry Andric }
5820fca6ea1SDimitry Andric 
5830fca6ea1SDimitry Andric std::optional<uint16_t>
5840fca6ea1SDimitry Andric AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
5850fca6ea1SDimitry Andric     const Function &ParentFn) const {
5860fca6ea1SDimitry Andric   if (!ParentFn.hasFnAttribute("ptrauth-indirect-gotos"))
5870fca6ea1SDimitry Andric     return std::nullopt;
5880fca6ea1SDimitry Andric   // We currently have one simple mechanism for all targets.
5890fca6ea1SDimitry Andric   // This isn't ABI, so we can always do better in the future.
5900fca6ea1SDimitry Andric   return getPointerAuthStableSipHash(
5910fca6ea1SDimitry Andric       (Twine(ParentFn.getName()) + " blockaddress").str());
5920fca6ea1SDimitry Andric }
5930fca6ea1SDimitry Andric 
5940fca6ea1SDimitry Andric bool AArch64Subtarget::enableMachinePipeliner() const {
5950fca6ea1SDimitry Andric   return getSchedModel().hasInstrSchedModel();
5960fca6ea1SDimitry Andric }
597