//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/SipHash.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool> MachOUseNonLazyBind(
    "aarch64-macho-enable-nonlazybind",
    cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
    cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers: they become unavailable to the register
// allocator but can still be used where the ABI requires them, for example
// for passing arguments to a function call.
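// For example, passing -reserve-regs-for-regalloc=X9,X10 is expected to keep
// x9 and x10 away from the register allocator.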
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by register allocator. "
                  "Should only be used for testing register allocator."),
                  cl::CommaSeparated, cl::Hidden);

static cl::opt<AArch64PAuth::AuthCheckMethod>
    AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
                               cl::Hidden,
                               cl::desc("Override the variant of check applied "
                                        "to authenticated LR during tail call"),
                               cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));

static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
    "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on AArch64"));

static cl::opt<unsigned> AArch64StreamingHazardSize(
    "aarch64-streaming-hazard-size",
    cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
    cl::init(0), cl::Hidden);

static cl::alias AArch64StreamingStackHazardSize(
    "aarch64-stack-hazard-size",
    cl::desc("alias for -aarch64-streaming-hazard-size"),
    cl::aliasopt(AArch64StreamingHazardSize));

// Subreg liveness tracking is disabled by default for now until all issues
// are ironed out. This option allows the feature to be used in tests.
static cl::opt<bool>
    EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Enable subreg liveness tracking"));

static cl::opt<bool>
    UseScalarIncVL("sve-use-scalar-inc-vl", cl::init(false), cl::Hidden,
                   cl::desc("Prefer add+cnt over addvl/inc/dec"));

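// Honour the -aarch64-insert-extract-base-cost override when it was given on
// the command line; otherwise fall back to the subtarget's default value.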
unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}

AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString,
    bool HasMinSize) {
  // Determine default and user-specified characteristics

  if (CPUString.empty())
    CPUString = "generic";

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties(HasMinSize);

  return *this;
}

void AArch64Subtarget::initializeProperties(bool HasMinSize) {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Others:
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
  case CortexR82:
  case CortexR82AE:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionAlignment = Align(8);
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78AE:
  case CortexA78C:
  case CortexX1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA510:
  case CortexA520:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexA720:
  case CortexA725:
  case CortexX2:
  case CortexX3:
  case CortexX4:
  case CortexX925:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case MONAKA:
    VScaleForTuning = 2;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
  case AppleA17:
  case AppleM4:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
    case AppleA17:
    case AppleM4:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = Align(32);
    PrefLoopAlignment = Align(16);
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionAlignment = Align(8);
    break;
  case NeoverseN1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseV2:
  case NeoverseV3:
    EpilogueVectorizationMinVF = 8;
    MaxInterleaveFactor = 4;
    ScatterOverhead = 13;
    LLVM_FALLTHROUGH;
  case NeoverseN2:
  case NeoverseN3:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    DefaultSVETFOpts = TailFoldingOpts::Simple;
    break;
  case Neoverse512TVB:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
  case Ampere1B:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(64);
    PrefLoopAlignment = Align(64);
    MaxInterleaveFactor = 4;
    break;
  case Oryon:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    break;
  }

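  // Only override MinimumJumpTableEntries when we are not optimizing for
  // minimum size, unless the threshold was set explicitly on the command line.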
  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
    MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool IsStreaming, bool IsStreamingCompatible,
                                   bool HasMinSize)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian), IsStreaming(IsStreaming),
      IsStreamingCompatible(IsStreamingCompatible),
      StreamingHazardSize(
          AArch64StreamingHazardSize.getNumOccurrences() > 0
              ? std::optional<unsigned>(AArch64StreamingHazardSize)
              : std::nullopt),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
      TLInfo(TM, *this) {
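  // Some platforms (e.g. Darwin and Windows) treat x18 as a platform register
  // and reserve it by default.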
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames;
  ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
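  // X0-X28 can be matched against the names TRI reports; FP (x29) and LR (x30)
  // are handled separately below because TRI names them "FP" and "LR".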
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);

  EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // The MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  // All globals dynamically protected by MTE must have their address tags
  // synthesized. This is done by having the loader stash the tag in the GOT
  // entry. Force all tagged globals (even ones with internal linkage) through
  // the GOT.
  if (GV->isTagged())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(GV)) {
    if (GV->hasDLLImportStorageClass()) {
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // The same applies to the tiny code model, where we have a PC-relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}

unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // The MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via the GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
      F->hasFnAttribute(Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
      if (GV->hasDLLImportStorageClass()) {
        // On Arm64EC, if we're calling a symbol from the import table
        // directly, use MO_ARM64EC_CALLMANGLE.
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
               AArch64II::MO_ARM64EC_CALLMANGLE;
      }
      if (GV->hasExternalLinkage()) {
        // If we're calling a symbol directly, use the mangled form in the
        // call instruction.
        return AArch64II::MO_ARM64EC_CALLMANGLE;
      }
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // LNT runs (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling, e.g. on 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help almost no benchmark on out-of-order architectures, while on the other
  // hand it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

void AArch64Subtarget::adjustSchedDependency(
    SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
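  // Only adjust register data dependencies where at least one end of the edge
  // is a BUNDLE; all other edges keep their default latency.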
  if (!SchedModel || Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
      !Def->isInstr() || !Use->isInstr() ||
      (Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
       Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
    return;

  // If the Def is a BUNDLE, find the last instruction in the bundle that defs
  // the register.
  const MachineInstr *DefMI = Def->getInstr();
  if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = DefMI->getOperand(DefOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*DefMI)) {
      if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
        DefMI = Op.getParent();
        DefOpIdx = Op.getOperandNo();
      }
    }
  }

  // If the Use is a BUNDLE, find the first instruction that uses the Reg.
  const MachineInstr *UseMI = Use->getInstr();
  if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = UseMI->getOperand(UseOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*UseMI)) {
      if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
        UseMI = Op.getParent();
        UseOpIdx = Op.getOperandNo();
        break;
      }
    }
  }

  Dep.setLatency(
      SchedModel->computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx));
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

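  // Only assume TBI is usable on OSes where it is known to be enabled for
  // userspace: DriverKit, and iOS 8 or later.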
  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}

std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

bool AArch64Subtarget::useAA() const { return UseAA; }

bool AArch64Subtarget::useScalarIncVL() const {
  // If SVE2 or SME is present (i.e. we are not SVE1-only) and UseScalarIncVL
  // was not set explicitly, enable it by default.
  if (UseScalarIncVL.getNumOccurrences())
    return UseScalarIncVL;
  return hasSVE2() || hasSME();
}

// If return address signing is enabled, tail calls are emitted as follows:
//
// ```
//   <authenticate LR>
//   <check LR>
//   TCRETURN          ; the callee may sign and spill the LR in its prologue
// ```
//
// LR may require explicit checking because if FEAT_FPAC is not implemented
// and LR was tampered with, then `<authenticate LR>` will not generate an
// exception on its own. Later, if the callee spills the signed LR value and
// neither FEAT_PAuth2 nor FEAT_EPAC is implemented, the valid PAC replaces
// the higher bits of LR, thus hiding the authentication failure.
AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
    const MachineFunction &MF) const {
  // TODO: Check the subtarget for the scheme. The present variant is the
  // default for the pauthtest ABI.
  if (MF.getFunction().hasFnAttribute("ptrauth-returns") &&
      MF.getFunction().hasFnAttribute("ptrauth-auth-traps"))
    return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
  if (AuthenticatedLRCheckMethod.getNumOccurrences())
    return AuthenticatedLRCheckMethod;

  // For now, use None by default because checks may introduce an unexpected
  // performance regression or an incompatibility with execute-only mappings.
  return AArch64PAuth::AuthCheckMethod::None;
}

std::optional<uint16_t>
AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
    const Function &ParentFn) const {
  if (!ParentFn.hasFnAttribute("ptrauth-indirect-gotos"))
    return std::nullopt;
  // We currently have one simple mechanism for all targets.
  // This isn't ABI, so we can always do better in the future.
  return getPointerAuthStableSipHash(
      (Twine(ParentFn.getName()) + " blockaddress").str());
}

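// The machine pipeliner relies on a per-instruction scheduling model, so only
// enable it when the selected CPU provides one.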
bool AArch64Subtarget::enableMachinePipeliner() const {
  return getSchedModel().hasInstrSchedModel();
}
637