//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/SipHash.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool> MachOUseNonLazyBind(
    "aarch64-macho-enable-nonlazybind",
    cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
    cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers, so they are unavailable to the register
// allocator but can still be used where the ABI requires them, such as for
// passing arguments to a function call.
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by register allocator. "
                  "Should only be used for testing register allocator."),
                  cl::CommaSeparated, cl::Hidden);

static cl::opt<AArch64PAuth::AuthCheckMethod>
    AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
                               cl::Hidden,
                               cl::desc("Override the variant of check applied "
                                        "to authenticated LR during tail call"),
                               cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));

static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
    "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on AArch64"));

static cl::opt<unsigned> AArch64StreamingHazardSize(
    "aarch64-streaming-hazard-size",
    cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
    cl::init(0), cl::Hidden);

static cl::alias AArch64StreamingStackHazardSize(
    "aarch64-stack-hazard-size",
    cl::desc("alias for -aarch64-streaming-hazard-size"),
    cl::aliasopt(AArch64StreamingHazardSize));

// Subreg liveness tracking is disabled by default for now until all issues
// are ironed out. This option allows the feature to be used in tests.
static cl::opt<bool>
    EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Enable subreg liveness tracking"));

static cl::opt<bool>
    UseScalarIncVL("sve-use-scalar-inc-vl", cl::init(false), cl::Hidden,
                   cl::desc("Prefer add+cnt over addvl/inc/dec"));

unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}

AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString,
    bool HasMinSize) {
  // Determine default and user-specified characteristics.

  if (CPUString.empty())
    CPUString = "generic";

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties(HasMinSize);

  return *this;
}

void AArch64Subtarget::initializeProperties(bool HasMinSize) {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Others:
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
  case CortexR82:
  case CortexR82AE:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionAlignment = Align(8);
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78AE:
  case CortexA78C:
  case CortexX1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA510:
  case CortexA520:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexA720:
  case CortexA725:
  case CortexX2:
  case CortexX3:
  case CortexX4:
  case CortexX925:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case MONAKA:
    VScaleForTuning = 2;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
  case AppleA17:
  case AppleM4:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
    case AppleA17:
    case AppleM4:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = Align(32);
    PrefLoopAlignment = Align(16);
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionAlignment = Align(8);
    break;
  case NeoverseN1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseV2:
  case NeoverseV3:
    EpilogueVectorizationMinVF = 8;
    MaxInterleaveFactor = 4;
    ScatterOverhead = 13;
    LLVM_FALLTHROUGH;
  case NeoverseN2:
  case NeoverseN3:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    DefaultSVETFOpts = TailFoldingOpts::Simple;
    break;
  case Neoverse512TVB:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
  case Ampere1B:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(64);
    PrefLoopAlignment = Align(64);
    MaxInterleaveFactor = 4;
    break;
  case Oryon:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    break;
  }

  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
    MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool IsStreaming, bool IsStreamingCompatible,
                                   bool HasMinSize)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian), IsStreaming(IsStreaming),
      IsStreamingCompatible(IsStreamingCompatible),
      StreamingHazardSize(
          AArch64StreamingHazardSize.getNumOccurrences() > 0
              ? std::optional<unsigned>(AArch64StreamingHazardSize)
              : std::nullopt),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
      TLInfo(TM, *this) {
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames;
  ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);

  EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  // All globals dynamically protected by MTE must have their address tags
  // synthesized. This is done by having the loader stash the tag in the GOT
  // entry. Force all tagged globals (even ones with internal linkage) through
  // the GOT.
  if (GV->isTagged())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(GV)) {
    if (GV->hasDLLImportStorageClass()) {
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}

unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
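  // (On Mach-O this is only done when -aarch64-macho-enable-nonlazybind is
  // set; see the MachOUseNonLazyBind option above.)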
  auto *F = dyn_cast<Function>(GV);
  if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
      F->hasFnAttribute(Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
      if (GV->hasDLLImportStorageClass()) {
        // On Arm64EC, if we're calling a symbol from the import table
        // directly, use MO_ARM64EC_CALLMANGLE.
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
               AArch64II::MO_ARM64EC_CALLMANGLE;
      }
      if (GV->hasExternalLinkage()) {
        // If we're calling a symbol directly, use the mangled form in the
        // call instruction.
        return AArch64II::MO_ARM64EC_CALLMANGLE;
      }
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling. 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help nearly no benchmark on out-of-order architectures; on the other hand
  // it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

void AArch64Subtarget::adjustSchedDependency(
    SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!SchedModel || Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
      !Def->isInstr() || !Use->isInstr() ||
      (Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
       Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
    return;

  // If the Def is a BUNDLE, find the last instruction in the bundle that defs
  // the register.
  const MachineInstr *DefMI = Def->getInstr();
  if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = DefMI->getOperand(DefOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*DefMI)) {
      if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
        DefMI = Op.getParent();
        DefOpIdx = Op.getOperandNo();
      }
    }
  }

  // If the Use is a BUNDLE, find the first instruction that uses the Reg.
  const MachineInstr *UseMI = Use->getInstr();
  if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = UseMI->getOperand(UseOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*UseMI)) {
      if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
        UseMI = Op.getParent();
        UseOpIdx = Op.getOperandNo();
        break;
      }
    }
  }

  Dep.setLatency(
      SchedModel->computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx));
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}

std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

bool AArch64Subtarget::useAA() const { return UseAA; }

bool AArch64Subtarget::useScalarIncVL() const {
  // If SVE2 or SME is present (we are not SVE-1 only) and UseScalarIncVL
  // is not otherwise set, enable it by default.
  if (UseScalarIncVL.getNumOccurrences())
    return UseScalarIncVL;
  return hasSVE2() || hasSME();
}

// If return address signing is enabled, tail calls are emitted as follows:
//
// ```
// <authenticate LR>
// <check LR>
// TCRETURN ; the callee may sign and spill the LR in its prologue
// ```
//
// LR may require explicit checking because if FEAT_FPAC is not implemented
// and LR was tampered with, then `<authenticate LR>` will not generate an
// exception on its own. Later, if the callee spills the signed LR value and
// neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
// the higher bits of LR, thus hiding the authentication failure.
AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
    const MachineFunction &MF) const {
  // TODO: Check subtarget for the scheme. Present variant is a default for
  // pauthtest ABI.
  if (MF.getFunction().hasFnAttribute("ptrauth-returns") &&
      MF.getFunction().hasFnAttribute("ptrauth-auth-traps"))
    return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
  if (AuthenticatedLRCheckMethod.getNumOccurrences())
    return AuthenticatedLRCheckMethod;

  // For now, use None by default because checks may introduce an unexpected
  // performance regression or incompatibility with execute-only mappings.
  return AArch64PAuth::AuthCheckMethod::None;
}

std::optional<uint16_t>
AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
    const Function &ParentFn) const {
  if (!ParentFn.hasFnAttribute("ptrauth-indirect-gotos"))
    return std::nullopt;
  // We currently have one simple mechanism for all targets.
  // This isn't ABI, so we can always do better in the future.
  return getPointerAuthStableSipHash(
      (Twine(ParentFn.getName()) + " blockaddress").str());
}

bool AArch64Subtarget::enableMachinePipeliner() const {
  return getSchedModel().hasInstrSchedModel();
}