//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64-specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H

#include "AArch64FrameLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64PointerAuth.h"
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"

#define GET_SUBTARGETINFO_HEADER
#include "AArch64GenSubtargetInfo.inc"

namespace llvm {
class GlobalValue;
class StringRef;
class Triple;

class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
  enum ARMProcFamilyEnum : uint8_t {
    Others,
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
#undef ARM_PROCESSOR_FAMILY
  };

protected:
  /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
  ARMProcFamilyEnum ARMProcFamily = Others;

  // Enable 64-bit vectorization in SLP.
  unsigned MinVectorRegisterBitWidth = 64;

  // Bool members corresponding to the SubtargetFeatures defined in tablegen.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;
#include "AArch64GenSubtargetInfo.inc"

  unsigned EpilogueVectorizationMinVF = 16;
  uint8_t MaxInterleaveFactor = 2;
  uint8_t VectorInsertExtractBaseCost = 2;
  uint16_t CacheLineSize = 0;
  // Default scatter/gather overhead.
  unsigned ScatterOverhead = 10;
  unsigned GatherOverhead = 10;
  uint16_t PrefetchDistance = 0;
  uint16_t MinPrefetchStride = 1;
  unsigned MaxPrefetchIterationsAhead = UINT_MAX;
  Align PrefFunctionAlignment;
  Align PrefLoopAlignment;
  unsigned MaxBytesForLoopAlignment = 0;
  unsigned MinimumJumpTableEntries = 4;
  unsigned MaxJumpTableSize = 0;

  // ReserveXRegister[i] - X#i is not available as a general purpose register.
  BitVector ReserveXRegister;

  // ReserveXRegisterForRA[i] - X#i is not available to the register allocator.
  BitVector ReserveXRegisterForRA;

  // CustomCallSavedXRegs[i] - X#i is callee saved under a custom calling
  // convention.
  BitVector CustomCallSavedXRegs;

  bool IsLittle;

  bool IsStreaming;
  bool IsStreamingCompatible;
  std::optional<unsigned> StreamingHazardSize;
  unsigned MinSVEVectorSizeInBits;
  unsigned MaxSVEVectorSizeInBits;
  unsigned VScaleForTuning = 1;
  TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;

  bool EnableSubregLiveness;
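  // Illustrative sketch only (not generated code): for a hypothetical tablegen
  // feature with ATTRIBUTE "HasFoo", DEFAULT "false", and GETTER "hasFoo", the
  // GET_SUBTARGETINFO_MACRO expansion above and the matching getter expansion
  // in the public section further down reduce to roughly:
  //
  //   bool HasFoo = false;                    // member, from the block above
  //   bool hasFoo() const { return HasFoo; }  // getter, from the block below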
  /// TargetTriple - What processor and OS we're targeting.
  Triple TargetTriple;

  AArch64FrameLowering FrameLowering;
  AArch64InstrInfo InstrInfo;
  AArch64SelectionDAGInfo TSInfo;
  AArch64TargetLowering TLInfo;

  /// GlobalISel related APIs.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
  std::unique_ptr<InstructionSelector> InstSelector;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;

private:
  /// initializeSubtargetDependencies - Initializes using CPUString and the
  /// passed in feature string so that we can use initializer lists for
  /// subtarget initialization.
  AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
                                                    StringRef CPUString,
                                                    StringRef TuneCPUString,
                                                    bool HasMinSize);

  /// Initialize properties based on the selected processor family.
  void initializeProperties(bool HasMinSize);

public:
  /// This constructor initializes the data members to match those
  /// of the specified triple.
  AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
                   StringRef FS, const TargetMachine &TM, bool LittleEndian,
                   unsigned MinSVEVectorSizeInBitsOverride = 0,
                   unsigned MaxSVEVectorSizeInBitsOverride = 0,
                   bool IsStreaming = false, bool IsStreamingCompatible = false,
                   bool HasMinSize = false);

  // Getters for SubtargetFeatures defined in tablegen.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
#include "AArch64GenSubtargetInfo.inc"

  const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }
  const AArch64FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }
  const AArch64TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }
  const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
  const AArch64RegisterInfo *getRegisterInfo() const override {
    return &getInstrInfo()->getRegisterInfo();
  }
  const CallLowering *getCallLowering() const override;
  const InlineAsmLowering *getInlineAsmLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;
  const Triple &getTargetTriple() const { return TargetTriple; }
  bool enableMachineScheduler() const override { return true; }
  bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
  bool enableSubRegLiveness() const override { return EnableSubregLiveness; }

  bool enableMachinePipeliner() const override;
  bool useDFAforSMS() const override { return false; }

  /// Returns the ARM processor family.
  /// Avoid this function! CPU specifics should be kept local to this class
  /// and preferably modeled with SubtargetFeatures or properties in
  /// initializeProperties().
  ARMProcFamilyEnum getProcFamily() const {
    return ARMProcFamily;
  }

  bool isXRaySupported() const override { return true; }

  /// Returns true if the function has a streaming body.
  bool isStreaming() const { return IsStreaming; }
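  // Minimal usage sketch (illustrative, not part of this interface): target
  // passes normally obtain this subtarget from the MachineFunction rather
  // than constructing it directly, e.g.:
  //
  //   const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  //   if (ST.isStreaming()) {
  //     // streaming-mode-specific handling
  //   }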
  /// Returns true if the function has a streaming-compatible body.
  bool isStreamingCompatible() const { return IsStreamingCompatible; }

  /// Returns the size of the memory region that, if accessed by both the CPU
  /// and the SME unit, could result in a hazard. 0 = disabled.
  unsigned getStreamingHazardSize() const {
    return StreamingHazardSize.value_or(
        !hasSMEFA64() && hasSME() && hasSVE() ? 1024 : 0);
  }

  /// Returns true if the target has NEON and the function at runtime is known
  /// to have NEON enabled (e.g. the function is known not to be in
  /// streaming-SVE mode, which disables NEON instructions).
  bool isNeonAvailable() const {
    return hasNEON() &&
           (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
  }

  /// Returns true if the target has SVE and can use the full range of SVE
  /// instructions, for example because the function is known not to be in
  /// streaming-SVE mode or because the target has FEAT_FA64 enabled.
  bool isSVEAvailable() const {
    return hasSVE() &&
           (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
  }

  /// Returns true if the target has access to the streaming-compatible subset
  /// of SVE instructions.
  bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); }

  /// Returns true if the target has access to either the full range of SVE
  /// instructions or the streaming-compatible subset of SVE instructions.
  bool isSVEorStreamingSVEAvailable() const {
    return hasSVE() || isStreamingSVEAvailable();
  }

  unsigned getMinVectorRegisterBitWidth() const {
    // Don't assume any minimum vector size when PSTATE.SM may not be 0,
    // because we don't yet have streaming-compatible codegen that we trust to
    // be safe for functions that may be executed in streaming-SVE mode.
    // By returning '0' here, we disable vectorization.
    if (!isSVEAvailable() && !isNeonAvailable())
      return 0;
    return MinVectorRegisterBitWidth;
  }

  bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
  bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
  unsigned getNumXRegisterReserved() const {
    BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
    AllReservedX |= ReserveXRegister;
    AllReservedX |= ReserveXRegisterForRA;
    return AllReservedX.count();
  }
  bool isLRReservedForRA() const { return ReserveLRForRA; }
  bool isXRegCustomCalleeSaved(size_t i) const {
    return CustomCallSavedXRegs[i];
  }
  bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
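  // Hedged sketch of how the availability predicates above are intended to be
  // used: code that wants to emit vector instructions should consult these
  // predicates rather than the raw hasNEON()/hasSVE() feature bits, e.g.:
  //
  //   if (Subtarget.isSVEorStreamingSVEAvailable()) {
  //     // Full SVE or the streaming-compatible SVE subset may be used.
  //   } else if (Subtarget.isNeonAvailable()) {
  //     // NEON is known to be enabled at runtime.
  //   }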
  /// Return true if the CPU supports any kind of instruction fusion.
  bool hasFusion() const {
    return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
           hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
           hasFuseAdrpAdd() || hasFuseLiterals();
  }

  unsigned getEpilogueVectorizationMinVF() const {
    return EpilogueVectorizationMinVF;
  }
  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
  unsigned getVectorInsertExtractBaseCost() const;
  unsigned getCacheLineSize() const override { return CacheLineSize; }
  unsigned getScatterOverhead() const { return ScatterOverhead; }
  unsigned getGatherOverhead() const { return GatherOverhead; }
  unsigned getPrefetchDistance() const override { return PrefetchDistance; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return MinPrefetchStride;
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return MaxPrefetchIterationsAhead;
  }
  Align getPrefFunctionAlignment() const {
    return PrefFunctionAlignment;
  }
  Align getPrefLoopAlignment() const { return PrefLoopAlignment; }

  unsigned getMaxBytesForLoopAlignment() const {
    return MaxBytesForLoopAlignment;
  }

  unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
  unsigned getMinimumJumpTableEntries() const {
    return MinimumJumpTableEntries;
  }

  /// The CPU has TBI (the top byte of addresses is ignored during HW address
  /// translation) and the OS enables it.
  bool supportsAddressTopByteIgnored() const;

  bool isLittleEndian() const { return IsLittle; }

  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  bool isTargetIOS() const { return TargetTriple.isiOS(); }
  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
  bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }

  bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }

  bool isTargetILP32() const {
    return TargetTriple.isArch32Bit() ||
           TargetTriple.getEnvironment() == Triple::GNUILP32;
  }

  bool useAA() const override;

  bool addrSinkUsingGEPs() const override {
    // Keeping GEPs inbounds is important for exploiting AArch64
    // addressing-modes in ILP32 mode.
    return useAA() || isTargetILP32();
  }

  bool useSmallAddressing() const {
    switch (TLInfo.getTargetMachine().getCodeModel()) {
    case CodeModel::Kernel:
      // Kernel is currently allowed only for Fuchsia targets,
      // where it is the same as Small for almost all purposes.
    case CodeModel::Small:
      return true;
    default:
      return false;
    }
  }

  /// ParseSubtargetFeatures - Parses the feature string, setting the specified
  /// subtarget options. The definition of this function is auto-generated by
  /// tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
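  // Hedged illustration: the tuning knobs exposed by the getters above (cache
  // line size, prefetch distance, alignments, jump-table limits, ...) are not
  // set here; they are overridden per processor family in
  // initializeProperties(). A hypothetical entry (values invented purely for
  // illustration) might look like:
  //
  //   case CortexA57:
  //     PrefFunctionAlignment = Align(16);
  //     CacheLineSize = 64;
  //     break;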
  /// ClassifyGlobalReference - Find the target operand flags that describe
  /// how a global value should be referenced for the current subtarget.
  unsigned ClassifyGlobalReference(const GlobalValue *GV,
                                   const TargetMachine &TM) const;

  unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
                                           const TargetMachine &TM) const;

  /// This function is designed to be compatible with the function definition
  /// in other targets and to avoid a build error about the virtual function
  /// defined in the base class TargetSubtargetInfo. Update it if the AArch64
  /// target needs to use it.
  unsigned char
  classifyGlobalFunctionReference(const GlobalValue *GV) const override {
    return 0;
  }

  void overrideSchedPolicy(MachineSchedPolicy &Policy,
                           unsigned NumRegionInstrs) const override;
  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
                             SDep &Dep,
                             const TargetSchedModel *SchedModel) const override;

  bool enableEarlyIfConversion() const override;

  std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;

  bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
    switch (CC) {
    case CallingConv::C:
    case CallingConv::Fast:
    case CallingConv::Swift:
    case CallingConv::SwiftTail:
      return isTargetWindows();
    case CallingConv::PreserveNone:
      return IsVarArg && isTargetWindows();
    case CallingConv::Win64:
      return true;
    default:
      return false;
    }
  }

  /// Return whether FrameLowering should always set the "extended frame
  /// present" bit in FP, or set it based on a symbol in the runtime.
  bool swiftAsyncContextIsDynamicallySet() const {
    // Older OS versions (particularly system unwinders) are confused by the
    // Swift extended frame, so when building code that might be run on them we
    // must dynamically query the concurrency library to determine whether
    // extended frames should be flagged as present.
    const Triple &TT = getTargetTriple();

    unsigned Major = TT.getOSVersion().getMajor();
    switch (TT.getOS()) {
    default:
      return false;
    case Triple::IOS:
    case Triple::TvOS:
      return Major < 15;
    case Triple::WatchOS:
      return Major < 8;
    case Triple::MacOSX:
    case Triple::Darwin:
      return Major < 12;
    }
  }

  void mirFileLoaded(MachineFunction &MF) const override;

  // Return the known range for the bit length of SVE data registers. A value
  // of 0 means nothing is known about that particular limit beyond what's
  // implied by the architecture.
  unsigned getMaxSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    return MaxSVEVectorSizeInBits;
  }

  unsigned getMinSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    return MinSVEVectorSizeInBits;
  }
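  // Hedged examples of the isCallingConvWin64() classification above, assuming
  // an AArch64 Windows target (isTargetWindows() == true):
  //
  //   isCallingConvWin64(CallingConv::C, /*IsVarArg=*/false);            // true
  //   isCallingConvWin64(CallingConv::PreserveNone, /*IsVarArg=*/false); // false
  //   isCallingConvWin64(CallingConv::PreserveNone, /*IsVarArg=*/true);  // true
  //   isCallingConvWin64(CallingConv::Win64, /*IsVarArg=*/false);        // true on any target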
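  // Hedged summary of the fixed-length-vector policy implemented by the
  // useSVEForFixedLengthVectors() overloads below: SVE is only preferred over
  // NEON for fixed-length vectors when NEON is unavailable (e.g. in streaming
  // mode) or when the guaranteed minimum SVE register width is strictly wider
  // than a 128-bit NEON register:
  //
  //   NEON available, getMinSVEVectorSizeInBits() < 256  -> keep using NEON
  //   getMinSVEVectorSizeInBits() >= 256                  -> lower via SVE
  //   NEON unavailable (e.g. streaming mode)              -> lower via SVE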
  bool useSVEForFixedLengthVectors() const {
    if (!isSVEorStreamingSVEAvailable())
      return false;

    // Prefer NEON unless larger SVE registers are available.
    return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
  }

  bool useSVEForFixedLengthVectors(EVT VT) const {
    if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
      return false;
    return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
           !isNeonAvailable();
  }

  unsigned getVScaleForTuning() const { return VScaleForTuning; }

  TailFoldingOpts getSVETailFoldingDefaultOpts() const {
    return DefaultSVETFOpts;
  }

  /// Returns true if the addvl/inc/dec instructions should be used, as opposed
  /// to separate add + cnt instructions.
  bool useScalarIncVL() const;

  const char *getChkStkName() const {
    if (isWindowsArm64EC())
      return "#__chkstk_arm64ec";
    return "__chkstk";
  }

  const char *getSecurityCheckCookieName() const {
    if (isWindowsArm64EC())
      return "#__security_check_cookie_arm64ec";
    return "__security_check_cookie";
  }

  /// Choose a method of checking LR before performing a tail call.
  AArch64PAuth::AuthCheckMethod
  getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;

  /// Compute the integer discriminator for a given BlockAddress constant, if
  /// blockaddress signing is enabled, or std::nullopt otherwise.
  /// Blockaddress signing is controlled by the function attribute
  /// "ptrauth-indirect-gotos" on the parent function.
  /// Note that this assumes the discriminator is independent of the indirect
  /// goto branch site itself, i.e., it's the same for all BlockAddresses in
  /// a function.
  std::optional<uint16_t>
  getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;
};
} // End llvm namespace

#endif