xref: /llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h (revision 6e1ea7e5a7b6e581bf9a030b98a7f63ee2833278)
1 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the AArch64 specific subclass of TargetSubtarget.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
14 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
15 
16 #include "AArch64FrameLowering.h"
17 #include "AArch64ISelLowering.h"
18 #include "AArch64InstrInfo.h"
19 #include "AArch64PointerAuth.h"
20 #include "AArch64RegisterInfo.h"
21 #include "AArch64SelectionDAGInfo.h"
22 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
23 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
24 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
25 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
26 #include "llvm/CodeGen/RegisterBankInfo.h"
27 #include "llvm/CodeGen/TargetSubtargetInfo.h"
28 #include "llvm/IR/DataLayout.h"
29 
30 #define GET_SUBTARGETINFO_HEADER
31 #include "AArch64GenSubtargetInfo.inc"
32 
33 namespace llvm {
34 class GlobalValue;
35 class StringRef;
36 class Triple;
37 
38 class AArch64Subtarget final : public AArch64GenSubtargetInfo {
39 public:
40   enum ARMProcFamilyEnum : uint8_t {
41     Others,
42 #define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
43 #include "llvm/TargetParser/AArch64TargetParserDef.inc"
44 #undef ARM_PROCESSOR_FAMILY
45   };
46 
47 protected:
48   /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
49   ARMProcFamilyEnum ARMProcFamily = Others;
50 
51   // Enable 64-bit vectorization in SLP.
52   unsigned MinVectorRegisterBitWidth = 64;
53 
54 // Bool members corresponding to the SubtargetFeatures defined in tablegen
55 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
56   bool ATTRIBUTE = DEFAULT;
57 #include "AArch64GenSubtargetInfo.inc"
58 
59   unsigned EpilogueVectorizationMinVF = 16;
60   uint8_t MaxInterleaveFactor = 2;
61   uint8_t VectorInsertExtractBaseCost = 2;
62   uint16_t CacheLineSize = 0;
63   // Default scatter/gather overhead.
64   unsigned ScatterOverhead = 10;
65   unsigned GatherOverhead = 10;
66   uint16_t PrefetchDistance = 0;
67   uint16_t MinPrefetchStride = 1;
68   unsigned MaxPrefetchIterationsAhead = UINT_MAX;
69   Align PrefFunctionAlignment;
70   Align PrefLoopAlignment;
71   unsigned MaxBytesForLoopAlignment = 0;
72   unsigned MinimumJumpTableEntries = 4;
73   unsigned MaxJumpTableSize = 0;
74 
75   // ReserveXRegister[i] - X#i is not available as a general purpose register.
76   BitVector ReserveXRegister;
77 
78   // ReserveXRegisterForRA[i] - X#i is not available for register allocator.
79   BitVector ReserveXRegisterForRA;
80 
81   // CustomCallSavedXRegs[i] - X#i is call saved.
82   BitVector CustomCallSavedXRegs;
83 
84   bool IsLittle;
85 
86   bool IsStreaming;
87   bool IsStreamingCompatible;
88   std::optional<unsigned> StreamingHazardSize;
89   unsigned MinSVEVectorSizeInBits;
90   unsigned MaxSVEVectorSizeInBits;
91   unsigned VScaleForTuning = 1;
92   TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
93 
94   bool EnableSubregLiveness;
95 
96   /// TargetTriple - What processor and OS we're targeting.
97   Triple TargetTriple;
98 
99   AArch64FrameLowering FrameLowering;
100   AArch64InstrInfo InstrInfo;
101   AArch64SelectionDAGInfo TSInfo;
102   AArch64TargetLowering TLInfo;
103 
104   /// GlobalISel related APIs.
105   std::unique_ptr<CallLowering> CallLoweringInfo;
106   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
107   std::unique_ptr<InstructionSelector> InstSelector;
108   std::unique_ptr<LegalizerInfo> Legalizer;
109   std::unique_ptr<RegisterBankInfo> RegBankInfo;
110 
111 private:
112   /// initializeSubtargetDependencies - Initializes using CPUString and the
113   /// passed in feature string so that we can use initializer lists for
114   /// subtarget initialization.
115   AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
116                                                     StringRef CPUString,
117                                                     StringRef TuneCPUString,
118                                                     bool HasMinSize);
119 
120   /// Initialize properties based on the selected processor family.
121   void initializeProperties(bool HasMinSize);
122 
123 public:
124   /// This constructor initializes the data members to match that
125   /// of the specified triple.
126   AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
127                    StringRef FS, const TargetMachine &TM, bool LittleEndian,
128                    unsigned MinSVEVectorSizeInBitsOverride = 0,
129                    unsigned MaxSVEVectorSizeInBitsOverride = 0,
130                    bool IsStreaming = false, bool IsStreamingCompatible = false,
131                    bool HasMinSize = false);
132 
133 // Getters for SubtargetFeatures defined in tablegen
134 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
135   bool GETTER() const { return ATTRIBUTE; }
136 #include "AArch64GenSubtargetInfo.inc"
137 
138   const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
139     return &TSInfo;
140   }
141   const AArch64FrameLowering *getFrameLowering() const override {
142     return &FrameLowering;
143   }
144   const AArch64TargetLowering *getTargetLowering() const override {
145     return &TLInfo;
146   }
147   const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
148   const AArch64RegisterInfo *getRegisterInfo() const override {
149     return &getInstrInfo()->getRegisterInfo();
150   }
151   const CallLowering *getCallLowering() const override;
152   const InlineAsmLowering *getInlineAsmLowering() const override;
153   InstructionSelector *getInstructionSelector() const override;
154   const LegalizerInfo *getLegalizerInfo() const override;
155   const RegisterBankInfo *getRegBankInfo() const override;
156   const Triple &getTargetTriple() const { return TargetTriple; }
157   bool enableMachineScheduler() const override { return true; }
158   bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
159   bool enableSubRegLiveness() const override { return EnableSubregLiveness; }
160 
161   bool enableMachinePipeliner() const override;
162   bool useDFAforSMS() const override { return false; }
163 
164   /// Returns ARM processor family.
165   /// Avoid this function! CPU specifics should be kept local to this class
166   /// and preferably modeled with SubtargetFeatures or properties in
167   /// initializeProperties().
168   ARMProcFamilyEnum getProcFamily() const {
169     return ARMProcFamily;
170   }
171 
172   bool isXRaySupported() const override { return true; }
173 
174   /// Returns true if the function has a streaming body.
175   bool isStreaming() const { return IsStreaming; }
176 
177   /// Returns true if the function has a streaming-compatible body.
178   bool isStreamingCompatible() const { return IsStreamingCompatible; }
179 
180   /// Returns the size of memory region that if accessed by both the CPU and
181   /// the SME unit could result in a hazard. 0 = disabled.
182   unsigned getStreamingHazardSize() const {
183     return StreamingHazardSize.value_or(
184         !hasSMEFA64() && hasSME() && hasSVE() ? 1024 : 0);
185   }
186 
187   /// Returns true if the target has NEON and the function at runtime is known
188   /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
189   /// mode, which disables NEON instructions).
190   bool isNeonAvailable() const {
191     return hasNEON() &&
192            (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
193   }
194 
195   /// Returns true if the target has SVE and can use the full range of SVE
196   /// instructions, for example because it knows the function is known not to be
197   /// in streaming-SVE mode or when the target has FEAT_FA64 enabled.
198   bool isSVEAvailable() const {
199     return hasSVE() &&
200            (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
201   }
202 
203   /// Returns true if the target has access to the streaming-compatible subset
204   /// of SVE instructions.
205   bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); }
206 
207   /// Returns true if the target has access to either the full range of SVE
208   /// instructions, or the streaming-compatible subset of SVE instructions.
209   bool isSVEorStreamingSVEAvailable() const {
210     return hasSVE() || isStreamingSVEAvailable();
211   }
212 
213   unsigned getMinVectorRegisterBitWidth() const {
214     // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
215     // we don't yet support streaming-compatible codegen support that we trust
216     // is safe for functions that may be executed in streaming-SVE mode.
217     // By returning '0' here, we disable vectorization.
218     if (!isSVEAvailable() && !isNeonAvailable())
219       return 0;
220     return MinVectorRegisterBitWidth;
221   }
222 
223   bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
224   bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
225   unsigned getNumXRegisterReserved() const {
226     BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
227     AllReservedX |= ReserveXRegister;
228     AllReservedX |= ReserveXRegisterForRA;
229     return AllReservedX.count();
230   }
231   bool isLRReservedForRA() const { return ReserveLRForRA; }
232   bool isXRegCustomCalleeSaved(size_t i) const {
233     return CustomCallSavedXRegs[i];
234   }
235   bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
236 
237   /// Return true if the CPU supports any kind of instruction fusion.
238   bool hasFusion() const {
239     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
240            hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
241            hasFuseAdrpAdd() || hasFuseLiterals();
242   }
243 
244   unsigned getEpilogueVectorizationMinVF() const {
245     return EpilogueVectorizationMinVF;
246   }
247   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
248   unsigned getVectorInsertExtractBaseCost() const;
249   unsigned getCacheLineSize() const override { return CacheLineSize; }
250   unsigned getScatterOverhead() const { return ScatterOverhead; }
251   unsigned getGatherOverhead() const { return GatherOverhead; }
252   unsigned getPrefetchDistance() const override { return PrefetchDistance; }
253   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
254                                 unsigned NumStridedMemAccesses,
255                                 unsigned NumPrefetches,
256                                 bool HasCall) const override {
257     return MinPrefetchStride;
258   }
259   unsigned getMaxPrefetchIterationsAhead() const override {
260     return MaxPrefetchIterationsAhead;
261   }
262   Align getPrefFunctionAlignment() const {
263     return PrefFunctionAlignment;
264   }
265   Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
266 
267   unsigned getMaxBytesForLoopAlignment() const {
268     return MaxBytesForLoopAlignment;
269   }
270 
271   unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
272   unsigned getMinimumJumpTableEntries() const {
273     return MinimumJumpTableEntries;
274   }
275 
276   /// CPU has TBI (top byte of addresses is ignored during HW address
277   /// translation) and OS enables it.
278   bool supportsAddressTopByteIgnored() const;
279 
280   bool isLittleEndian() const { return IsLittle; }
281 
282   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
283   bool isTargetIOS() const { return TargetTriple.isiOS(); }
284   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
285   bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
286   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
287   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
288   bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }
289 
290   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
291   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
292   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
293 
294   bool isTargetILP32() const {
295     return TargetTriple.isArch32Bit() ||
296            TargetTriple.getEnvironment() == Triple::GNUILP32;
297   }
298 
299   bool useAA() const override;
300 
301   bool addrSinkUsingGEPs() const override {
302     // Keeping GEPs inbounds is important for exploiting AArch64
303     // addressing-modes in ILP32 mode.
304     return useAA() || isTargetILP32();
305   }
306 
307   bool useSmallAddressing() const {
308     switch (TLInfo.getTargetMachine().getCodeModel()) {
309       case CodeModel::Kernel:
310         // Kernel is currently allowed only for Fuchsia targets,
311         // where it is the same as Small for almost all purposes.
312       case CodeModel::Small:
313         return true;
314       default:
315         return false;
316     }
317   }
318 
319   /// ParseSubtargetFeatures - Parses features string setting specified
320   /// subtarget options.  Definition of function is auto generated by tblgen.
321   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
322 
323   /// ClassifyGlobalReference - Find the target operand flags that describe
324   /// how a global value should be referenced for the current subtarget.
325   unsigned ClassifyGlobalReference(const GlobalValue *GV,
326                                    const TargetMachine &TM) const;
327 
328   unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
329                                            const TargetMachine &TM) const;
330 
331   /// This function is designed to be compatible with the function def in other
332   /// targets and to avoid a build error about the virtual function def in the
333   /// base class TargetSubtargetInfo. Update me if the AArch64 target needs it.
334   unsigned char
335   classifyGlobalFunctionReference(const GlobalValue *GV) const override {
336     return 0;
337   }
338 
339   void overrideSchedPolicy(MachineSchedPolicy &Policy,
340                            unsigned NumRegionInstrs) const override;
341   void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
342                              SDep &Dep,
343                              const TargetSchedModel *SchedModel) const override;
344 
345   bool enableEarlyIfConversion() const override;
346 
347   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
348 
349   bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
350     switch (CC) {
351     case CallingConv::C:
352     case CallingConv::Fast:
353     case CallingConv::Swift:
354     case CallingConv::SwiftTail:
355       return isTargetWindows();
356     case CallingConv::PreserveNone:
357       return IsVarArg && isTargetWindows();
358     case CallingConv::Win64:
359       return true;
360     default:
361       return false;
362     }
363   }
364 
365   /// Return whether FrameLowering should always set the "extended frame
366   /// present" bit in FP, or set it based on a symbol in the runtime.
367   bool swiftAsyncContextIsDynamicallySet() const {
368     // Older OS versions (particularly system unwinders) are confused by the
369     // Swift extended frame, so when building code that might be run on them we
370     // must dynamically query the concurrency library to determine whether
371     // extended frames should be flagged as present.
372     const Triple &TT = getTargetTriple();
373 
374     unsigned Major = TT.getOSVersion().getMajor();
375     switch(TT.getOS()) {
376     default:
377       return false;
378     case Triple::IOS:
379     case Triple::TvOS:
380       return Major < 15;
381     case Triple::WatchOS:
382       return Major < 8;
383     case Triple::MacOSX:
384     case Triple::Darwin:
385       return Major < 12;
386     }
387   }
388 
389   void mirFileLoaded(MachineFunction &MF) const override;
390 
391   // Return the known range for the bit length of SVE data registers. A value
392   // of 0 means nothing is known about that particular limit beyond what's
393   // implied by the architecture.
394   unsigned getMaxSVEVectorSizeInBits() const {
395     assert(isSVEorStreamingSVEAvailable() &&
396            "Tried to get SVE vector length without SVE support!");
397     return MaxSVEVectorSizeInBits;
398   }
399 
400   unsigned getMinSVEVectorSizeInBits() const {
401     assert(isSVEorStreamingSVEAvailable() &&
402            "Tried to get SVE vector length without SVE support!");
403     return MinSVEVectorSizeInBits;
404   }
405 
406   bool useSVEForFixedLengthVectors() const {
407     if (!isSVEorStreamingSVEAvailable())
408       return false;
409 
410     // Prefer NEON unless larger SVE registers are available.
411     return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
412   }
413 
414   bool useSVEForFixedLengthVectors(EVT VT) const {
415     if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
416       return false;
417     return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
418            !isNeonAvailable();
419   }
420 
421   unsigned getVScaleForTuning() const { return VScaleForTuning; }
422 
423   TailFoldingOpts getSVETailFoldingDefaultOpts() const {
424     return DefaultSVETFOpts;
425   }
426 
427   /// Returns true to use the addvl/inc/dec instructions, as opposed to separate
428   /// add + cnt instructions.
429   bool useScalarIncVL() const;
430 
431   const char* getChkStkName() const {
432     if (isWindowsArm64EC())
433       return "#__chkstk_arm64ec";
434     return "__chkstk";
435   }
436 
437   const char* getSecurityCheckCookieName() const {
438     if (isWindowsArm64EC())
439       return "#__security_check_cookie_arm64ec";
440     return "__security_check_cookie";
441   }
442 
443   /// Choose a method of checking LR before performing a tail call.
444   AArch64PAuth::AuthCheckMethod
445   getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;
446 
447   /// Compute the integer discriminator for a given BlockAddress constant, if
448   /// blockaddress signing is enabled, or std::nullopt otherwise.
449   /// Blockaddress signing is controlled by the function attribute
450   /// "ptrauth-indirect-gotos" on the parent function.
451   /// Note that this assumes the discriminator is independent of the indirect
452   /// goto branch site itself, i.e., it's the same for all BlockAddresses in
453   /// a function.
454   std::optional<uint16_t>
455   getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;
456 };
457 } // End llvm namespace
458 
459 #endif
460