1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "AMDGPUSubtarget.h"
13 #include "SIDefines.h"
14 #include "llvm/IR/CallingConv.h"
15 #include "llvm/IR/InstrTypes.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/Support/Alignment.h"
18 #include <array>
19 #include <functional>
20 #include <utility>
21 
22 struct amd_kernel_code_t;
23 
24 namespace llvm {
25 
26 struct Align;
27 class Argument;
28 class Function;
29 class GlobalValue;
30 class MCInstrInfo;
31 class MCRegisterClass;
32 class MCRegisterInfo;
33 class MCSubtargetInfo;
34 class StringRef;
35 class Triple;
36 class raw_ostream;
37 
38 namespace AMDGPU {
39 
40 struct AMDGPUMCKernelCodeT;
41 struct IsaVersion;
42 
43 /// Generic target versions emitted by this version of LLVM.
44 ///
45 /// These numbers are incremented every time a codegen breaking change occurs
46 /// within a generic family.
47 namespace GenericVersion {
48 static constexpr unsigned GFX9 = 1;
49 static constexpr unsigned GFX9_4 = 1;
50 static constexpr unsigned GFX10_1 = 1;
51 static constexpr unsigned GFX10_3 = 1;
52 static constexpr unsigned GFX11 = 1;
53 static constexpr unsigned GFX12 = 1;
54 } // namespace GenericVersion
55 
56 enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
57 
58 enum class FPType { None, FP4, FP8 };
59 
60 /// \returns True if \p STI is AMDHSA.
61 bool isHsaAbi(const MCSubtargetInfo &STI);
62 
63 /// \returns Code object version from the IR module flag.
64 unsigned getAMDHSACodeObjectVersion(const Module &M);
65 
66 /// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
67 unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
68 
69 /// \returns The default HSA code object version. This should only be used when
70 /// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
71 /// flag or a .amdhsa_code_object_version directive)
72 unsigned getDefaultAMDHSACodeObjectVersion();
73 
74 /// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION].
75 /// \param CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
76 uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
77 
78 /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
79 unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
80 
81 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr
82 unsigned getHostcallImplicitArgPosition(unsigned COV);
83 
84 unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
85 unsigned getCompletionActionImplicitArgPosition(unsigned COV);
86 
87 struct GcnBufferFormatInfo {
88   unsigned Format;
89   unsigned BitsPerComp;
90   unsigned NumComponents;
91   unsigned NumFormat;
92   unsigned DataFormat;
93 };
94 
95 struct MAIInstInfo {
96   uint16_t Opcode;
97   bool is_dgemm;
98   bool is_gfx940_xdl;
99 };
100 
101 struct MFMA_F8F6F4_Info {
102   unsigned Opcode;
103   unsigned F8F8Opcode;
104   uint8_t NumRegsSrcA;
105   uint8_t NumRegsSrcB;
106 };
107 
108 struct CvtScaleF32_F32F16ToF8F4_Info {
109   unsigned Opcode;
110 };
111 
112 #define GET_MIMGBaseOpcode_DECL
113 #define GET_MIMGDim_DECL
114 #define GET_MIMGEncoding_DECL
115 #define GET_MIMGLZMapping_DECL
116 #define GET_MIMGMIPMapping_DECL
117 #define GET_MIMGBiASMapping_DECL
118 #define GET_MAIInstInfoTable_DECL
120 #define GET_isMFMA_F8F6F4Table_DECL
121 #define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
122 #include "AMDGPUGenSearchableTables.inc"
123 
124 namespace IsaInfo {
125 
126 enum {
127   // The closed Vulkan driver sets 96, which limits the wave count to 8 but
128   // doesn't spill SGPRs as much as when 80 is set.
129   FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
130   TRAP_NUM_SGPRS = 16
131 };
132 
133 enum class TargetIDSetting {
134   Unsupported,
135   Any,
136   Off,
137   On
138 };
139 
140 class AMDGPUTargetID {
141 private:
142   const MCSubtargetInfo &STI;
143   TargetIDSetting XnackSetting;
144   TargetIDSetting SramEccSetting;
145 
146 public:
147   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
148   ~AMDGPUTargetID() = default;
149 
150   /// \return True if the current xnack setting is not "Unsupported".
151   bool isXnackSupported() const {
152     return XnackSetting != TargetIDSetting::Unsupported;
153   }
154 
155   /// \returns True if the current xnack setting is "On" or "Any".
156   bool isXnackOnOrAny() const {
157     return XnackSetting == TargetIDSetting::On ||
158         XnackSetting == TargetIDSetting::Any;
159   }
160 
161   /// \returns True if the current xnack setting is "On" or "Off",
162   /// false otherwise.
163   bool isXnackOnOrOff() const {
164     return getXnackSetting() == TargetIDSetting::On ||
165         getXnackSetting() == TargetIDSetting::Off;
166   }
167 
168   /// \returns The current xnack TargetIDSetting, possible options are
169   /// "Unsupported", "Any", "Off", and "On".
170   TargetIDSetting getXnackSetting() const {
171     return XnackSetting;
172   }
173 
174   /// Sets xnack setting to \p NewXnackSetting.
175   void setXnackSetting(TargetIDSetting NewXnackSetting) {
176     XnackSetting = NewXnackSetting;
177   }
178 
179   /// \return True if the current sramecc setting is not "Unsupported".
180   bool isSramEccSupported() const {
181     return SramEccSetting != TargetIDSetting::Unsupported;
182   }
183 
184   /// \returns True if the current sramecc setting is "On" or "Any".
185   bool isSramEccOnOrAny() const {
186     return SramEccSetting == TargetIDSetting::On ||
187         SramEccSetting == TargetIDSetting::Any;
188   }
189 
190   /// \returns True if the current sramecc setting is "On" or "Off",
191   /// false otherwise.
192   bool isSramEccOnOrOff() const {
193     return getSramEccSetting() == TargetIDSetting::On ||
194         getSramEccSetting() == TargetIDSetting::Off;
195   }
196 
197   /// \returns The current sramecc TargetIDSetting, possible options are
198   /// "Unsupported", "Any", "Off", and "On".
199   TargetIDSetting getSramEccSetting() const {
200     return SramEccSetting;
201   }
202 
203   /// Sets sramecc setting to \p NewSramEccSetting.
204   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
205     SramEccSetting = NewSramEccSetting;
206   }
207 
208   void setTargetIDFromFeaturesString(StringRef FS);
209   void setTargetIDFromTargetIDStream(StringRef TargetID);
210 
211   /// \returns String representation of an object.
212   std::string toString() const;
213 };
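// Editorial usage sketch (not from the original header): how a consumer might
// drive AMDGPUTargetID. STI here is any MCSubtargetInfo for an AMDGPU target;
// the feature-string round trip is only illustrative.
//
//   AMDGPUTargetID TID(STI);
//   TID.setTargetIDFromFeaturesString(STI.getFeatureString());
//   if (TID.isXnackSupported() && !TID.isXnackOnOrAny())
//     TID.setXnackSetting(TargetIDSetting::On);
//   std::string TargetID = TID.toString(); // canonical target-ID string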
214 
215 /// \returns Wavefront size for given subtarget \p STI.
216 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
217 
218 /// \returns Local memory size in bytes for given subtarget \p STI.
219 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
220 
221 /// \returns Maximum addressable local memory size in bytes for given subtarget
222 /// \p STI.
223 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
224 
225 /// \returns Number of execution units per compute unit for given subtarget \p
226 /// STI.
227 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
228 
229 /// \returns Maximum number of work groups per compute unit for given subtarget
230 /// \p STI and limited by given \p FlatWorkGroupSize.
231 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
232                                unsigned FlatWorkGroupSize);
233 
234 /// \returns Minimum number of waves per execution unit for given subtarget \p
235 /// STI.
236 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
237 
238 /// \returns Maximum number of waves per execution unit for given subtarget \p
239 /// STI without any kind of limitation.
240 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
241 
242 /// \returns Number of waves per execution unit required to support the given \p
243 /// FlatWorkGroupSize.
244 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
245                                    unsigned FlatWorkGroupSize);
246 
247 /// \returns Minimum flat work group size for given subtarget \p STI.
248 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
249 
250 /// \returns Maximum flat work group size for given subtarget \p STI.
251 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
252 
253 /// \returns Number of waves per work group for given subtarget \p STI and
254 /// \p FlatWorkGroupSize.
255 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
256                               unsigned FlatWorkGroupSize);
257 
258 /// \returns SGPR allocation granularity for given subtarget \p STI.
259 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
260 
261 /// \returns SGPR encoding granularity for given subtarget \p STI.
262 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
263 
264 /// \returns Total number of SGPRs for given subtarget \p STI.
265 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
266 
267 /// \returns Addressable number of SGPRs for given subtarget \p STI.
268 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
269 
270 /// \returns Minimum number of SGPRs that meets the given number of waves per
271 /// execution unit requirement for given subtarget \p STI.
272 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
273 
274 /// \returns Maximum number of SGPRs that meets the given number of waves per
275 /// execution unit requirement for given subtarget \p STI.
276 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
277                         bool Addressable);
278 
279 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
280 /// STI when the given special registers are used.
281 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
282                           bool FlatScrUsed, bool XNACKUsed);
283 
284 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
285 /// STI when the given special registers are used. XNACK is inferred from
286 /// \p STI.
287 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
288                           bool FlatScrUsed);
289 
290 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
291 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
292 /// register counts.
293 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
294 
295 /// \returns VGPR allocation granularity for given subtarget \p STI.
296 ///
297 /// For subtargets which support it, \p EnableWavefrontSize32 should match
298 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
299 unsigned
300 getVGPRAllocGranule(const MCSubtargetInfo *STI,
301                     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
302 
303 /// \returns VGPR encoding granularity for given subtarget \p STI.
304 ///
305 /// For subtargets which support it, \p EnableWavefrontSize32 should match
306 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
307 unsigned getVGPREncodingGranule(
308     const MCSubtargetInfo *STI,
309     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
310 
311 /// \returns Total number of VGPRs for given subtarget \p STI.
312 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
313 
314 /// \returns Addressable number of architectural VGPRs for a given subtarget \p
315 /// STI.
316 unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
317 
318 /// \returns Addressable number of VGPRs for given subtarget \p STI.
319 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
320 
321 /// \returns Minimum number of VGPRs that meets given number of waves per
322 /// execution unit requirement for given subtarget \p STI.
323 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
324 
325 /// \returns Maximum number of VGPRs that meets given number of waves per
326 /// execution unit requirement for given subtarget \p STI.
327 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
328 
329 /// \returns Number of waves reachable for a given \p NumVGPRs usage for given
330 /// subtarget \p STI.
331 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
332                                       unsigned NumVGPRs);
333 
334 /// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
335 /// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
336 unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
337                                       unsigned MaxWaves,
338                                       unsigned TotalNumVGPRs);
339 
340 /// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
341 /// Gen.
342 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
343                                   AMDGPUSubtarget::Generation Gen);
344 
345 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
346 /// \p NumVGPRs are used. We actually return the number of blocks -1, since
347 /// that's what we encode.
348 ///
349 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
350 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
351 unsigned getEncodedNumVGPRBlocks(
352     const MCSubtargetInfo *STI, unsigned NumVGPRs,
353     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
354 
355 /// \returns Number of VGPR blocks that need to be allocated for the given
356 /// subtarget \p STI when \p NumVGPRs are used.
357 unsigned getAllocatedNumVGPRBlocks(
358     const MCSubtargetInfo *STI, unsigned NumVGPRs,
359     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
360 
361 } // end namespace IsaInfo
362 
363 // Represents a field in an encoded value.
364 template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
365 struct EncodingField {
366   static_assert(HighBit >= LowBit, "Invalid bit range!");
367   static constexpr unsigned Offset = LowBit;
368   static constexpr unsigned Width = HighBit - LowBit + 1;
369 
370   using ValueType = unsigned;
371   static constexpr ValueType Default = D;
372 
373   ValueType Value;
374   constexpr EncodingField(ValueType Value) : Value(Value) {}
375 
376   constexpr uint64_t encode() const { return Value; }
377   static ValueType decode(uint64_t Encoded) { return Encoded; }
378 };
379 
380 // Represents a single bit in an encoded value.
381 template <unsigned Bit, unsigned D = 0>
382 using EncodingBit = EncodingField<Bit, Bit, D>;
383 
384 // A helper for encoding and decoding multiple fields.
385 template <typename... Fields> struct EncodingFields {
386   static constexpr uint64_t encode(Fields... Values) {
387     return ((Values.encode() << Values.Offset) | ...);
388   }
389 
390   static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
391     return {Fields::decode((Encoded >> Fields::Offset) &
392                            maxUIntN(Fields::Width))...};
393   }
394 };
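// Editorial sketch (not from the original header) of how these helpers
// compose; the field names below are made up for illustration only.
//
//   using FooId = EncodingField<3, 0>;   // bits [3:0]
//   using FooFlag = EncodingBit<4>;      // bit  [4]
//   using FooEncoding = EncodingFields<FooId, FooFlag>;
//
//   uint64_t Enc = FooEncoding::encode(FooId(5), FooFlag(1)); // == 0x15
//   auto [Id, Flag] = FooEncoding::decode(Enc);               // {5, 1}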
395 
396 LLVM_READONLY
397 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
398 
399 LLVM_READONLY
400 inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) {
401   return getNamedOperandIdx(Opcode, NamedIdx) != -1;
402 }
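// Editorial usage sketch (not from the original header): the named-operand
// helpers are typically used with the generated AMDGPU::OpName enumeration,
// e.g. to locate an optional modifier operand on an arbitrary opcode.
//
//   if (hasNamedOperand(Opc, AMDGPU::OpName::clamp)) {
//     int ClampIdx = getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
//     // ... inspect MI.getOperand(ClampIdx)
//   }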
403 
404 LLVM_READONLY
405 int getSOPPWithRelaxation(uint16_t Opcode);
406 
407 struct MIMGBaseOpcodeInfo {
408   MIMGBaseOpcode BaseOpcode;
409   bool Store;
410   bool Atomic;
411   bool AtomicX2;
412   bool Sampler;
413   bool Gather4;
414 
415   uint8_t NumExtraArgs;
416   bool Gradients;
417   bool G16;
418   bool Coordinates;
419   bool LodOrClampOrMip;
420   bool HasD16;
421   bool MSAA;
422   bool BVH;
423   bool A16;
424   bool NoReturn;
425 };
426 
427 LLVM_READONLY
428 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
429 
430 LLVM_READONLY
431 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
432 
433 struct MIMGDimInfo {
434   MIMGDim Dim;
435   uint8_t NumCoords;
436   uint8_t NumGradients;
437   bool MSAA;
438   bool DA;
439   uint8_t Encoding;
440   const char *AsmSuffix;
441 };
442 
443 LLVM_READONLY
444 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
445 
446 LLVM_READONLY
447 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
448 
449 LLVM_READONLY
450 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
451 
452 struct MIMGLZMappingInfo {
453   MIMGBaseOpcode L;
454   MIMGBaseOpcode LZ;
455 };
456 
457 struct MIMGMIPMappingInfo {
458   MIMGBaseOpcode MIP;
459   MIMGBaseOpcode NONMIP;
460 };
461 
462 struct MIMGBiasMappingInfo {
463   MIMGBaseOpcode Bias;
464   MIMGBaseOpcode NoBias;
465 };
466 
467 struct MIMGOffsetMappingInfo {
468   MIMGBaseOpcode Offset;
469   MIMGBaseOpcode NoOffset;
470 };
471 
472 struct MIMGG16MappingInfo {
473   MIMGBaseOpcode G;
474   MIMGBaseOpcode G16;
475 };
476 
477 LLVM_READONLY
478 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
479 
480 struct WMMAOpcodeMappingInfo {
481   unsigned Opcode2Addr;
482   unsigned Opcode3Addr;
483 };
484 
485 LLVM_READONLY
486 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
487 
488 LLVM_READONLY
489 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
490 
491 LLVM_READONLY
492 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
493 
494 LLVM_READONLY
495 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
496 
497 LLVM_READONLY
498 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
499                   unsigned VDataDwords, unsigned VAddrDwords);
500 
501 LLVM_READONLY
502 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
503 
504 LLVM_READONLY
505 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
506                            const MIMGDimInfo *Dim, bool IsA16,
507                            bool IsG16Supported);
508 
509 struct MIMGInfo {
510   uint16_t Opcode;
511   uint16_t BaseOpcode;
512   uint8_t MIMGEncoding;
513   uint8_t VDataDwords;
514   uint8_t VAddrDwords;
515   uint8_t VAddrOperands;
516 };
517 
518 LLVM_READONLY
519 const MIMGInfo *getMIMGInfo(unsigned Opc);
520 
521 LLVM_READONLY
522 int getMTBUFBaseOpcode(unsigned Opc);
523 
524 LLVM_READONLY
525 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
526 
527 LLVM_READONLY
528 int getMTBUFElements(unsigned Opc);
529 
530 LLVM_READONLY
531 bool getMTBUFHasVAddr(unsigned Opc);
532 
533 LLVM_READONLY
534 bool getMTBUFHasSrsrc(unsigned Opc);
535 
536 LLVM_READONLY
537 bool getMTBUFHasSoffset(unsigned Opc);
538 
539 LLVM_READONLY
540 int getMUBUFBaseOpcode(unsigned Opc);
541 
542 LLVM_READONLY
543 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
544 
545 LLVM_READONLY
546 int getMUBUFElements(unsigned Opc);
547 
548 LLVM_READONLY
549 bool getMUBUFHasVAddr(unsigned Opc);
550 
551 LLVM_READONLY
552 bool getMUBUFHasSrsrc(unsigned Opc);
553 
554 LLVM_READONLY
555 bool getMUBUFHasSoffset(unsigned Opc);
556 
557 LLVM_READONLY
558 bool getMUBUFIsBufferInv(unsigned Opc);
559 
560 LLVM_READONLY
561 bool getMUBUFTfe(unsigned Opc);
562 
563 LLVM_READONLY
564 bool getSMEMIsBuffer(unsigned Opc);
565 
566 LLVM_READONLY
567 bool getVOP1IsSingle(unsigned Opc);
568 
569 LLVM_READONLY
570 bool getVOP2IsSingle(unsigned Opc);
571 
572 LLVM_READONLY
573 bool getVOP3IsSingle(unsigned Opc);
574 
575 LLVM_READONLY
576 bool isVOPC64DPP(unsigned Opc);
577 
578 LLVM_READONLY
579 bool isVOPCAsmOnly(unsigned Opc);
580 
581 /// Returns true if MAI operation is a double precision GEMM.
582 LLVM_READONLY
583 bool getMAIIsDGEMM(unsigned Opc);
584 
585 LLVM_READONLY
586 bool getMAIIsGFX940XDL(unsigned Opc);
587 
588 struct CanBeVOPD {
589   bool X;
590   bool Y;
591 };
592 
593 /// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
594 LLVM_READONLY
595 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
596 
597 LLVM_READONLY
598 CanBeVOPD getCanBeVOPD(unsigned Opc);
599 
600 LLVM_READNONE
601 uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
602 
603 LLVM_READONLY
604 const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
605                                                       unsigned BLGP,
606                                                       unsigned F8F8Opcode);
607 
608 LLVM_READONLY
609 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
610                                                   uint8_t NumComponents,
611                                                   uint8_t NumFormat,
612                                                   const MCSubtargetInfo &STI);
613 LLVM_READONLY
614 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
615                                                   const MCSubtargetInfo &STI);
616 
617 LLVM_READONLY
618 int getMCOpcode(uint16_t Opcode, unsigned Gen);
619 
620 LLVM_READONLY
621 unsigned getVOPDOpcode(unsigned Opc);
622 
623 LLVM_READONLY
624 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);
625 
626 LLVM_READONLY
627 bool isVOPD(unsigned Opc);
628 
629 LLVM_READNONE
630 bool isMAC(unsigned Opc);
631 
632 LLVM_READNONE
633 bool isPermlane16(unsigned Opc);
634 
635 LLVM_READNONE
636 bool isGenericAtomic(unsigned Opc);
637 
638 LLVM_READNONE
639 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
640 
641 namespace VOPD {
642 
643 enum Component : unsigned {
644   DST = 0,
645   SRC0,
646   SRC1,
647   SRC2,
648 
649   DST_NUM = 1,
650   MAX_SRC_NUM = 3,
651   MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
652 };
653 
654 // LSB mask for VGPR banks per VOPD component operand.
655 // 4 banks result in a mask 3, setting 2 lower bits.
656 constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
657 
658 enum ComponentIndex : unsigned { X = 0, Y = 1 };
659 constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
660 constexpr unsigned COMPONENTS_NUM = 2;
661 
662 // Properties of VOPD components.
663 class ComponentProps {
664 private:
665   unsigned SrcOperandsNum = 0;
666   unsigned MandatoryLiteralIdx = ~0u;
667   bool HasSrc2Acc = false;
668 
669 public:
670   ComponentProps() = default;
671   ComponentProps(const MCInstrDesc &OpDesc);
672 
673   // Return the total number of src operands this component has.
674   unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
675 
676   // Return the number of src operands of this component visible to the parser.
677   unsigned getCompParsedSrcOperandsNum() const {
678     return SrcOperandsNum - HasSrc2Acc;
679   }
680 
681   // Return true iff this component has a mandatory literal.
682   bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
683 
684   // If this component has a mandatory literal, return component operand
685   // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
686   unsigned getMandatoryLiteralCompOperandIndex() const {
687     assert(hasMandatoryLiteral());
688     return MandatoryLiteralIdx;
689   }
690 
691   // Return true iff this component has an operand with component index
692   // CompSrcIdx and this operand may be a register.
693   bool hasRegSrcOperand(unsigned CompSrcIdx) const {
694     assert(CompSrcIdx < Component::MAX_SRC_NUM);
695     return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
696   }
697 
698   // Return true iff this component has a tied src2.
699   bool hasSrc2Acc() const { return HasSrc2Acc; }
700 
701 private:
702   bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
703     assert(CompSrcIdx < Component::MAX_SRC_NUM);
704     return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
705   }
706 };
707 
708 enum ComponentKind : unsigned {
709   SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
710   COMPONENT_X, // A VOPD instruction, X component.
711   COMPONENT_Y, // A VOPD instruction, Y component.
712   MAX = COMPONENT_Y
713 };
714 
715 // Interface functions of this class map VOPD component operand indices
716 // to indices of operands in MachineInstr/MCInst or parsed operands array.
717 //
718 // Note that this class operates with 3 kinds of indices:
719 // - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
720 // - MC operand indices (they refer to operands in a MachineInstr/MCInst);
721 // - parsed operand indices (they refer to operands in the parsed operands array).
722 //
723 // For SINGLE components mapping between these indices is trivial.
724 // But things get more complicated for COMPONENT_X and
725 // COMPONENT_Y because these components share the same
726 // MachineInstr/MCInst and the same parsed operands array.
727 // Below is an example of component operand to parsed operand
728 // mapping for the following instruction:
729 //
730 //   v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
731 //
732 //                          PARSED        COMPONENT         PARSED
733 // COMPONENT               OPERANDS     OPERAND INDEX    OPERAND INDEX
734 // -------------------------------------------------------------------
735 //                     "v_dual_add_f32"                        0
736 // v_dual_add_f32            v255          0 (DST)    -->      1
737 //                           v4            1 (SRC0)   -->      2
738 //                           v5            2 (SRC1)   -->      3
739 //                          "::"                               4
740 //                     "v_dual_mov_b32"                        5
741 // v_dual_mov_b32            v6            0 (DST)    -->      6
742 //                           v1            1 (SRC0)   -->      7
743 // -------------------------------------------------------------------
744 //
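// For the v_dual_mov_b32 (COMPONENT_Y) component in the table above, the
// layout constants defined below reproduce the parsed indices: OpX has two
// parsed src operands, so getIndexOfDstInParsedOperands() == 4 + 2 == 6 and
// getIndexOfSrcInParsedOperands(0) == 5 + 2 + 0 == 7.
//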
745 class ComponentLayout {
746 private:
747   // Regular MachineInstr/MCInst operands are ordered as follows:
748   //   dst, src0 [, other src operands]
749   // VOPD MachineInstr/MCInst operands are ordered as follows:
750   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
751   // Each ComponentKind has operand indices defined below.
752   static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
753   static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */};
754 
755   // Parsed operands of regular instructions are ordered as follows:
756   //   Mnemo dst src0 [vsrc1 ...]
757   // Parsed VOPD operands are ordered as follows:
758   //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
759   //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
760   // Each ComponentKind has operand indices defined below.
761   static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
762                                                 4 /* + OpX.ParsedSrcNum */};
763   static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
764       2, 2, 5 /* + OpX.ParsedSrcNum */};
765 
766 private:
767   const ComponentKind Kind;
768   const ComponentProps PrevComp;
769 
770 public:
771   // Create layout for COMPONENT_X or SINGLE component.
772   ComponentLayout(ComponentKind Kind) : Kind(Kind) {
773     assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
774   }
775 
776   // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
777   ComponentLayout(const ComponentProps &OpXProps)
778       : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {}
779 
780 public:
781   // Return the index of dst operand in MCInst operands.
782   unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
783 
784   // Return the index of the specified src operand in MCInst operands.
785   unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
786     assert(CompSrcIdx < Component::MAX_SRC_NUM);
787     return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
788   }
789 
790   // Return the index of dst operand in the parsed operands array.
791   unsigned getIndexOfDstInParsedOperands() const {
792     return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
793   }
794 
795   // Return the index of the specified src operand in the parsed operands array.
796   unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
797     assert(CompSrcIdx < Component::MAX_SRC_NUM);
798     return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
799   }
800 
801 private:
802   unsigned getPrevCompSrcNum() const {
803     return PrevComp.getCompSrcOperandsNum();
804   }
805   unsigned getPrevCompParsedSrcNum() const {
806     return PrevComp.getCompParsedSrcOperandsNum();
807   }
808 };
809 
810 // Layout and properties of VOPD components.
811 class ComponentInfo : public ComponentLayout, public ComponentProps {
812 public:
813   // Create ComponentInfo for COMPONENT_X or SINGLE component.
814   ComponentInfo(const MCInstrDesc &OpDesc,
815                 ComponentKind Kind = ComponentKind::SINGLE)
816       : ComponentLayout(Kind), ComponentProps(OpDesc) {}
817 
818   // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
819   ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
820       : ComponentLayout(OpXProps), ComponentProps(OpDesc) {}
821 
822   // Map component operand index to parsed operand index.
823   // Return 0 if the specified operand does not exist.
824   unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
825 };
826 
827 // Properties of VOPD instructions.
828 class InstInfo {
829 private:
830   const ComponentInfo CompInfo[COMPONENTS_NUM];
831 
832 public:
833   using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;
834 
835   InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
836       : CompInfo{OpX, OpY} {}
837 
838   InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
839       : CompInfo{OprInfoX, OprInfoY} {}
840 
841   const ComponentInfo &operator[](size_t ComponentIdx) const {
842     assert(ComponentIdx < COMPONENTS_NUM);
843     return CompInfo[ComponentIdx];
844   }
845 
846   // Check VOPD operand constraints.
847   // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
848   // for the specified component and MC operand. The callback must return 0
849   // if the operand is not a register or not a VGPR.
850   // If \p SkipSrc is set to true then constraints for source operands are not
851   // checked.
852   bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
853                          bool SkipSrc = false) const {
854     return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
855   }
856 
857   // Check VOPD operand constraints.
858   // Return the index of an invalid component operand, if any.
859   // If \p SkipSrc is set to true then constraints for source operands are not
860   // checked.
861   std::optional<unsigned> getInvalidCompOperandIndex(
862       std::function<unsigned(unsigned, unsigned)> GetRegIdx,
863       bool SkipSrc = false) const;
864 
865 private:
866   RegIndices
867   getRegIndices(unsigned ComponentIdx,
868                 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
869 };
870 
871 } // namespace VOPD
872 
873 LLVM_READONLY
874 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
875 
876 LLVM_READONLY
877 // Get properties of 2 single VOP1/VOP2 instructions
878 // used as components to create a VOPD instruction.
879 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
880 
881 LLVM_READONLY
882 // Get properties of VOPD X and Y components.
883 VOPD::InstInfo
884 getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);
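// Editorial usage sketch (not from the original header): given a VOPD opcode,
// recover per-component operand positions in the combined MCInst. MCII is
// assumed to be the target's MCInstrInfo.
//
//   VOPD::InstInfo Info = getVOPDInstInfo(VOPDOpc, MCII);
//   unsigned DstYIdx =
//       Info[VOPD::ComponentIndex::Y].getIndexOfDstInMCOperands();  // == 1
//   unsigned Src0XIdx =
//       Info[VOPD::ComponentIndex::X].getIndexOfSrcInMCOperands(0); // == 2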
885 
886 LLVM_READONLY
887 bool isTrue16Inst(unsigned Opc);
888 
889 LLVM_READONLY
890 FPType getFPDstSelType(unsigned Opc);
891 
892 LLVM_READONLY
893 bool isInvalidSingleUseConsumerInst(unsigned Opc);
894 
895 LLVM_READONLY
896 bool isInvalidSingleUseProducerInst(unsigned Opc);
897 
898 bool isDPMACCInstruction(unsigned Opc);
899 
900 LLVM_READONLY
901 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
902 
903 LLVM_READONLY
904 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
905 
906 void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
907                                const MCSubtargetInfo *STI);
908 
909 bool isGroupSegment(const GlobalValue *GV);
910 bool isGlobalSegment(const GlobalValue *GV);
911 bool isReadOnlySegment(const GlobalValue *GV);
912 
913 /// \returns True if constants should be emitted to .text section for given
914 /// target triple \p TT, false otherwise.
915 bool shouldEmitConstantsToTextSection(const Triple &TT);
916 
917 /// \returns Integer value requested using \p F's \p Name attribute.
918 ///
919 /// \returns \p Default if attribute is not present.
920 ///
921 /// \returns \p Default and emits error if requested value cannot be converted
922 /// to integer.
923 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
924 
925 /// \returns A pair of integer values requested using \p F's \p Name attribute
926 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
927 /// is false).
928 ///
929 /// \returns \p Default if attribute is not present.
930 ///
931 /// \returns \p Default and emits error if one of the requested values cannot be
932 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
933 /// not present.
934 std::pair<unsigned, unsigned>
935 getIntegerPairAttribute(const Function &F, StringRef Name,
936                         std::pair<unsigned, unsigned> Default,
937                         bool OnlyFirstRequired = false);
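// Editorial usage sketch (not from the original header): the attribute holds a
// "first[,second]" string. The attribute name below is a real AMDGPU function
// attribute, but any attribute following that format works the same way.
//
//   ; IR:  attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
//   auto [MinWGSize, MaxWGSize] =
//       getIntegerPairAttribute(F, "amdgpu-flat-work-group-size", {1, 1024});
//   // MinWGSize == 1, MaxWGSize == 256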
938 
939 /// \returns A pair of integer values requested using \p F's \p Name attribute
940 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
941 /// is false).
942 ///
943 /// \returns \p std::nullopt if attribute is not present.
944 ///
945 /// \returns \p std::nullopt and emits error if one of the requested values
946 /// cannot be converted to integer, or \p OnlyFirstRequired is false and
947 /// "second" value is not present.
948 std::optional<std::pair<unsigned, std::optional<unsigned>>>
949 getIntegerPairAttribute(const Function &F, StringRef Name,
950                         bool OnlyFirstRequired = false);
951 
952 /// \returns A vector of integer values requested using \p F's \p Name
953 /// attribute.
954 ///
955 /// The attribute is expected to contain exactly \p Size (>2) integers.
956 ///
957 /// \p DefaultVal is used for any value that cannot be parsed, and an error is
958 /// emitted.
959 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
960                                              unsigned Size,
961                                              unsigned DefaultVal = 0);
962 
963 /// Represents the counter values to wait for in an s_waitcnt instruction.
964 ///
965 /// Large values (including the maximum possible integer) can be used to
966 /// represent "don't care" waits.
967 struct Waitcnt {
968   unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
969   unsigned ExpCnt = ~0u;
970   unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
971   unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
972   unsigned SampleCnt = ~0u; // gfx12+ only.
973   unsigned BvhCnt = ~0u;    // gfx12+ only.
974   unsigned KmCnt = ~0u;     // gfx12+ only.
975 
976   Waitcnt() = default;
977   // Pre-gfx12 constructor.
978   Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
979       : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt),
980         SampleCnt(~0u), BvhCnt(~0u), KmCnt(~0u) {}
981 
982   // gfx12+ constructor.
983   Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
984           unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt)
985       : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
986         SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}
987 
988   bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
989 
990   bool hasWaitExceptStoreCnt() const {
991     return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
992            SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u;
993   }
994 
995   bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
996 
997   Waitcnt combined(const Waitcnt &Other) const {
998     // Does the right thing provided self and Other are either both pre-gfx12
999     // or both gfx12+.
1000     return Waitcnt(
1001         std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
1002         std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
1003         std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
1004         std::min(KmCnt, Other.KmCnt));
1005   }
1006 };
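// Editorial worked example (not from the original header), using the pre-gfx12
// constructor and combined() defined above:
//
//   Waitcnt A(/*VmCnt=*/2, /*ExpCnt=*/~0u, /*LgkmCnt=*/5, /*VsCnt=*/~0u);
//   Waitcnt B(/*VmCnt=*/4, /*ExpCnt=*/1, /*LgkmCnt=*/~0u, /*VsCnt=*/~0u);
//   Waitcnt C = A.combined(B); // LoadCnt=2, ExpCnt=1, DsCnt=5, StoreCnt=~0u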
1007 
1008 // The following methods are only meaningful on targets that support
1009 // S_WAITCNT.
1010 
1011 /// \returns Vmcnt bit mask for given isa \p Version.
1012 unsigned getVmcntBitMask(const IsaVersion &Version);
1013 
1014 /// \returns Expcnt bit mask for given isa \p Version.
1015 unsigned getExpcntBitMask(const IsaVersion &Version);
1016 
1017 /// \returns Lgkmcnt bit mask for given isa \p Version.
1018 unsigned getLgkmcntBitMask(const IsaVersion &Version);
1019 
1020 /// \returns Waitcnt bit mask for given isa \p Version.
1021 unsigned getWaitcntBitMask(const IsaVersion &Version);
1022 
1023 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
1024 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
1025 
1026 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
1027 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
1028 
1029 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
1030 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
1031 
1032 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
1033 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
1034 /// \p Lgkmcnt respectively. Should not be used on gfx12+, where the
1035 /// instruction that needs it is deprecated.
1036 ///
1037 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
1038 ///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
1039 ///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
1040 ///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
1041 ///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
1042 ///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
1043 ///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
1044 ///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
1045 ///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
1046 ///
1047 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1048                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
1049 
1050 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1051 
1052 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1053 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1054                      unsigned Vmcnt);
1055 
1056 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1057 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1058                       unsigned Expcnt);
1059 
1060 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1061 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1062                        unsigned Lgkmcnt);
1063 
1064 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1065 /// \p Version. Should not be used on gfx12+, where the instruction that
1066 /// needs it is deprecated.
1067 ///
1068 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1069 ///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
1070 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
1071 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
1072 ///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
1073 ///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
1074 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
1075 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
1076 ///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
1077 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
1078 ///
1079 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1080 /// isa \p Version.
1081 ///
1082 unsigned encodeWaitcnt(const IsaVersion &Version,
1083                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
1084 
1085 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
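// Editorial worked example (not from the original header), following the field
// layout documented above for a pre-gfx9 IsaVersion:
//
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/1, /*Expcnt=*/2,
//                                /*Lgkmcnt=*/3); // fields at [3:0], [6:4],
//                                                // [11:8] -> 0x321
//   unsigned Vm, Exp, Lgkm;
//   decodeWaitcnt(Version, Enc, Vm, Exp, Lgkm);  // Vm=1, Exp=2, Lgkm=3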
1086 
1087 // The following methods are only meaningful on targets that support
1088 // S_WAIT_*CNT, introduced with gfx12.
1089 
1090 /// \returns Loadcnt bit mask for given isa \p Version.
1091 /// Returns 0 for versions that do not support LOADcnt
1092 unsigned getLoadcntBitMask(const IsaVersion &Version);
1093 
1094 /// \returns Samplecnt bit mask for given isa \p Version.
1095 /// Returns 0 for versions that do not support SAMPLEcnt
1096 unsigned getSamplecntBitMask(const IsaVersion &Version);
1097 
1098 /// \returns Bvhcnt bit mask for given isa \p Version.
1099 /// Returns 0 for versions that do not support BVHcnt
1100 unsigned getBvhcntBitMask(const IsaVersion &Version);
1101 
1102 /// \returns Dscnt bit mask for given isa \p Version.
1103 /// Returns 0 for versions that do not support DScnt
1104 unsigned getDscntBitMask(const IsaVersion &Version);
1105 
1106 /// \returns Kmcnt bit mask for given isa \p Version.
1107 /// Returns 0 for versions that do not support KMcnt.
1108 unsigned getKmcntBitMask(const IsaVersion &Version);
1109 
1110 /// \return STOREcnt or VScnt bit mask for given isa \p Version.
1111 /// returns 0 for versions that do not support STOREcnt or VScnt.
1112 /// STOREcnt and VScnt are the same counter, the name used
1113 /// depends on the ISA version.
1114 unsigned getStorecntBitMask(const IsaVersion &Version);
1115 
1116 // The following are only meaningful on targets that support
1117 // S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1118 
1119 /// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1120 /// isa \p Version.
1121 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1122 
1123 /// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1124 /// isa \p Version.
1125 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1126 
1127 /// \returns \p Loadcnt and \p Dscnt components of \p Decoded  encoded as an
1128 /// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1129 /// \p Version.
1130 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1131 
1132 /// \returns \p Storecnt and \p Dscnt components of \p Decoded  encoded as an
1133 /// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1134 /// \p Version.
1135 unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1136 
1137 namespace Hwreg {
1138 
1139 using HwregId = EncodingField<5, 0>;
1140 using HwregOffset = EncodingField<10, 6>;
1141 
1142 struct HwregSize : EncodingField<15, 11, 32> {
1143   using EncodingField::EncodingField;
1144   constexpr uint64_t encode() const { return Value - 1; }
1145   static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
1146 };
1147 
1148 using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
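// Editorial sketch (not from the original header): building and decoding the
// 16-bit hwreg() operand of s_getreg/s_setreg with the fields above. The
// particular register id (1) is only illustrative.
//
//   uint64_t Imm = HwregEncoding::encode(HwregId(1), HwregOffset(0),
//                                        HwregSize(32));  // == 0xF801
//   auto [Id, Offset, Size] = HwregEncoding::decode(Imm);  // {1, 0, 32}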
1149 
1150 } // namespace Hwreg
1151 
1152 namespace DepCtr {
1153 
1154 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1155 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1156                  const MCSubtargetInfo &STI);
1157 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1158                               const MCSubtargetInfo &STI);
1159 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1160                   bool &IsDefault, const MCSubtargetInfo &STI);
1161 
1162 /// \returns Decoded VaVdst from given immediate \p Encoded.
1163 unsigned decodeFieldVaVdst(unsigned Encoded);
1164 
1165 /// \returns Decoded VmVsrc from given immediate \p Encoded.
1166 unsigned decodeFieldVmVsrc(unsigned Encoded);
1167 
1168 /// \returns Decoded SaSdst from given immediate \p Encoded.
1169 unsigned decodeFieldSaSdst(unsigned Encoded);
1170 
1171 /// \returns \p VmVsrc as an encoded Depctr immediate.
1172 unsigned encodeFieldVmVsrc(unsigned VmVsrc);
1173 
1174 /// \returns \p Encoded combined with encoded \p VmVsrc.
1175 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1176 
1177 /// \returns \p VaVdst as an encoded Depctr immediate.
1178 unsigned encodeFieldVaVdst(unsigned VaVdst);
1179 
1180 /// \returns \p Encoded combined with encoded \p VaVdst.
1181 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1182 
1183 /// \returns \p SaSdst as an encoded Depctr immediate.
1184 unsigned encodeFieldSaSdst(unsigned SaSdst);
1185 
1186 /// \returns \p Encoded combined with encoded \p SaSdst.
1187 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
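// Editorial usage sketch (not from the original header): starting from the
// default encoding and overriding individual depctr fields.
//
//   unsigned Enc = getDefaultDepCtrEncoding(STI);
//   Enc = encodeFieldVaVdst(Enc, 0); // override the va_vdst field
//   Enc = encodeFieldVmVsrc(Enc, 0); // override the vm_vsrc field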
1188 
1189 } // namespace DepCtr
1190 
1191 namespace Exp {
1192 
1193 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1194 
1195 LLVM_READONLY
1196 unsigned getTgtId(const StringRef Name);
1197 
1198 LLVM_READNONE
1199 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1200 
1201 } // namespace Exp
1202 
1203 namespace MTBUFFormat {
1204 
1205 LLVM_READNONE
1206 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1207 
1208 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1209 
1210 int64_t getDfmt(const StringRef Name);
1211 
1212 StringRef getDfmtName(unsigned Id);
1213 
1214 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1215 
1216 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1217 
1218 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1219 
1220 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1221 
1222 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1223 
1224 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1225 
1226 bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1227 
1228 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1229                              const MCSubtargetInfo &STI);
1230 
1231 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1232 
1233 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1234 
1235 } // namespace MTBUFFormat
1236 
1237 namespace SendMsg {
1238 
1239 LLVM_READNONE
1240 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1241 
1242 LLVM_READNONE
1243 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1244                   bool Strict = true);
1245 
1246 LLVM_READNONE
1247 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1248                       const MCSubtargetInfo &STI, bool Strict = true);
1249 
1250 LLVM_READNONE
1251 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1252 
1253 LLVM_READNONE
1254 bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1255 
1256 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1257                uint16_t &StreamId, const MCSubtargetInfo &STI);
1258 
1259 LLVM_READNONE
1260 uint64_t encodeMsg(uint64_t MsgId,
1261                    uint64_t OpId,
1262                    uint64_t StreamId);
1263 
1264 } // namespace SendMsg
1265 
1266 
1267 unsigned getInitialPSInputAddr(const Function &F);
1268 
1269 bool getHasColorExport(const Function &F);
1270 
1271 bool getHasDepthExport(const Function &F);
1272 
1273 LLVM_READNONE
1274 bool isShader(CallingConv::ID CC);
1275 
1276 LLVM_READNONE
1277 bool isGraphics(CallingConv::ID CC);
1278 
1279 LLVM_READNONE
1280 bool isCompute(CallingConv::ID CC);
1281 
1282 LLVM_READNONE
1283 bool isEntryFunctionCC(CallingConv::ID CC);
1284 
1285 // These functions are considered entrypoints into the current module, i.e. they
1286 // are allowed to be called from outside the current module. This is different
1287 // from isEntryFunctionCC, which is only true for functions that are entered by
1288 // the hardware. Module entry points include all entry functions but also
1289 // include functions that can be called from other functions inside or outside
1290 // the current module. Module entry functions are allowed to allocate LDS.
1291 LLVM_READNONE
1292 bool isModuleEntryFunctionCC(CallingConv::ID CC);
1293 
1294 LLVM_READNONE
1295 bool isChainCC(CallingConv::ID CC);
1296 
1297 bool isKernelCC(const Function *Func);
1298 
1299 // FIXME: Remove this when calling conventions are cleaned up.
1300 LLVM_READNONE
1301 inline bool isKernel(CallingConv::ID CC) {
1302   switch (CC) {
1303   case CallingConv::AMDGPU_KERNEL:
1304   case CallingConv::SPIR_KERNEL:
1305     return true;
1306   default:
1307     return false;
1308   }
1309 }
1310 
1311 bool hasXNACK(const MCSubtargetInfo &STI);
1312 bool hasSRAMECC(const MCSubtargetInfo &STI);
1313 bool hasMIMG_R128(const MCSubtargetInfo &STI);
1314 bool hasA16(const MCSubtargetInfo &STI);
1315 bool hasG16(const MCSubtargetInfo &STI);
1316 bool hasPackedD16(const MCSubtargetInfo &STI);
1317 bool hasGDS(const MCSubtargetInfo &STI);
1318 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1319 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1320 
1321 bool isSI(const MCSubtargetInfo &STI);
1322 bool isCI(const MCSubtargetInfo &STI);
1323 bool isVI(const MCSubtargetInfo &STI);
1324 bool isGFX9(const MCSubtargetInfo &STI);
1325 bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1326 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1327 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1328 bool isGFX8Plus(const MCSubtargetInfo &STI);
1329 bool isGFX9Plus(const MCSubtargetInfo &STI);
1330 bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1331 bool isGFX10(const MCSubtargetInfo &STI);
1332 bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1333 bool isGFX10Plus(const MCSubtargetInfo &STI);
1334 bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1335 bool isGFX10Before1030(const MCSubtargetInfo &STI);
1336 bool isGFX11(const MCSubtargetInfo &STI);
1337 bool isGFX11Plus(const MCSubtargetInfo &STI);
1338 bool isGFX12(const MCSubtargetInfo &STI);
1339 bool isGFX12Plus(const MCSubtargetInfo &STI);
1340 bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1341 bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1342 bool isGCN3Encoding(const MCSubtargetInfo &STI);
1343 bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1344 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1345 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1346 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1347 bool isGFX90A(const MCSubtargetInfo &STI);
1348 bool isGFX940(const MCSubtargetInfo &STI);
1349 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1350 bool hasMAIInsts(const MCSubtargetInfo &STI);
1351 bool hasVOPD(const MCSubtargetInfo &STI);
1352 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1353 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1354 unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1355 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
1356 
1357 /// \returns true if \p Reg is a scalar register.
1358 bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);
1359 
1360 /// \returns true if \p Reg occupies the high 16 bits of a 32-bit register.
1361 bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);
1362 
1363 /// If \p Reg is a pseudo register, return the correct hardware register
1364 /// given \p STI, otherwise return \p Reg.
1365 MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);
1366 
1367 /// Convert hardware register \p Reg to a pseudo register
1368 LLVM_READNONE
1369 MCRegister mc2PseudoReg(MCRegister Reg);
1370 
1371 LLVM_READNONE
1372 bool isInlineValue(unsigned Reg);
1373 
1374 /// Is this an AMDGPU specific source operand? These include registers,
1375 /// inline constants, literals and mandatory literals (KImm).
1376 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
1377 
1378 /// Is this a KImm operand?
1379 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1380 
1381 /// Is this a floating-point operand?
1382 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1383 
1384 /// Does this operand support only inlinable literals?
1385 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1386 
1387 /// Get the size in bits of a register from the register class with ID \p RCID.
1388 unsigned getRegBitWidth(unsigned RCID);
1389 
1390 /// Get the size in bits of a register from the register class \p RC.
1391 unsigned getRegBitWidth(const MCRegisterClass &RC);
1392 
1393 /// Get the size of a register operand.
1394 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1395                            unsigned OpNo);
1396 
1397 LLVM_READNONE
1398 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1399   switch (OpInfo.OperandType) {
1400   case AMDGPU::OPERAND_REG_IMM_INT32:
1401   case AMDGPU::OPERAND_REG_IMM_FP32:
1402   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1403   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1404   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1405   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1406   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1407   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1408   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1409   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1410   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1411   case AMDGPU::OPERAND_KIMM32:
1412   case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1413   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1414     return 4;
1415 
1416   case AMDGPU::OPERAND_REG_IMM_INT64:
1417   case AMDGPU::OPERAND_REG_IMM_FP64:
1418   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1419   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1420   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1421     return 8;
1422 
1423   case AMDGPU::OPERAND_REG_IMM_INT16:
1424   case AMDGPU::OPERAND_REG_IMM_BF16:
1425   case AMDGPU::OPERAND_REG_IMM_FP16:
1426   case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1427   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1428   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1429   case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1430   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1431   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1432   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1433   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1434   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1435   case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1436   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1437   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1438   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1439   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1440   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1441   case AMDGPU::OPERAND_REG_IMM_V2BF16:
1442   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1443     return 2;
1444 
1445   default:
1446     llvm_unreachable("unhandled operand type");
1447   }
1448 }
1449 
1450 LLVM_READNONE
1451 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1452   return getOperandSize(Desc.operands()[OpNo]);
1453 }
1454 
1455 /// Is this literal inlinable as an integer, i.e. not one of the inline
1456 /// constants reserved for floating-point values?
1457 LLVM_READNONE
1458 inline bool isInlinableIntLiteral(int64_t Literal) {
1459   return Literal >= -16 && Literal <= 64;
1460 }
1461 
1462 /// Is this literal inlinable
1463 LLVM_READNONE
1464 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1465 
1466 LLVM_READNONE
1467 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1468 
1469 LLVM_READNONE
1470 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1471 
1472 LLVM_READNONE
1473 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1474 
1478 LLVM_READNONE
1479 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1480 
1481 LLVM_READNONE
1482 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1483 
1484 LLVM_READNONE
1485 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1486 
1487 LLVM_READNONE
1488 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1489 
1490 LLVM_READNONE
1491 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
1492 
1493 LLVM_READNONE
1494 bool isInlinableLiteralV2I16(uint32_t Literal);
1495 
1496 LLVM_READNONE
1497 bool isInlinableLiteralV2BF16(uint32_t Literal);
1498 
1499 LLVM_READNONE
1500 bool isInlinableLiteralV2F16(uint32_t Literal);
1501 
1502 LLVM_READNONE
1503 bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1504 
1505 bool isArgPassedInSGPR(const Argument *Arg);
1506 
1507 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1508 
1509 LLVM_READONLY
1510 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1511                                       int64_t EncodedOffset);
1512 
1513 LLVM_READONLY
1514 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1515                                     int64_t EncodedOffset,
1516                                     bool IsBuffer);
1517 
1518 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1519 /// offsets.
1520 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
1521 
1522 /// \returns The encoding that will be used for \p ByteOffset in the
1523 /// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1524 /// S_LOAD instructions have a signed offset; on other subtargets it is
1525 /// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1526 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1527                                             int64_t ByteOffset, bool IsBuffer,
1528                                             bool HasSOffset = false);
1529 
1530 /// \return The encoding that can be used for a 32-bit literal offset in an SMRD
1531 /// instruction. This is only useful on CI.
1532 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1533                                                      int64_t ByteOffset);
1534 
1535 /// For pre-GFX12 FLAT instructions the offset must be positive;
1536 /// MSB is ignored and forced to zero.
1537 ///
1538 /// \return The number of bits available for the signed offset field in flat
1539 /// instructions. Note that some forms of the instruction disallow negative
1540 /// offsets.
1541 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1542 
1543 /// \returns true if this offset is small enough to fit in the SMRD
1544 /// offset field.  \p ByteOffset should be the offset in bytes and
1545 /// not the encoded offset.
1546 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1547 
1548 LLVM_READNONE
1549 inline bool isLegalDPALU_DPPControl(unsigned DC) {
1550   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1551 }
1552 
1553 /// \returns true if an instruction may have a 64-bit VGPR operand.
1554 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
1555 
1556 /// \returns true if an instruction is a DP ALU DPP.
1557 bool isDPALU_DPP(const MCInstrDesc &OpDesc);
1558 
1559 /// \returns true if the intrinsic is divergent
1560 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1561 
1562 /// \returns true if the intrinsic is uniform
1563 bool isIntrinsicAlwaysUniform(unsigned IntrID);
1564 
1565 /// \returns LDS block size in terms of dwords.
1566 /// This is used to calculate the LDS size encoded for PAL metadata 3.0+, which
1567 /// must be defined in terms of bytes.
1568 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1569 
1570 } // end namespace AMDGPU
1571 
1572 raw_ostream &operator<<(raw_ostream &OS,
1573                         const AMDGPU::IsaInfo::TargetIDSetting S);
1574 
1575 } // end namespace llvm
1576 
1577 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1578