1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUAsmUtils.h"
12 #include "AMDKernelCodeT.h"
13 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14 #include "Utils/AMDKernelCodeTUtils.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/BinaryFormat/ELF.h"
17 #include "llvm/IR/Attributes.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/Function.h"
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/IR/IntrinsicsAMDGPU.h"
22 #include "llvm/IR/IntrinsicsR600.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/TargetParser/TargetParser.h"
29 #include <optional>
30 
31 #define GET_INSTRINFO_NAMED_OPS
32 #define GET_INSTRMAP_INFO
33 #include "AMDGPUGenInstrInfo.inc"
34 
35 static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
36     "amdhsa-code-object-version", llvm::cl::Hidden,
37     llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
38     llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
39                    "or asm directive still take priority if present)"));
40 
41 namespace {
42 
43 /// \returns Bit mask for given bit \p Shift and bit \p Width.
44 unsigned getBitMask(unsigned Shift, unsigned Width) {
45   return ((1 << Width) - 1) << Shift;
46 }
47 
48 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
49 ///
50 /// \returns Packed \p Dst.
51 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
52   unsigned Mask = getBitMask(Shift, Width);
53   return ((Src << Shift) & Mask) | (Dst & ~Mask);
54 }
55 
56 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
57 ///
58 /// \returns Unpacked bits.
59 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
60   return (Src & getBitMask(Shift, Width)) >> Shift;
61 }
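
// As a worked illustration of the three helpers above (the values are chosen
// arbitrarily and are not taken from any real field layout):
//   getBitMask(/*Shift=*/4, /*Width=*/2)                     == 0x30
//   packBits(/*Src=*/3, /*Dst=*/0, /*Shift=*/4, /*Width=*/2) == 0x30
//   unpackBits(/*Src=*/0x30, /*Shift=*/4, /*Width=*/2)       == 3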
62 
63 /// \returns Vmcnt bit shift (lower bits).
64 unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
65   return VersionMajor >= 11 ? 10 : 0;
66 }
67 
68 /// \returns Vmcnt bit width (lower bits).
69 unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
70   return VersionMajor >= 11 ? 6 : 4;
71 }
72 
73 /// \returns Expcnt bit shift.
74 unsigned getExpcntBitShift(unsigned VersionMajor) {
75   return VersionMajor >= 11 ? 0 : 4;
76 }
77 
78 /// \returns Expcnt bit width.
79 unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
80 
81 /// \returns Lgkmcnt bit shift.
82 unsigned getLgkmcntBitShift(unsigned VersionMajor) {
83   return VersionMajor >= 11 ? 4 : 8;
84 }
85 
86 /// \returns Lgkmcnt bit width.
87 unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
88   return VersionMajor >= 10 ? 6 : 4;
89 }
90 
91 /// \returns Vmcnt bit shift (higher bits).
92 unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
93 
94 /// \returns Vmcnt bit width (higher bits).
95 unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
96   return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
97 }
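
// Taken together, the helpers above imply the legacy s_waitcnt layout. For
// example, on GFX9 the fields sit at vmcnt[3:0], expcnt[6:4], lgkmcnt[11:8]
// and vmcnt_hi[15:14]; this is a sketch derived from the shifts and widths
// above, not an authoritative description of the hardware register.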
98 
99 /// \returns Loadcnt bit width.
100 unsigned getLoadcntBitWidth(unsigned VersionMajor) {
101   return VersionMajor >= 12 ? 6 : 0;
102 }
103 
104 /// \returns Samplecnt bit width.
105 unsigned getSamplecntBitWidth(unsigned VersionMajor) {
106   return VersionMajor >= 12 ? 6 : 0;
107 }
108 
109 /// \returns Bvhcnt bit width.
110 unsigned getBvhcntBitWidth(unsigned VersionMajor) {
111   return VersionMajor >= 12 ? 3 : 0;
112 }
113 
114 /// \returns Dscnt bit width.
115 unsigned getDscntBitWidth(unsigned VersionMajor) {
116   return VersionMajor >= 12 ? 6 : 0;
117 }
118 
119 /// \returns Dscnt bit shift in combined S_WAIT instructions.
120 unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }
121 
122 /// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
123 unsigned getStorecntBitWidth(unsigned VersionMajor) {
124   return VersionMajor >= 10 ? 6 : 0;
125 }
126 
127 /// \returns Kmcnt bit width.
128 unsigned getKmcntBitWidth(unsigned VersionMajor) {
129   return VersionMajor >= 12 ? 5 : 0;
130 }
131 
132 /// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
133 unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
134   return VersionMajor >= 12 ? 8 : 0;
135 }
136 
137 /// \returns VmVsrc bit width.
138 inline unsigned getVmVsrcBitWidth() { return 3; }
139 
140 /// \returns VmVsrc bit shift.
141 inline unsigned getVmVsrcBitShift() { return 2; }
142 
143 /// \returns VaVdst bit width.
144 inline unsigned getVaVdstBitWidth() { return 4; }
145 
146 /// \returns VaVdst bit shift.
147 inline unsigned getVaVdstBitShift() { return 12; }
148 
149 /// \returns SaSdst bit width.
150 inline unsigned getSaSdstBitWidth() { return 1; }
151 
152 /// \returns SaSdst bit shift.
153 inline unsigned getSaSdstBitShift() { return 0; }
154 
155 } // end anonymous namespace
156 
157 namespace llvm {
158 
159 namespace AMDGPU {
160 
161 /// \returns true if the target supports signed immediate offset for SMRD
162 /// instructions.
163 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
164   return isGFX9Plus(ST);
165 }
166 
167 /// \returns True if \p STI is AMDHSA.
168 bool isHsaAbi(const MCSubtargetInfo &STI) {
169   return STI.getTargetTriple().getOS() == Triple::AMDHSA;
170 }
171 
172 unsigned getAMDHSACodeObjectVersion(const Module &M) {
173   if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
174           M.getModuleFlag("amdhsa_code_object_version"))) {
175     return (unsigned)Ver->getZExtValue() / 100;
176   }
177 
178   return getDefaultAMDHSACodeObjectVersion();
179 }
180 
181 unsigned getDefaultAMDHSACodeObjectVersion() {
182   return DefaultAMDHSACodeObjectVersion;
183 }
184 
185 unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
186   switch (ABIVersion) {
187   case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
188     return 4;
189   case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
190     return 5;
191   case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
192     return 6;
193   default:
194     return getDefaultAMDHSACodeObjectVersion();
195   }
196 }
197 
198 uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
199   if (T.getOS() != Triple::AMDHSA)
200     return 0;
201 
202   switch (CodeObjectVersion) {
203   case 4:
204     return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
205   case 5:
206     return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
207   case 6:
208     return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
209   default:
210     report_fatal_error("Unsupported AMDHSA Code Object Version " +
211                        Twine(CodeObjectVersion));
212   }
213 }
214 
215 unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
216   switch (CodeObjectVersion) {
217   case AMDHSA_COV4:
218     return 48;
219   case AMDHSA_COV5:
220   case AMDHSA_COV6:
221   default:
222     return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
223   }
224 }
225 
226 
227 // FIXME: All such magic numbers about the ABI should be in a
228 // central TD file.
229 unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
230   switch (CodeObjectVersion) {
231   case AMDHSA_COV4:
232     return 24;
233   case AMDHSA_COV5:
234   case AMDHSA_COV6:
235   default:
236     return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
237   }
238 }
239 
240 unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
241   switch (CodeObjectVersion) {
242   case AMDHSA_COV4:
243     return 32;
244   case AMDHSA_COV5:
245   case AMDHSA_COV6:
246   default:
247     return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
248   }
249 }
250 
251 unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
252   switch (CodeObjectVersion) {
253   case AMDHSA_COV4:
254     return 40;
255   case AMDHSA_COV5:
256   case AMDHSA_COV6:
257   default:
258     return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
259   }
260 }
261 
262 #define GET_MIMGBaseOpcodesTable_IMPL
263 #define GET_MIMGDimInfoTable_IMPL
264 #define GET_MIMGInfoTable_IMPL
265 #define GET_MIMGLZMappingTable_IMPL
266 #define GET_MIMGMIPMappingTable_IMPL
267 #define GET_MIMGBiasMappingTable_IMPL
268 #define GET_MIMGOffsetMappingTable_IMPL
269 #define GET_MIMGG16MappingTable_IMPL
270 #define GET_MAIInstInfoTable_IMPL
271 #include "AMDGPUGenSearchableTables.inc"
272 
273 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
274                   unsigned VDataDwords, unsigned VAddrDwords) {
275   const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
276                                              VDataDwords, VAddrDwords);
277   return Info ? Info->Opcode : -1;
278 }
279 
280 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
281   const MIMGInfo *Info = getMIMGInfo(Opc);
282   return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
283 }
284 
285 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
286   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
287   const MIMGInfo *NewInfo =
288       getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
289                           NewChannels, OrigInfo->VAddrDwords);
290   return NewInfo ? NewInfo->Opcode : -1;
291 }
292 
293 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
294                            const MIMGDimInfo *Dim, bool IsA16,
295                            bool IsG16Supported) {
296   unsigned AddrWords = BaseOpcode->NumExtraArgs;
297   unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
298                             (BaseOpcode->LodOrClampOrMip ? 1 : 0);
299   if (IsA16)
300     AddrWords += divideCeil(AddrComponents, 2);
301   else
302     AddrWords += AddrComponents;
303 
304   // Note: For subtargets that support A16 but not G16, enabling A16 also
305   // enables 16-bit gradients.
306   // For subtargets that support both A16 (an operand modifier) and G16 (done
307   // via a different instruction encoding), the two settings are independent.
308 
309   if (BaseOpcode->Gradients) {
310     if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
311       // There are two gradients per coordinate; we pack them separately.
312       // For the 3D case we get
313       // (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv).
314       AddrWords += alignTo<2>(Dim->NumGradients / 2);
315     else
316       AddrWords += Dim->NumGradients;
317   }
318   return AddrWords;
319 }
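
// A rough worked example for getAddrSizeMIMGOp, assuming a hypothetical
// sampling opcode with Coordinates and Gradients set, no extra args and no
// LOD/clamp/mip, on a 2D dim (NumCoords = 2, NumGradients = 4):
//   no A16:                  2 coords + 4 gradients           = 6 dwords
//   A16, G16 supported:      ceil(2 / 2) + 4                  = 5 dwords
//   A16, G16 not supported:  ceil(2 / 2) + alignTo<2>(4 / 2)  = 3 dwords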
320 
321 struct MUBUFInfo {
322   uint16_t Opcode;
323   uint16_t BaseOpcode;
324   uint8_t elements;
325   bool has_vaddr;
326   bool has_srsrc;
327   bool has_soffset;
328   bool IsBufferInv;
329   bool tfe;
330 };
331 
332 struct MTBUFInfo {
333   uint16_t Opcode;
334   uint16_t BaseOpcode;
335   uint8_t elements;
336   bool has_vaddr;
337   bool has_srsrc;
338   bool has_soffset;
339 };
340 
341 struct SMInfo {
342   uint16_t Opcode;
343   bool IsBuffer;
344 };
345 
346 struct VOPInfo {
347   uint16_t Opcode;
348   bool IsSingle;
349 };
350 
351 struct VOPC64DPPInfo {
352   uint16_t Opcode;
353 };
354 
355 struct VOPCDPPAsmOnlyInfo {
356   uint16_t Opcode;
357 };
358 
359 struct VOP3CDPPAsmOnlyInfo {
360   uint16_t Opcode;
361 };
362 
363 struct VOPDComponentInfo {
364   uint16_t BaseVOP;
365   uint16_t VOPDOp;
366   bool CanBeVOPDX;
367 };
368 
369 struct VOPDInfo {
370   uint16_t Opcode;
371   uint16_t OpX;
372   uint16_t OpY;
373   uint16_t Subtarget;
374 };
375 
376 struct VOPTrue16Info {
377   uint16_t Opcode;
378   bool IsTrue16;
379 };
380 
381 #define GET_FP4FP8DstByteSelTable_DECL
382 #define GET_FP4FP8DstByteSelTable_IMPL
383 
384 struct DPMACCInstructionInfo {
385   uint16_t Opcode;
386   bool IsDPMACCInstruction;
387 };
388 
389 struct FP4FP8DstByteSelInfo {
390   uint16_t Opcode;
391   bool HasFP8DstByteSel;
392   bool HasFP4DstByteSel;
393 };
394 
395 #define GET_MTBUFInfoTable_DECL
396 #define GET_MTBUFInfoTable_IMPL
397 #define GET_MUBUFInfoTable_DECL
398 #define GET_MUBUFInfoTable_IMPL
399 #define GET_SMInfoTable_DECL
400 #define GET_SMInfoTable_IMPL
401 #define GET_VOP1InfoTable_DECL
402 #define GET_VOP1InfoTable_IMPL
403 #define GET_VOP2InfoTable_DECL
404 #define GET_VOP2InfoTable_IMPL
405 #define GET_VOP3InfoTable_DECL
406 #define GET_VOP3InfoTable_IMPL
407 #define GET_VOPC64DPPTable_DECL
408 #define GET_VOPC64DPPTable_IMPL
409 #define GET_VOPC64DPP8Table_DECL
410 #define GET_VOPC64DPP8Table_IMPL
411 #define GET_VOPCAsmOnlyInfoTable_DECL
412 #define GET_VOPCAsmOnlyInfoTable_IMPL
413 #define GET_VOP3CAsmOnlyInfoTable_DECL
414 #define GET_VOP3CAsmOnlyInfoTable_IMPL
415 #define GET_VOPDComponentTable_DECL
416 #define GET_VOPDComponentTable_IMPL
417 #define GET_VOPDPairs_DECL
418 #define GET_VOPDPairs_IMPL
419 #define GET_VOPTrue16Table_DECL
420 #define GET_VOPTrue16Table_IMPL
421 #define GET_WMMAOpcode2AddrMappingTable_DECL
422 #define GET_WMMAOpcode2AddrMappingTable_IMPL
423 #define GET_WMMAOpcode3AddrMappingTable_DECL
424 #define GET_WMMAOpcode3AddrMappingTable_IMPL
425 #define GET_getMFMA_F8F6F4_WithSize_DECL
426 #define GET_getMFMA_F8F6F4_WithSize_IMPL
427 #define GET_isMFMA_F8F6F4Table_IMPL
428 #define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
429 
430 #include "AMDGPUGenSearchableTables.inc"
431 
432 int getMTBUFBaseOpcode(unsigned Opc) {
433   const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
434   return Info ? Info->BaseOpcode : -1;
435 }
436 
437 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
438   const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
439   return Info ? Info->Opcode : -1;
440 }
441 
442 int getMTBUFElements(unsigned Opc) {
443   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
444   return Info ? Info->elements : 0;
445 }
446 
447 bool getMTBUFHasVAddr(unsigned Opc) {
448   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
449   return Info ? Info->has_vaddr : false;
450 }
451 
452 bool getMTBUFHasSrsrc(unsigned Opc) {
453   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
454   return Info ? Info->has_srsrc : false;
455 }
456 
457 bool getMTBUFHasSoffset(unsigned Opc) {
458   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
459   return Info ? Info->has_soffset : false;
460 }
461 
462 int getMUBUFBaseOpcode(unsigned Opc) {
463   const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
464   return Info ? Info->BaseOpcode : -1;
465 }
466 
467 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
468   const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
469   return Info ? Info->Opcode : -1;
470 }
471 
472 int getMUBUFElements(unsigned Opc) {
473   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
474   return Info ? Info->elements : 0;
475 }
476 
477 bool getMUBUFHasVAddr(unsigned Opc) {
478   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
479   return Info ? Info->has_vaddr : false;
480 }
481 
482 bool getMUBUFHasSrsrc(unsigned Opc) {
483   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
484   return Info ? Info->has_srsrc : false;
485 }
486 
487 bool getMUBUFHasSoffset(unsigned Opc) {
488   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
489   return Info ? Info->has_soffset : false;
490 }
491 
492 bool getMUBUFIsBufferInv(unsigned Opc) {
493   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
494   return Info ? Info->IsBufferInv : false;
495 }
496 
497 bool getMUBUFTfe(unsigned Opc) {
498   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
499   return Info ? Info->tfe : false;
500 }
501 
502 bool getSMEMIsBuffer(unsigned Opc) {
503   const SMInfo *Info = getSMEMOpcodeHelper(Opc);
504   return Info ? Info->IsBuffer : false;
505 }
506 
507 bool getVOP1IsSingle(unsigned Opc) {
508   const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
509   return Info ? Info->IsSingle : true;
510 }
511 
512 bool getVOP2IsSingle(unsigned Opc) {
513   const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
514   return Info ? Info->IsSingle : true;
515 }
516 
517 bool getVOP3IsSingle(unsigned Opc) {
518   const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
519   return Info ? Info->IsSingle : true;
520 }
521 
522 bool isVOPC64DPP(unsigned Opc) {
523   return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
524 }
525 
526 bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }
527 
528 bool getMAIIsDGEMM(unsigned Opc) {
529   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
530   return Info ? Info->is_dgemm : false;
531 }
532 
533 bool getMAIIsGFX940XDL(unsigned Opc) {
534   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
535   return Info ? Info->is_gfx940_xdl : false;
536 }
537 
538 uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
539   switch (EncodingVal) {
540   case MFMAScaleFormats::FP6_E2M3:
541   case MFMAScaleFormats::FP6_E3M2:
542     return 6;
543   case MFMAScaleFormats::FP4_E2M1:
544     return 4;
545   case MFMAScaleFormats::FP8_E4M3:
546   case MFMAScaleFormats::FP8_E5M2:
547   default:
548     return 8;
549   }
550 
551   llvm_unreachable("covered switch over mfma scale formats");
552 }
553 
554 const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
555                                                       unsigned BLGP,
556                                                       unsigned F8F8Opcode) {
557   uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
558   uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
559   return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
560 }
561 
562 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
563   if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
564     return SIEncodingFamily::GFX12;
565   if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
566     return SIEncodingFamily::GFX11;
567   llvm_unreachable("Subtarget generation does not support VOPD!");
568 }
569 
570 CanBeVOPD getCanBeVOPD(unsigned Opc) {
571   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
572   if (Info)
573     return {Info->CanBeVOPDX, true};
574   return {false, false};
575 }
576 
577 unsigned getVOPDOpcode(unsigned Opc) {
578   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
579   return Info ? Info->VOPDOp : ~0u;
580 }
581 
582 bool isVOPD(unsigned Opc) {
583   return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
584 }
585 
586 bool isMAC(unsigned Opc) {
587   return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
588          Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
589          Opc == AMDGPU::V_MAC_F32_e64_vi ||
590          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
591          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
592          Opc == AMDGPU::V_MAC_F16_e64_vi ||
593          Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
594          Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
595          Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
596          Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
597          Opc == AMDGPU::V_FMAC_F32_e64_vi ||
598          Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
599          Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
600          Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
601          Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
602          Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
603          Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
604          Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
605          Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
606          Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
607          Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
608 }
609 
610 bool isPermlane16(unsigned Opc) {
611   return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
612          Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
613          Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
614          Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
615          Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
616          Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
617          Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
618          Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
619 }
620 
621 bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
622   return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
623          Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
624          Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
625          Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
626          Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
627          Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
628          Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
629          Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
630          Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
631          Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
632 }
633 
634 bool isGenericAtomic(unsigned Opc) {
635   return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
636          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
637          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
638          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
639          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
640          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
641          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
642          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
643          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
644          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
645          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
646          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
647          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
648          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
649          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
650          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
651          Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
652 }
653 
654 bool isTrue16Inst(unsigned Opc) {
655   const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
656   return Info ? Info->IsTrue16 : false;
657 }
658 
659 FPType getFPDstSelType(unsigned Opc) {
660   const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
661   if (!Info)
662     return FPType::None;
663   if (Info->HasFP8DstByteSel)
664     return FPType::FP8;
665   if (Info->HasFP4DstByteSel)
666     return FPType::FP4;
667 
668   return FPType::None;
669 }
670 
671 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
672   const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
673   return Info ? Info->Opcode3Addr : ~0u;
674 }
675 
676 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
677   const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
678   return Info ? Info->Opcode2Addr : ~0u;
679 }
680 
681 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
682 // header file, so we need to wrap it in a function that takes unsigned
683 // instead.
684 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
685   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
686 }
687 
688 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
689   const VOPDInfo *Info =
690       getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
691   return Info ? Info->Opcode : -1;
692 }
693 
694 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
695   const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
696   assert(Info);
697   const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
698   const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
699   assert(OpX && OpY);
700   return {OpX->BaseVOP, OpY->BaseVOP};
701 }
702 
703 namespace VOPD {
704 
705 ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
706   assert(OpDesc.getNumDefs() == Component::DST_NUM);
707 
708   assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
709   assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
710   auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
711   assert(TiedIdx == -1 || TiedIdx == Component::DST);
712   HasSrc2Acc = TiedIdx != -1;
713 
714   SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
715   assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
716 
717   auto OperandsNum = OpDesc.getNumOperands();
718   unsigned CompOprIdx;
719   for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
720     if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
721       MandatoryLiteralIdx = CompOprIdx;
722       break;
723     }
724   }
725 }
726 
727 unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
728   assert(CompOprIdx < Component::MAX_OPR_NUM);
729 
730   if (CompOprIdx == Component::DST)
731     return getIndexOfDstInParsedOperands();
732 
733   auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
734   if (CompSrcIdx < getCompParsedSrcOperandsNum())
735     return getIndexOfSrcInParsedOperands(CompSrcIdx);
736 
737   // The specified operand does not exist.
738   return 0;
739 }
740 
741 std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
742     std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
743 
744   auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
745   auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
746 
747   const unsigned CompOprNum =
748       SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
749   unsigned CompOprIdx;
750   for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
751     unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
752     if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
753         ((OpXRegs[CompOprIdx] & BanksMasks) ==
754          (OpYRegs[CompOprIdx] & BanksMasks)))
755       return CompOprIdx;
756   }
757 
758   return {};
759 }
760 
761 // Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
762 // by the specified component. If an operand is unused
763 // or is not a VGPR, the corresponding value is 0.
764 //
765 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
766 // for the specified component and MC operand. The callback must return 0
767 // if the operand is not a register or not a VGPR.
768 InstInfo::RegIndices InstInfo::getRegIndices(
769     unsigned CompIdx,
770     std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
771   assert(CompIdx < COMPONENTS_NUM);
772 
773   const auto &Comp = CompInfo[CompIdx];
774   InstInfo::RegIndices RegIndices;
775 
776   RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
777 
778   for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
779     unsigned CompSrcIdx = CompOprIdx - DST_NUM;
780     RegIndices[CompOprIdx] =
781         Comp.hasRegSrcOperand(CompSrcIdx)
782             ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
783             : 0;
784   }
785   return RegIndices;
786 }
787 
788 } // namespace VOPD
789 
790 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
791   return VOPD::InstInfo(OpX, OpY);
792 }
793 
794 VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
795                                const MCInstrInfo *InstrInfo) {
796   auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
797   const auto &OpXDesc = InstrInfo->get(OpX);
798   const auto &OpYDesc = InstrInfo->get(OpY);
799   VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
800   VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
801   return VOPD::InstInfo(OpXInfo, OpYInfo);
802 }
803 
804 namespace IsaInfo {
805 
806 AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
807     : STI(STI), XnackSetting(TargetIDSetting::Any),
808       SramEccSetting(TargetIDSetting::Any) {
809   if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
810     XnackSetting = TargetIDSetting::Unsupported;
811   if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
812     SramEccSetting = TargetIDSetting::Unsupported;
813 }
814 
815 void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
816   // Check if xnack or sramecc is explicitly enabled or disabled.  In the
817   // absence of these target features, we assume we must generate code that
818   // can run in any environment.
819   SubtargetFeatures Features(FS);
820   std::optional<bool> XnackRequested;
821   std::optional<bool> SramEccRequested;
822 
823   for (const std::string &Feature : Features.getFeatures()) {
824     if (Feature == "+xnack")
825       XnackRequested = true;
826     else if (Feature == "-xnack")
827       XnackRequested = false;
828     else if (Feature == "+sramecc")
829       SramEccRequested = true;
830     else if (Feature == "-sramecc")
831       SramEccRequested = false;
832   }
833 
834   bool XnackSupported = isXnackSupported();
835   bool SramEccSupported = isSramEccSupported();
836 
837   if (XnackRequested) {
838     if (XnackSupported) {
839       XnackSetting =
840           *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
841     } else {
842       // If a specific xnack setting was requested and this GPU does not support
843       // xnack emit a warning. Setting will remain set to "Unsupported".
844       if (*XnackRequested) {
845         errs() << "warning: xnack 'On' was requested for a processor that does "
846                   "not support it!\n";
847       } else {
848         errs() << "warning: xnack 'Off' was requested for a processor that "
849                   "does not support it!\n";
850       }
851     }
852   }
853 
854   if (SramEccRequested) {
855     if (SramEccSupported) {
856       SramEccSetting =
857           *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
858     } else {
859       // If a specific sramecc setting was requested and this GPU does not
860       // support sramecc, emit a warning. The setting will remain set to
861       // "Unsupported".
862       if (*SramEccRequested) {
863         errs() << "warning: sramecc 'On' was requested for a processor that "
864                   "does not support it!\n";
865       } else {
866         errs() << "warning: sramecc 'Off' was requested for a processor that "
867                   "does not support it!\n";
868       }
869     }
870   }
871 }
872 
873 static TargetIDSetting
874 getTargetIDSettingFromFeatureString(StringRef FeatureString) {
875   if (FeatureString.ends_with("-"))
876     return TargetIDSetting::Off;
877   if (FeatureString.ends_with("+"))
878     return TargetIDSetting::On;
879 
880   llvm_unreachable("Malformed feature string");
881 }
882 
883 void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
884   SmallVector<StringRef, 3> TargetIDSplit;
885   TargetID.split(TargetIDSplit, ':');
886 
887   for (const auto &FeatureString : TargetIDSplit) {
888     if (FeatureString.starts_with("xnack"))
889       XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
890     if (FeatureString.starts_with("sramecc"))
891       SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
892   }
893 }
894 
895 std::string AMDGPUTargetID::toString() const {
896   std::string StringRep;
897   raw_string_ostream StreamRep(StringRep);
898 
899   auto TargetTriple = STI.getTargetTriple();
900   auto Version = getIsaVersion(STI.getCPU());
901 
902   StreamRep << TargetTriple.getArchName() << '-'
903             << TargetTriple.getVendorName() << '-'
904             << TargetTriple.getOSName() << '-'
905             << TargetTriple.getEnvironmentName() << '-';
906 
907   std::string Processor;
908   // TODO: The following else statement is present here because we used
909   // various alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as
910   // 'gfx803'). Remove once all aliases are removed from GCNProcessors.td.
911   if (Version.Major >= 9)
912     Processor = STI.getCPU().str();
913   else
914     Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
915                  Twine(Version.Stepping))
916                     .str();
917 
918   std::string Features;
919   if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
920     // sramecc.
921     if (getSramEccSetting() == TargetIDSetting::Off)
922       Features += ":sramecc-";
923     else if (getSramEccSetting() == TargetIDSetting::On)
924       Features += ":sramecc+";
925     // xnack.
926     if (getXnackSetting() == TargetIDSetting::Off)
927       Features += ":xnack-";
928     else if (getXnackSetting() == TargetIDSetting::On)
929       Features += ":xnack+";
930   }
931 
932   StreamRep << Processor << Features;
933 
934   return StringRep;
935 }
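
// For example, a gfx90a HSA target with sramecc enabled and xnack disabled
// would print as "amdgcn-amd-amdhsa--gfx90a:sramecc+:xnack-" (illustrative;
// the processor spelling ultimately comes from GCNProcessors.td).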
936 
937 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
938   if (STI->getFeatureBits().test(FeatureWavefrontSize16))
939     return 16;
940   if (STI->getFeatureBits().test(FeatureWavefrontSize32))
941     return 32;
942 
943   return 64;
944 }
945 
946 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
947   unsigned BytesPerCU = getAddressableLocalMemorySize(STI);
948 
949   // "Per CU" really means "per whatever functional block the waves of a
950   // workgroup must share". So the effective local memory size is doubled in
951   // WGP mode on gfx10.
952   if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
953     BytesPerCU *= 2;
954 
955   return BytesPerCU;
956 }
957 
958 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
959   if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
960     return 32768;
961   if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
962     return 65536;
963   if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
964     return 163840;
965   return 0;
966 }
967 
968 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
969   // "Per CU" really means "per whatever functional block the waves of a
970   // workgroup must share". For gfx10 in CU mode this is the CU, which contains
971   // two SIMDs.
972   if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
973     return 2;
974   // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
975   // two CUs, so a total of four SIMDs.
976   return 4;
977 }
978 
979 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
980                                unsigned FlatWorkGroupSize) {
981   assert(FlatWorkGroupSize != 0);
982   if (STI->getTargetTriple().getArch() != Triple::amdgcn)
983     return 8;
984   unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
985   unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
986   if (N == 1) {
987     // Single-wave workgroups don't consume barrier resources.
988     return MaxWaves;
989   }
990 
991   unsigned MaxBarriers = 16;
992   if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
993     MaxBarriers = 32;
994 
995   return std::min(MaxWaves / N, MaxBarriers);
996 }
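
// A sketch of the arithmetic, assuming a pre-gfx10 wave64 subtarget (10 waves
// per EU, 4 EUs per CU): a 256-lane workgroup needs N = 4 waves, giving
// std::min(40 / 4, 16) = 10 workgroups per CU.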
997 
998 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
999   return 1;
1000 }
1001 
1002 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
1003   // FIXME: Need to take scratch memory into account.
1004   if (isGFX90A(*STI))
1005     return 8;
1006   if (!isGFX10Plus(*STI))
1007     return 10;
1008   return hasGFX10_3Insts(*STI) ? 16 : 20;
1009 }
1010 
1011 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
1012                                    unsigned FlatWorkGroupSize) {
1013   return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1014                     getEUsPerCU(STI));
1015 }
1016 
1017 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
1018   return 1;
1019 }
1020 
1021 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
1022   // Some subtargets allow encoding 2048, but this isn't tested or supported.
1023   return 1024;
1024 }
1025 
1026 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
1027                               unsigned FlatWorkGroupSize) {
1028   return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
1029 }
1030 
1031 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
1032   IsaVersion Version = getIsaVersion(STI->getCPU());
1033   if (Version.Major >= 10)
1034     return getAddressableNumSGPRs(STI);
1035   if (Version.Major >= 8)
1036     return 16;
1037   return 8;
1038 }
1039 
1040 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
1041   return 8;
1042 }
1043 
1044 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
1045   IsaVersion Version = getIsaVersion(STI->getCPU());
1046   if (Version.Major >= 8)
1047     return 800;
1048   return 512;
1049 }
1050 
1051 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
1052   if (STI->getFeatureBits().test(FeatureSGPRInitBug))
1053     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
1054 
1055   IsaVersion Version = getIsaVersion(STI->getCPU());
1056   if (Version.Major >= 10)
1057     return 106;
1058   if (Version.Major >= 8)
1059     return 102;
1060   return 104;
1061 }
1062 
1063 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1064   assert(WavesPerEU != 0);
1065 
1066   IsaVersion Version = getIsaVersion(STI->getCPU());
1067   if (Version.Major >= 10)
1068     return 0;
1069 
1070   if (WavesPerEU >= getMaxWavesPerEU(STI))
1071     return 0;
1072 
1073   unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1074   if (STI->getFeatureBits().test(FeatureTrapHandler))
1075     MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1076   MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
1077   return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
1078 }
1079 
1080 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1081                         bool Addressable) {
1082   assert(WavesPerEU != 0);
1083 
1084   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1085   IsaVersion Version = getIsaVersion(STI->getCPU());
1086   if (Version.Major >= 10)
1087     return Addressable ? AddressableNumSGPRs : 108;
1088   if (Version.Major >= 8 && !Addressable)
1089     AddressableNumSGPRs = 112;
1090   unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
1091   if (STI->getFeatureBits().test(FeatureTrapHandler))
1092     MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1093   MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
1094   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
1095 }
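
// A worked example, assuming GFX9 with the trap handler feature disabled: at
// WavesPerEU = 8 the helpers above give
//   getMinNumSGPRs = alignDown(800 / 9, 16) + 1        = 81
//   getMaxNumSGPRs = min(alignDown(800 / 8, 16), 102)  = 96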
1096 
1097 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1098                           bool FlatScrUsed, bool XNACKUsed) {
1099   unsigned ExtraSGPRs = 0;
1100   if (VCCUsed)
1101     ExtraSGPRs = 2;
1102 
1103   IsaVersion Version = getIsaVersion(STI->getCPU());
1104   if (Version.Major >= 10)
1105     return ExtraSGPRs;
1106 
1107   if (Version.Major < 8) {
1108     if (FlatScrUsed)
1109       ExtraSGPRs = 4;
1110   } else {
1111     if (XNACKUsed)
1112       ExtraSGPRs = 4;
1113 
1114     if (FlatScrUsed ||
1115         STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1116       ExtraSGPRs = 6;
1117   }
1118 
1119   return ExtraSGPRs;
1120 }
1121 
1122 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1123                           bool FlatScrUsed) {
1124   return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1125                           STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
1126 }
1127 
1128 static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1129                                                unsigned Granule) {
1130   return divideCeil(std::max(1u, NumRegs), Granule);
1131 }
1132 
1133 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1134   // SGPRBlocks is the actual number of SGPR blocks minus 1.
1135   return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
1136          1;
1137 }
1138 
1139 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
1140                              std::optional<bool> EnableWavefrontSize32) {
1141   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1142     return 8;
1143 
1144   bool IsWave32 = EnableWavefrontSize32 ?
1145       *EnableWavefrontSize32 :
1146       STI->getFeatureBits().test(FeatureWavefrontSize32);
1147 
1148   if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1149     return IsWave32 ? 24 : 12;
1150 
1151   if (hasGFX10_3Insts(*STI))
1152     return IsWave32 ? 16 : 8;
1153 
1154   return IsWave32 ? 8 : 4;
1155 }
1156 
1157 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
1158                                 std::optional<bool> EnableWavefrontSize32) {
1159   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1160     return 8;
1161 
1162   bool IsWave32 = EnableWavefrontSize32 ?
1163       *EnableWavefrontSize32 :
1164       STI->getFeatureBits().test(FeatureWavefrontSize32);
1165 
1166   return IsWave32 ? 8 : 4;
1167 }
1168 
1169 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1170   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1171     return 512;
1172   if (!isGFX10Plus(*STI))
1173     return 256;
1174   bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1175   if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1176     return IsWave32 ? 1536 : 768;
1177   return IsWave32 ? 1024 : 512;
1178 }
1179 
1180 unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
1181 
1182 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
1183   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1184     return 512;
1185   return getAddressableNumArchVGPRs(STI);
1186 }
1187 
1188 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
1189                                       unsigned NumVGPRs) {
1190   return getNumWavesPerEUWithNumVGPRs(NumVGPRs, getVGPRAllocGranule(STI),
1191                                       getMaxWavesPerEU(STI),
1192                                       getTotalNumVGPRs(STI));
1193 }
1194 
1195 unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1196                                       unsigned MaxWaves,
1197                                       unsigned TotalNumVGPRs) {
1198   if (NumVGPRs < Granule)
1199     return MaxWaves;
1200   unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1201   return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
1202 }
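
// For example, with the pre-gfx10 wave64 parameters defined elsewhere in this
// file (Granule = 4, MaxWaves = 10, TotalNumVGPRs = 256), NumVGPRs = 70 rounds
// up to 72 VGPRs and yields 256 / 72 = 3 waves per EU.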
1203 
1204 unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1205                                   AMDGPUSubtarget::Generation Gen) {
1206   if (Gen >= AMDGPUSubtarget::GFX10)
1207     return MaxWaves;
1208 
1209   if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
1210     if (SGPRs <= 80)
1211       return 10;
1212     if (SGPRs <= 88)
1213       return 9;
1214     if (SGPRs <= 100)
1215       return 8;
1216     return 7;
1217   }
1218   if (SGPRs <= 48)
1219     return 10;
1220   if (SGPRs <= 56)
1221     return 9;
1222   if (SGPRs <= 64)
1223     return 8;
1224   if (SGPRs <= 72)
1225     return 7;
1226   if (SGPRs <= 80)
1227     return 6;
1228   return 5;
1229 }
1230 
1231 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1232   assert(WavesPerEU != 0);
1233 
1234   unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1235   if (WavesPerEU >= MaxWavesPerEU)
1236     return 0;
1237 
1238   unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1239   unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
1240   unsigned Granule = getVGPRAllocGranule(STI);
1241   unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1242 
1243   if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1244     return 0;
1245 
1246   unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
1247   if (WavesPerEU < MinWavesPerEU)
1248     return getMinNumVGPRs(STI, MinWavesPerEU);
1249 
1250   unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1251   unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1252   return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1253 }
1254 
1255 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1256   assert(WavesPerEU != 0);
1257 
1258   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1259                                    getVGPRAllocGranule(STI));
1260   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
1261   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1262 }
1263 
1264 unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1265                                  std::optional<bool> EnableWavefrontSize32) {
1266   return getGranulatedNumRegisterBlocks(
1267              NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1268          1;
1269 }
1270 
1271 unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
1272                                    unsigned NumVGPRs,
1273                                    std::optional<bool> EnableWavefrontSize32) {
1274   return getGranulatedNumRegisterBlocks(
1275       NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
1276 }
1277 } // end namespace IsaInfo
1278 
1279 void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
1280                                const MCSubtargetInfo *STI) {
1281   IsaVersion Version = getIsaVersion(STI->getCPU());
1282   KernelCode.amd_kernel_code_version_major = 1;
1283   KernelCode.amd_kernel_code_version_minor = 2;
1284   KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1285   KernelCode.amd_machine_version_major = Version.Major;
1286   KernelCode.amd_machine_version_minor = Version.Minor;
1287   KernelCode.amd_machine_version_stepping = Version.Stepping;
1288   KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
1289   if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1290     KernelCode.wavefront_size = 5; // log2 of the 32-lane wave size
1291     KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1292   } else {
1293     KernelCode.wavefront_size = 6; // log2 of the 64-lane wave size
1294   }
1295 
1296   // If the code object does not support indirect functions, then the value must
1297   // be 0xffffffff.
1298   KernelCode.call_convention = -1;
1299 
1300   // These alignment values are specified in powers of two, so alignment =
1301   // 2^n.  The minimum alignment is 2^4 = 16.
1302   KernelCode.kernarg_segment_alignment = 4;
1303   KernelCode.group_segment_alignment = 4;
1304   KernelCode.private_segment_alignment = 4;
1305 
1306   if (Version.Major >= 10) {
1307     KernelCode.compute_pgm_resource_registers |=
1308         S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1309         S_00B848_MEM_ORDERED(1);
1310   }
1311 }
1312 
1313 bool isGroupSegment(const GlobalValue *GV) {
1314   return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1315 }
1316 
1317 bool isGlobalSegment(const GlobalValue *GV) {
1318   return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1319 }
1320 
1321 bool isReadOnlySegment(const GlobalValue *GV) {
1322   unsigned AS = GV->getAddressSpace();
1323   return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1324          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1325 }
1326 
1327 bool shouldEmitConstantsToTextSection(const Triple &TT) {
1328   return TT.getArch() == Triple::r600;
1329 }
1330 
1331 std::pair<unsigned, unsigned>
1332 getIntegerPairAttribute(const Function &F, StringRef Name,
1333                         std::pair<unsigned, unsigned> Default,
1334                         bool OnlyFirstRequired) {
1335   if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1336     return {Attr->first, Attr->second ? *(Attr->second) : Default.second};
1337   return Default;
1338 }
1339 
1340 std::optional<std::pair<unsigned, std::optional<unsigned>>>
1341 getIntegerPairAttribute(const Function &F, StringRef Name,
1342                         bool OnlyFirstRequired) {
1343   Attribute A = F.getFnAttribute(Name);
1344   if (!A.isStringAttribute())
1345     return std::nullopt;
1346 
1347   LLVMContext &Ctx = F.getContext();
1348   std::pair<unsigned, std::optional<unsigned>> Ints;
1349   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1350   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1351     Ctx.emitError("can't parse first integer attribute " + Name);
1352     return std::nullopt;
1353   }
1354   unsigned Second = 0;
1355   if (Strs.second.trim().getAsInteger(0, Second)) {
1356     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1357       Ctx.emitError("can't parse second integer attribute " + Name);
1358       return std::nullopt;
1359     }
1360   } else {
1361     Ints.second = Second;
1362   }
1363 
1364   return Ints;
1365 }
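
// For example, an attribute such as "amdgpu-waves-per-eu"="4,8" parses to
// {4, 8}; with OnlyFirstRequired set, a bare "4" yields {4, std::nullopt}.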
1366 
1367 SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1368                                              unsigned Size,
1369                                              unsigned DefaultVal) {
1370   assert(Size > 2);
1371   SmallVector<unsigned> Default(Size, DefaultVal);
1372 
1373   Attribute A = F.getFnAttribute(Name);
1374   if (!A.isStringAttribute())
1375     return Default;
1376 
1377   SmallVector<unsigned> Vals(Size, DefaultVal);
1378 
1379   LLVMContext &Ctx = F.getContext();
1380 
1381   StringRef S = A.getValueAsString();
1382   unsigned i = 0;
1383   for (; !S.empty() && i < Size; i++) {
1384     std::pair<StringRef, StringRef> Strs = S.split(',');
1385     unsigned IntVal;
1386     if (Strs.first.trim().getAsInteger(0, IntVal)) {
1387       Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
1388                     Name);
1389       return Default;
1390     }
1391     Vals[i] = IntVal;
1392     S = Strs.second;
1393   }
1394 
1395   if (!S.empty() || i < Size) {
1396     Ctx.emitError("attribute " + Name +
1397                   " has incorrect number of integers; expected " +
1398                   llvm::utostr(Size));
1399     return Default;
1400   }
1401   return Vals;
1402 }
1403 
1404 unsigned getVmcntBitMask(const IsaVersion &Version) {
1405   return (1 << (getVmcntBitWidthLo(Version.Major) +
1406                 getVmcntBitWidthHi(Version.Major))) -
1407          1;
1408 }
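
// For instance, this evaluates to 63 both on GFX9 (4 low + 2 high bits) and
// on GFX11 (6 low bits): the largest encodable vmcnt value.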
1409 
1410 unsigned getLoadcntBitMask(const IsaVersion &Version) {
1411   return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1412 }
1413 
1414 unsigned getSamplecntBitMask(const IsaVersion &Version) {
1415   return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1416 }
1417 
1418 unsigned getBvhcntBitMask(const IsaVersion &Version) {
1419   return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1420 }
1421 
1422 unsigned getExpcntBitMask(const IsaVersion &Version) {
1423   return (1 << getExpcntBitWidth(Version.Major)) - 1;
1424 }
1425 
1426 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1427   return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1428 }
1429 
1430 unsigned getDscntBitMask(const IsaVersion &Version) {
1431   return (1 << getDscntBitWidth(Version.Major)) - 1;
1432 }
1433 
1434 unsigned getKmcntBitMask(const IsaVersion &Version) {
1435   return (1 << getKmcntBitWidth(Version.Major)) - 1;
1436 }
1437 
1438 unsigned getStorecntBitMask(const IsaVersion &Version) {
1439   return (1 << getStorecntBitWidth(Version.Major)) - 1;
1440 }
1441 
1442 unsigned getWaitcntBitMask(const IsaVersion &Version) {
1443   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1444                                 getVmcntBitWidthLo(Version.Major));
1445   unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1446                                getExpcntBitWidth(Version.Major));
1447   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1448                                 getLgkmcntBitWidth(Version.Major));
1449   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1450                                 getVmcntBitWidthHi(Version.Major));
1451   return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1452 }
1453 
1454 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1455   unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1456                                 getVmcntBitWidthLo(Version.Major));
1457   unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1458                                 getVmcntBitWidthHi(Version.Major));
1459   return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1460 }
1461 
1462 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1463   return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1464                     getExpcntBitWidth(Version.Major));
1465 }
1466 
1467 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1468   return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1469                     getLgkmcntBitWidth(Version.Major));
1470 }
1471 
1472 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1473                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
1474   Vmcnt = decodeVmcnt(Version, Waitcnt);
1475   Expcnt = decodeExpcnt(Version, Waitcnt);
1476   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1477 }
1478 
1479 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1480   Waitcnt Decoded;
1481   Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
1482   Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1483   Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
1484   return Decoded;
1485 }
1486 
1487 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1488                      unsigned Vmcnt) {
1489   Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1490                      getVmcntBitWidthLo(Version.Major));
1491   return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1492                   getVmcntBitShiftHi(Version.Major),
1493                   getVmcntBitWidthHi(Version.Major));
1494 }
1495 
1496 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1497                       unsigned Expcnt) {
1498   return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1499                   getExpcntBitWidth(Version.Major));
1500 }
1501 
1502 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1503                        unsigned Lgkmcnt) {
1504   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1505                   getLgkmcntBitWidth(Version.Major));
1506 }
1507 
1508 unsigned encodeWaitcnt(const IsaVersion &Version,
1509                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
1510   unsigned Waitcnt = getWaitcntBitMask(Version);
1511   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1512   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1513   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1514   return Waitcnt;
1515 }
1516 
1517 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1518   return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
1519 }
1520 
1521 static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1522                                         bool IsStore) {
1523   unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1524                               getDscntBitWidth(Version.Major));
1525   if (IsStore) {
1526     unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1527                                    getStorecntBitWidth(Version.Major));
1528     return Dscnt | Storecnt;
1529   }
1530   unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1531                                 getLoadcntBitWidth(Version.Major));
1532   return Dscnt | Loadcnt;
1533 }
1534 
1535 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1536   Waitcnt Decoded;
1537   Decoded.LoadCnt =
1538       unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1539                  getLoadcntBitWidth(Version.Major));
1540   Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1541                              getDscntBitWidth(Version.Major));
1542   return Decoded;
1543 }
1544 
1545 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1546   Waitcnt Decoded;
1547   Decoded.StoreCnt =
1548       unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1549                  getStorecntBitWidth(Version.Major));
1550   Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1551                              getDscntBitWidth(Version.Major));
1552   return Decoded;
1553 }
1554 
1555 static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1556                               unsigned Loadcnt) {
1557   return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1558                   getLoadcntBitWidth(Version.Major));
1559 }
1560 
1561 static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1562                                unsigned Storecnt) {
1563   return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1564                   getStorecntBitWidth(Version.Major));
1565 }
1566 
1567 static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1568                             unsigned Dscnt) {
1569   return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1570                   getDscntBitWidth(Version.Major));
1571 }
1572 
1573 static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1574                                    unsigned Dscnt) {
1575   unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1576   Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1577   Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1578   return Waitcnt;
1579 }
1580 
1581 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1582   return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
1583 }
1584 
1585 static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1586                                     unsigned Storecnt, unsigned Dscnt) {
1587   unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1588   Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1589   Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1590   return Waitcnt;
1591 }
1592 
1593 unsigned encodeStorecntDscnt(const IsaVersion &Version,
1594                              const Waitcnt &Decoded) {
1595   return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
1596 }
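
// The combined encodings above are designed to round-trip: assuming LoadCnt,
// StoreCnt and DsCnt each fit their field widths for ISA version V,
//   decodeLoadcntDscnt(V, encodeLoadcntDscnt(V, W))
// recovers W's LoadCnt and DsCnt, and likewise for the storecnt/dscnt pair.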
1597 
1598 //===----------------------------------------------------------------------===//
1599 // Custom Operand Values
1600 //===----------------------------------------------------------------------===//
1601 
1602 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1603                                                 int Size,
1604                                                 const MCSubtargetInfo &STI) {
1605   unsigned Enc = 0;
1606   for (int Idx = 0; Idx < Size; ++Idx) {
1607     const auto &Op = Opr[Idx];
1608     if (Op.isSupported(STI))
1609       Enc |= Op.encode(Op.Default);
1610   }
1611   return Enc;
1612 }
1613 
1614 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1615                                             int Size, unsigned Code,
1616                                             bool &HasNonDefaultVal,
1617                                             const MCSubtargetInfo &STI) {
1618   unsigned UsedOprMask = 0;
1619   HasNonDefaultVal = false;
1620   for (int Idx = 0; Idx < Size; ++Idx) {
1621     const auto &Op = Opr[Idx];
1622     if (!Op.isSupported(STI))
1623       continue;
1624     UsedOprMask |= Op.getMask();
1625     unsigned Val = Op.decode(Code);
1626     if (!Op.isValid(Val))
1627       return false;
1628     HasNonDefaultVal |= (Val != Op.Default);
1629   }
1630   return (Code & ~UsedOprMask) == 0;
1631 }
1632 
1633 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1634                                 unsigned Code, int &Idx, StringRef &Name,
1635                                 unsigned &Val, bool &IsDefault,
1636                                 const MCSubtargetInfo &STI) {
1637   while (Idx < Size) {
1638     const auto &Op = Opr[Idx++];
1639     if (Op.isSupported(STI)) {
1640       Name = Op.Name;
1641       Val = Op.decode(Code);
1642       IsDefault = (Val == Op.Default);
1643       return true;
1644     }
1645   }
1646 
1647   return false;
1648 }
1649 
1650 static int encodeCustomOperandVal(const CustomOperandVal &Op,
1651                                   int64_t InputVal) {
1652   if (InputVal < 0 || InputVal > Op.Max)
1653     return OPR_VAL_INVALID;
1654   return Op.encode(InputVal);
1655 }
1656 
1657 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1658                                const StringRef Name, int64_t InputVal,
1659                                unsigned &UsedOprMask,
1660                                const MCSubtargetInfo &STI) {
1661   int InvalidId = OPR_ID_UNKNOWN;
1662   for (int Idx = 0; Idx < Size; ++Idx) {
1663     const auto &Op = Opr[Idx];
1664     if (Op.Name == Name) {
1665       if (!Op.isSupported(STI)) {
1666         InvalidId = OPR_ID_UNSUPPORTED;
1667         continue;
1668       }
1669       auto OprMask = Op.getMask();
1670       if (OprMask & UsedOprMask)
1671         return OPR_ID_DUPLICATE;
1672       UsedOprMask |= OprMask;
1673       return encodeCustomOperandVal(Op, InputVal);
1674     }
1675   }
1676   return InvalidId;
1677 }
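
// Callers of encodeCustomOperand distinguish failures by the returned id:
// OPR_ID_UNKNOWN if no table entry matches the name, OPR_ID_UNSUPPORTED if it
// matches but is unavailable on this subtarget, OPR_ID_DUPLICATE if the field
// was already written (tracked via UsedOprMask), and OPR_VAL_INVALID if the
// value is out of range; otherwise the encoded field bits are returned.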
1678 
1679 //===----------------------------------------------------------------------===//
1680 // DepCtr
1681 //===----------------------------------------------------------------------===//
1682 
1683 namespace DepCtr {
1684 
1685 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1686   static int Default = -1;
1687   if (Default == -1)
1688     Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1689   return Default;
1690 }
1691 
1692 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1693                               const MCSubtargetInfo &STI) {
1694   return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
1695                                          HasNonDefaultVal, STI);
1696 }
1697 
1698 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1699                   bool &IsDefault, const MCSubtargetInfo &STI) {
1700   return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1701                              IsDefault, STI);
1702 }
1703 
1704 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1705                  const MCSubtargetInfo &STI) {
1706   return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1707                              STI);
1708 }
1709 
1710 unsigned decodeFieldVmVsrc(unsigned Encoded) {
1711   return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1712 }
1713 
1714 unsigned decodeFieldVaVdst(unsigned Encoded) {
1715   return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1716 }
1717 
1718 unsigned decodeFieldSaSdst(unsigned Encoded) {
1719   return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1720 }
1721 
1722 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1723   return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1724 }
1725 
1726 unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1727   return encodeFieldVmVsrc(0xffff, VmVsrc);
1728 }
1729 
1730 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1731   return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1732 }
1733 
1734 unsigned encodeFieldVaVdst(unsigned VaVdst) {
1735   return encodeFieldVaVdst(0xffff, VaVdst);
1736 }
1737 
1738 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1739   return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1740 }
1741 
1742 unsigned encodeFieldSaSdst(unsigned SaSdst) {
1743   return encodeFieldSaSdst(0xffff, SaSdst);
1744 }
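
// Note on the single-argument encodeField* helpers above: they start from
// 0xffff, i.e. with every depctr field saturated, and overwrite only the
// requested field, so the result can be used directly as a complete depctr
// immediate (the untouched fields stay at their all-ones values, which
// conventionally means no wait on them).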
1745 
1746 } // namespace DepCtr
1747 
1748 //===----------------------------------------------------------------------===//
1749 // exp tgt
1750 //===----------------------------------------------------------------------===//
1751 
1752 namespace Exp {
1753 
1754 struct ExpTgt {
1755   StringLiteral Name;
1756   unsigned Tgt;
1757   unsigned MaxIndex;
1758 };
1759 
1760 static constexpr ExpTgt ExpTgtInfo[] = {
1761   {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
1762   {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
1763   {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
1764   {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
1765   {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
1766   {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
1767   {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
1768 };
1769 
1770 bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1771   for (const ExpTgt &Val : ExpTgtInfo) {
1772     if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1773       Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1774       Name = Val.Name;
1775       return true;
1776     }
1777   }
1778   return false;
1779 }
1780 
1781 unsigned getTgtId(const StringRef Name) {
1782 
1783   for (const ExpTgt &Val : ExpTgtInfo) {
1784     if (Val.MaxIndex == 0 && Name == Val.Name)
1785       return Val.Tgt;
1786 
1787     if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
1788       StringRef Suffix = Name.drop_front(Val.Name.size());
1789 
1790       unsigned Id;
1791       if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1792         return ET_INVALID;
1793 
1794       // Reject leading zeroes
1795       if (Suffix.size() > 1 && Suffix[0] == '0')
1796         return ET_INVALID;
1797 
1798       return Val.Tgt + Id;
1799     }
1800   }
1801   return ET_INVALID;
1802 }
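
// Examples derived from ExpTgtInfo and the parsing rules above: "prim" maps
// to ET_PRIM, "pos3" to ET_POS0 + 3, and "mrt1" to ET_MRT0 + 1, while "mrt01"
// is rejected (ET_INVALID) because of the leading zero and "mrt9" because it
// exceeds ET_MRT_MAX_IDX.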
1803 
1804 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1805   switch (Id) {
1806   case ET_NULL:
1807     return !isGFX11Plus(STI);
1808   case ET_POS4:
1809   case ET_PRIM:
1810     return isGFX10Plus(STI);
1811   case ET_DUAL_SRC_BLEND0:
1812   case ET_DUAL_SRC_BLEND1:
1813     return isGFX11Plus(STI);
1814   default:
1815     if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1816       return !isGFX11Plus(STI);
1817     return true;
1818   }
1819 }
1820 
1821 } // namespace Exp
1822 
1823 //===----------------------------------------------------------------------===//
1824 // MTBUF Format
1825 //===----------------------------------------------------------------------===//
1826 
1827 namespace MTBUFFormat {
1828 
1829 int64_t getDfmt(const StringRef Name) {
1830   for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1831     if (Name == DfmtSymbolic[Id])
1832       return Id;
1833   }
1834   return DFMT_UNDEF;
1835 }
1836 
1837 StringRef getDfmtName(unsigned Id) {
1838   assert(Id <= DFMT_MAX);
1839   return DfmtSymbolic[Id];
1840 }
1841 
1842 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1843   if (isSI(STI) || isCI(STI))
1844     return NfmtSymbolicSICI;
1845   if (isVI(STI) || isGFX9(STI))
1846     return NfmtSymbolicVI;
1847   return NfmtSymbolicGFX10;
1848 }
1849 
1850 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1851   const auto *lookupTable = getNfmtLookupTable(STI);
1852   for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1853     if (Name == lookupTable[Id])
1854       return Id;
1855   }
1856   return NFMT_UNDEF;
1857 }
1858 
1859 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1860   assert(Id <= NFMT_MAX);
1861   return getNfmtLookupTable(STI)[Id];
1862 }
1863 
1864 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1865   unsigned Dfmt;
1866   unsigned Nfmt;
1867   decodeDfmtNfmt(Id, Dfmt, Nfmt);
1868   return isValidNfmt(Nfmt, STI);
1869 }
1870 
1871 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1872   return !getNfmtName(Id, STI).empty();
1873 }
1874 
1875 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1876   return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1877 }
1878 
1879 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1880   Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1881   Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1882 }
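
// encodeDfmtNfmt and decodeDfmtNfmt are exact inverses for values that fit
// their masks, e.g.
//   unsigned D, N;
//   decodeDfmtNfmt(encodeDfmtNfmt(Dfmt, Nfmt), D, N);  // D == Dfmt, N == Nfmt
// assuming Dfmt <= DFMT_MASK and Nfmt <= NFMT_MASK.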
1883 
1884 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
1885   if (isGFX11Plus(STI)) {
1886     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1887       if (Name == UfmtSymbolicGFX11[Id])
1888         return Id;
1889     }
1890   } else {
1891     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1892       if (Name == UfmtSymbolicGFX10[Id])
1893         return Id;
1894     }
1895   }
1896   return UFMT_UNDEF;
1897 }
1898 
1899 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
1900   if (isValidUnifiedFormat(Id, STI))
1901     return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1902   return "";
1903 }
1904 
1905 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1906   return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1907 }
1908 
1909 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1910                              const MCSubtargetInfo &STI) {
1911   int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1912   if (isGFX11Plus(STI)) {
1913     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1914       if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1915         return Id;
1916     }
1917   } else {
1918     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1919       if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1920         return Id;
1921     }
1922   }
1923   return UFMT_UNDEF;
1924 }
1925 
1926 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1927   return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1928 }
1929 
1930 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1931   if (isGFX10Plus(STI))
1932     return UFMT_DEFAULT;
1933   return DFMT_NFMT_DEFAULT;
1934 }
1935 
1936 } // namespace MTBUFFormat
1937 
1938 //===----------------------------------------------------------------------===//
1939 // SendMsg
1940 //===----------------------------------------------------------------------===//
1941 
1942 namespace SendMsg {
1943 
1944 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1945   return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1946 }
1947 
1948 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1949   return (MsgId & ~(getMsgIdMask(STI))) == 0;
1950 }
1951 
1952 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1953                   bool Strict) {
1954   assert(isValidMsgId(MsgId, STI));
1955 
1956   if (!Strict)
1957     return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1958 
1959   if (msgRequiresOp(MsgId, STI)) {
1960     if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
1961       return false;
1962 
1963     return !getMsgOpName(MsgId, OpId, STI).empty();
1964   }
1965 
1966   return OpId == OP_NONE_;
1967 }
1968 
1969 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1970                       const MCSubtargetInfo &STI, bool Strict) {
1971   assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1972 
1973   if (!Strict)
1974     return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1975 
1976   if (!isGFX11Plus(STI)) {
1977     switch (MsgId) {
1978     case ID_GS_PreGFX11:
1979       return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1980     case ID_GS_DONE_PreGFX11:
1981       return (OpId == OP_GS_NOP) ?
1982           (StreamId == STREAM_ID_NONE_) :
1983           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1984     }
1985   }
1986   return StreamId == STREAM_ID_NONE_;
1987 }
1988 
1989 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1990   return MsgId == ID_SYSMSG ||
1991       (!isGFX11Plus(STI) &&
1992        (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1993 }
1994 
1995 bool msgSupportsStream(int64_t MsgId, int64_t OpId,
1996                        const MCSubtargetInfo &STI) {
1997   return !isGFX11Plus(STI) &&
1998       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
1999       OpId != OP_GS_NOP;
2000 }
2001 
2002 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2003                uint16_t &StreamId, const MCSubtargetInfo &STI) {
2004   MsgId = Val & getMsgIdMask(STI);
2005   if (isGFX11Plus(STI)) {
2006     OpId = 0;
2007     StreamId = 0;
2008   } else {
2009     OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2010     StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2011   }
2012 }
2013 
2014 uint64_t encodeMsg(uint64_t MsgId,
2015                    uint64_t OpId,
2016                    uint64_t StreamId) {
2017   return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2018 }
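
// decodeMsg inverts encodeMsg for well-formed values: on pre-GFX11 targets
//   decodeMsg(encodeMsg(MsgId, OpId, StreamId), M, O, S, STI)
// recovers all three fields, while on GFX11+ (where messages carry no op or
// stream) the op and stream fields are reported as zero.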
2019 
2020 } // namespace SendMsg
2021 
2022 //===----------------------------------------------------------------------===//
2023 //
2024 //===----------------------------------------------------------------------===//
2025 
2026 unsigned getInitialPSInputAddr(const Function &F) {
2027   return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2028 }
2029 
2030 bool getHasColorExport(const Function &F) {
2031   // As a safe default, always respond as if PS has color exports.
2032   return F.getFnAttributeAsParsedInteger(
2033              "amdgpu-color-export",
2034              F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2035 }
2036 
2037 bool getHasDepthExport(const Function &F) {
2038   return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2039 }
2040 
2041 bool isShader(CallingConv::ID cc) {
2042   switch (cc) {
2043     case CallingConv::AMDGPU_VS:
2044     case CallingConv::AMDGPU_LS:
2045     case CallingConv::AMDGPU_HS:
2046     case CallingConv::AMDGPU_ES:
2047     case CallingConv::AMDGPU_GS:
2048     case CallingConv::AMDGPU_PS:
2049     case CallingConv::AMDGPU_CS_Chain:
2050     case CallingConv::AMDGPU_CS_ChainPreserve:
2051     case CallingConv::AMDGPU_CS:
2052       return true;
2053     default:
2054       return false;
2055   }
2056 }
2057 
2058 bool isGraphics(CallingConv::ID cc) {
2059   return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
2060 }
2061 
2062 bool isCompute(CallingConv::ID cc) {
2063   return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
2064 }
2065 
2066 bool isEntryFunctionCC(CallingConv::ID CC) {
2067   switch (CC) {
2068   case CallingConv::AMDGPU_KERNEL:
2069   case CallingConv::SPIR_KERNEL:
2070   case CallingConv::AMDGPU_VS:
2071   case CallingConv::AMDGPU_GS:
2072   case CallingConv::AMDGPU_PS:
2073   case CallingConv::AMDGPU_CS:
2074   case CallingConv::AMDGPU_ES:
2075   case CallingConv::AMDGPU_HS:
2076   case CallingConv::AMDGPU_LS:
2077     return true;
2078   default:
2079     return false;
2080   }
2081 }
2082 
2083 bool isModuleEntryFunctionCC(CallingConv::ID CC) {
2084   switch (CC) {
2085   case CallingConv::AMDGPU_Gfx:
2086     return true;
2087   default:
2088     return isEntryFunctionCC(CC) || isChainCC(CC);
2089   }
2090 }
2091 
2092 bool isChainCC(CallingConv::ID CC) {
2093   switch (CC) {
2094   case CallingConv::AMDGPU_CS_Chain:
2095   case CallingConv::AMDGPU_CS_ChainPreserve:
2096     return true;
2097   default:
2098     return false;
2099   }
2100 }
2101 
2102 bool isKernelCC(const Function *Func) {
2103   return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2104 }
2105 
2106 bool hasXNACK(const MCSubtargetInfo &STI) {
2107   return STI.hasFeature(AMDGPU::FeatureXNACK);
2108 }
2109 
2110 bool hasSRAMECC(const MCSubtargetInfo &STI) {
2111   return STI.hasFeature(AMDGPU::FeatureSRAMECC);
2112 }
2113 
2114 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2115   return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
2116 }
2117 
2118 bool hasA16(const MCSubtargetInfo &STI) {
2119   return STI.hasFeature(AMDGPU::FeatureA16);
2120 }
2121 
2122 bool hasG16(const MCSubtargetInfo &STI) {
2123   return STI.hasFeature(AMDGPU::FeatureG16);
2124 }
2125 
2126 bool hasPackedD16(const MCSubtargetInfo &STI) {
2127   return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2128          !isSI(STI);
2129 }
2130 
2131 bool hasGDS(const MCSubtargetInfo &STI) {
2132   return STI.hasFeature(AMDGPU::FeatureGDS);
2133 }
2134 
2135 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2136   auto Version = getIsaVersion(STI.getCPU());
2137   if (Version.Major == 10)
2138     return Version.Minor >= 3 ? 13 : 5;
2139   if (Version.Major == 11)
2140     return 5;
2141   if (Version.Major >= 12)
2142     return HasSampler ? 4 : 5;
2143   return 0;
2144 }
2145 
2146 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2147 
2148 bool isSI(const MCSubtargetInfo &STI) {
2149   return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2150 }
2151 
2152 bool isCI(const MCSubtargetInfo &STI) {
2153   return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2154 }
2155 
2156 bool isVI(const MCSubtargetInfo &STI) {
2157   return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2158 }
2159 
2160 bool isGFX9(const MCSubtargetInfo &STI) {
2161   return STI.hasFeature(AMDGPU::FeatureGFX9);
2162 }
2163 
2164 bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2165   return isGFX9(STI) || isGFX10(STI);
2166 }
2167 
2168 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2169   return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2170 }
2171 
2172 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2173   return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2174 }
2175 
2176 bool isGFX8Plus(const MCSubtargetInfo &STI) {
2177   return isVI(STI) || isGFX9Plus(STI);
2178 }
2179 
2180 bool isGFX9Plus(const MCSubtargetInfo &STI) {
2181   return isGFX9(STI) || isGFX10Plus(STI);
2182 }
2183 
2184 bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2185 
2186 bool isGFX10(const MCSubtargetInfo &STI) {
2187   return STI.hasFeature(AMDGPU::FeatureGFX10);
2188 }
2189 
2190 bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2191   return isGFX10(STI) || isGFX11(STI);
2192 }
2193 
2194 bool isGFX10Plus(const MCSubtargetInfo &STI) {
2195   return isGFX10(STI) || isGFX11Plus(STI);
2196 }
2197 
2198 bool isGFX11(const MCSubtargetInfo &STI) {
2199   return STI.hasFeature(AMDGPU::FeatureGFX11);
2200 }
2201 
2202 bool isGFX11Plus(const MCSubtargetInfo &STI) {
2203   return isGFX11(STI) || isGFX12Plus(STI);
2204 }
2205 
2206 bool isGFX12(const MCSubtargetInfo &STI) {
2207   return STI.hasFeature(AMDGPU::FeatureGFX12);
2208 }
2209 
2210 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2211 
2212 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2213 
2214 bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2215   return !isGFX11Plus(STI);
2216 }
2217 
2218 bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2219   return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2220 }
2221 
2222 bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2223   return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2224 }
2225 
2226 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2227   return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2228 }
2229 
2230 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2231   return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2232 }
2233 
2234 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2235   return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2236 }
2237 
2238 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2239   return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2240 }
2241 
2242 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2243   return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2244 }
2245 
2246 bool isGFX90A(const MCSubtargetInfo &STI) {
2247   return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2248 }
2249 
2250 bool isGFX940(const MCSubtargetInfo &STI) {
2251   return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2252 }
2253 
2254 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2255   return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2256 }
2257 
2258 bool hasMAIInsts(const MCSubtargetInfo &STI) {
2259   return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2260 }
2261 
2262 bool hasVOPD(const MCSubtargetInfo &STI) {
2263   return STI.hasFeature(AMDGPU::FeatureVOPD);
2264 }
2265 
2266 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2267   return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2268 }
2269 
2270 unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2271   return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2272 }
2273 
2274 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2275                          int32_t ArgNumVGPR) {
2276   if (has90AInsts && ArgNumAGPR)
2277     return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2278   return std::max(ArgNumVGPR, ArgNumAGPR);
2279 }
2280 
2281 bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
2282   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2283   const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2284   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2285     Reg == AMDGPU::SCC;
2286 }
2287 
2288 bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
2289   return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
2290 }
2291 
2292 #define MAP_REG2REG \
2293   using namespace AMDGPU; \
2294   switch(Reg.id()) { \
2295   default: return Reg; \
2296   CASE_CI_VI(FLAT_SCR) \
2297   CASE_CI_VI(FLAT_SCR_LO) \
2298   CASE_CI_VI(FLAT_SCR_HI) \
2299   CASE_VI_GFX9PLUS(TTMP0) \
2300   CASE_VI_GFX9PLUS(TTMP1) \
2301   CASE_VI_GFX9PLUS(TTMP2) \
2302   CASE_VI_GFX9PLUS(TTMP3) \
2303   CASE_VI_GFX9PLUS(TTMP4) \
2304   CASE_VI_GFX9PLUS(TTMP5) \
2305   CASE_VI_GFX9PLUS(TTMP6) \
2306   CASE_VI_GFX9PLUS(TTMP7) \
2307   CASE_VI_GFX9PLUS(TTMP8) \
2308   CASE_VI_GFX9PLUS(TTMP9) \
2309   CASE_VI_GFX9PLUS(TTMP10) \
2310   CASE_VI_GFX9PLUS(TTMP11) \
2311   CASE_VI_GFX9PLUS(TTMP12) \
2312   CASE_VI_GFX9PLUS(TTMP13) \
2313   CASE_VI_GFX9PLUS(TTMP14) \
2314   CASE_VI_GFX9PLUS(TTMP15) \
2315   CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2316   CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2317   CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2318   CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2319   CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2320   CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2321   CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2322   CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2323   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2324   CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2325   CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2326   CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2327   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2328   CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2329   CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2330   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2331   CASE_GFXPRE11_GFX11PLUS(M0) \
2332   CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2333   CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2334   }
2335 
2336 #define CASE_CI_VI(node) \
2337   assert(!isSI(STI)); \
2338   case node: return isCI(STI) ? node##_ci : node##_vi;
2339 
2340 #define CASE_VI_GFX9PLUS(node) \
2341   case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2342 
2343 #define CASE_GFXPRE11_GFX11PLUS(node) \
2344   case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2345 
2346 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2347   case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2348 
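// With the CASE_* macros defined as above, MAP_REG2REG expands into a switch
// that maps each pseudo register to its encoding-specific counterpart for the
// current subtarget (e.g. TTMPs differ between VI and GFX9+); the alternative
// CASE_* definitions further below reuse the same table to map the
// encoding-specific registers back to pseudo registers in mc2PseudoReg.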
2349 MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
2350   if (STI.getTargetTriple().getArch() == Triple::r600)
2351     return Reg;
2352   MAP_REG2REG
2353 }
2354 
2355 #undef CASE_CI_VI
2356 #undef CASE_VI_GFX9PLUS
2357 #undef CASE_GFXPRE11_GFX11PLUS
2358 #undef CASE_GFXPRE11_GFX11PLUS_TO
2359 
2360 #define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
2361 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2362 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2363 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2364 
2365 MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }
2366 
2367 bool isInlineValue(unsigned Reg) {
2368   switch (Reg) {
2369   case AMDGPU::SRC_SHARED_BASE_LO:
2370   case AMDGPU::SRC_SHARED_BASE:
2371   case AMDGPU::SRC_SHARED_LIMIT_LO:
2372   case AMDGPU::SRC_SHARED_LIMIT:
2373   case AMDGPU::SRC_PRIVATE_BASE_LO:
2374   case AMDGPU::SRC_PRIVATE_BASE:
2375   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2376   case AMDGPU::SRC_PRIVATE_LIMIT:
2377   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2378     return true;
2379   case AMDGPU::SRC_VCCZ:
2380   case AMDGPU::SRC_EXECZ:
2381   case AMDGPU::SRC_SCC:
2382     return true;
2383   case AMDGPU::SGPR_NULL:
2384     return true;
2385   default:
2386     return false;
2387   }
2388 }
2389 
2390 #undef CASE_CI_VI
2391 #undef CASE_VI_GFX9PLUS
2392 #undef CASE_GFXPRE11_GFX11PLUS
2393 #undef CASE_GFXPRE11_GFX11PLUS_TO
2394 #undef MAP_REG2REG
2395 
2396 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2397   assert(OpNo < Desc.NumOperands);
2398   unsigned OpType = Desc.operands()[OpNo].OperandType;
2399   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2400          OpType <= AMDGPU::OPERAND_SRC_LAST;
2401 }
2402 
2403 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2404   assert(OpNo < Desc.NumOperands);
2405   unsigned OpType = Desc.operands()[OpNo].OperandType;
2406   return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2407          OpType <= AMDGPU::OPERAND_KIMM_LAST;
2408 }
2409 
2410 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2411   assert(OpNo < Desc.NumOperands);
2412   unsigned OpType = Desc.operands()[OpNo].OperandType;
2413   switch (OpType) {
2414   case AMDGPU::OPERAND_REG_IMM_FP32:
2415   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2416   case AMDGPU::OPERAND_REG_IMM_FP64:
2417   case AMDGPU::OPERAND_REG_IMM_FP16:
2418   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2419   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2420   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2421   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2422   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2423   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2424   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2425   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2426   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2427   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2428   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2429   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2430     return true;
2431   default:
2432     return false;
2433   }
2434 }
2435 
2436 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2437   assert(OpNo < Desc.NumOperands);
2438   unsigned OpType = Desc.operands()[OpNo].OperandType;
2439   return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2440           OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2441          (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2442           OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2443 }
2444 
2445 // Avoid using MCRegisterClass::getSize, since that function will go away
2446 // (move from MC* level to Target* level). Return size in bits.
2447 unsigned getRegBitWidth(unsigned RCID) {
2448   switch (RCID) {
2449   case AMDGPU::SGPR_LO16RegClassID:
2450   case AMDGPU::AGPR_LO16RegClassID:
2451     return 16;
2452   case AMDGPU::SGPR_32RegClassID:
2453   case AMDGPU::VGPR_32RegClassID:
2454   case AMDGPU::VRegOrLds_32RegClassID:
2455   case AMDGPU::AGPR_32RegClassID:
2456   case AMDGPU::VS_32RegClassID:
2457   case AMDGPU::AV_32RegClassID:
2458   case AMDGPU::SReg_32RegClassID:
2459   case AMDGPU::SReg_32_XM0RegClassID:
2460   case AMDGPU::SRegOrLds_32RegClassID:
2461     return 32;
2462   case AMDGPU::SGPR_64RegClassID:
2463   case AMDGPU::VS_64RegClassID:
2464   case AMDGPU::SReg_64RegClassID:
2465   case AMDGPU::VReg_64RegClassID:
2466   case AMDGPU::AReg_64RegClassID:
2467   case AMDGPU::SReg_64_XEXECRegClassID:
2468   case AMDGPU::VReg_64_Align2RegClassID:
2469   case AMDGPU::AReg_64_Align2RegClassID:
2470   case AMDGPU::AV_64RegClassID:
2471   case AMDGPU::AV_64_Align2RegClassID:
2472     return 64;
2473   case AMDGPU::SGPR_96RegClassID:
2474   case AMDGPU::SReg_96RegClassID:
2475   case AMDGPU::VReg_96RegClassID:
2476   case AMDGPU::AReg_96RegClassID:
2477   case AMDGPU::VReg_96_Align2RegClassID:
2478   case AMDGPU::AReg_96_Align2RegClassID:
2479   case AMDGPU::AV_96RegClassID:
2480   case AMDGPU::AV_96_Align2RegClassID:
2481     return 96;
2482   case AMDGPU::SGPR_128RegClassID:
2483   case AMDGPU::SReg_128RegClassID:
2484   case AMDGPU::VReg_128RegClassID:
2485   case AMDGPU::AReg_128RegClassID:
2486   case AMDGPU::VReg_128_Align2RegClassID:
2487   case AMDGPU::AReg_128_Align2RegClassID:
2488   case AMDGPU::AV_128RegClassID:
2489   case AMDGPU::AV_128_Align2RegClassID:
2490   case AMDGPU::SReg_128_XNULLRegClassID:
2491     return 128;
2492   case AMDGPU::SGPR_160RegClassID:
2493   case AMDGPU::SReg_160RegClassID:
2494   case AMDGPU::VReg_160RegClassID:
2495   case AMDGPU::AReg_160RegClassID:
2496   case AMDGPU::VReg_160_Align2RegClassID:
2497   case AMDGPU::AReg_160_Align2RegClassID:
2498   case AMDGPU::AV_160RegClassID:
2499   case AMDGPU::AV_160_Align2RegClassID:
2500     return 160;
2501   case AMDGPU::SGPR_192RegClassID:
2502   case AMDGPU::SReg_192RegClassID:
2503   case AMDGPU::VReg_192RegClassID:
2504   case AMDGPU::AReg_192RegClassID:
2505   case AMDGPU::VReg_192_Align2RegClassID:
2506   case AMDGPU::AReg_192_Align2RegClassID:
2507   case AMDGPU::AV_192RegClassID:
2508   case AMDGPU::AV_192_Align2RegClassID:
2509     return 192;
2510   case AMDGPU::SGPR_224RegClassID:
2511   case AMDGPU::SReg_224RegClassID:
2512   case AMDGPU::VReg_224RegClassID:
2513   case AMDGPU::AReg_224RegClassID:
2514   case AMDGPU::VReg_224_Align2RegClassID:
2515   case AMDGPU::AReg_224_Align2RegClassID:
2516   case AMDGPU::AV_224RegClassID:
2517   case AMDGPU::AV_224_Align2RegClassID:
2518     return 224;
2519   case AMDGPU::SGPR_256RegClassID:
2520   case AMDGPU::SReg_256RegClassID:
2521   case AMDGPU::VReg_256RegClassID:
2522   case AMDGPU::AReg_256RegClassID:
2523   case AMDGPU::VReg_256_Align2RegClassID:
2524   case AMDGPU::AReg_256_Align2RegClassID:
2525   case AMDGPU::AV_256RegClassID:
2526   case AMDGPU::AV_256_Align2RegClassID:
2527   case AMDGPU::SReg_256_XNULLRegClassID:
2528     return 256;
2529   case AMDGPU::SGPR_288RegClassID:
2530   case AMDGPU::SReg_288RegClassID:
2531   case AMDGPU::VReg_288RegClassID:
2532   case AMDGPU::AReg_288RegClassID:
2533   case AMDGPU::VReg_288_Align2RegClassID:
2534   case AMDGPU::AReg_288_Align2RegClassID:
2535   case AMDGPU::AV_288RegClassID:
2536   case AMDGPU::AV_288_Align2RegClassID:
2537     return 288;
2538   case AMDGPU::SGPR_320RegClassID:
2539   case AMDGPU::SReg_320RegClassID:
2540   case AMDGPU::VReg_320RegClassID:
2541   case AMDGPU::AReg_320RegClassID:
2542   case AMDGPU::VReg_320_Align2RegClassID:
2543   case AMDGPU::AReg_320_Align2RegClassID:
2544   case AMDGPU::AV_320RegClassID:
2545   case AMDGPU::AV_320_Align2RegClassID:
2546     return 320;
2547   case AMDGPU::SGPR_352RegClassID:
2548   case AMDGPU::SReg_352RegClassID:
2549   case AMDGPU::VReg_352RegClassID:
2550   case AMDGPU::AReg_352RegClassID:
2551   case AMDGPU::VReg_352_Align2RegClassID:
2552   case AMDGPU::AReg_352_Align2RegClassID:
2553   case AMDGPU::AV_352RegClassID:
2554   case AMDGPU::AV_352_Align2RegClassID:
2555     return 352;
2556   case AMDGPU::SGPR_384RegClassID:
2557   case AMDGPU::SReg_384RegClassID:
2558   case AMDGPU::VReg_384RegClassID:
2559   case AMDGPU::AReg_384RegClassID:
2560   case AMDGPU::VReg_384_Align2RegClassID:
2561   case AMDGPU::AReg_384_Align2RegClassID:
2562   case AMDGPU::AV_384RegClassID:
2563   case AMDGPU::AV_384_Align2RegClassID:
2564     return 384;
2565   case AMDGPU::SGPR_512RegClassID:
2566   case AMDGPU::SReg_512RegClassID:
2567   case AMDGPU::VReg_512RegClassID:
2568   case AMDGPU::AReg_512RegClassID:
2569   case AMDGPU::VReg_512_Align2RegClassID:
2570   case AMDGPU::AReg_512_Align2RegClassID:
2571   case AMDGPU::AV_512RegClassID:
2572   case AMDGPU::AV_512_Align2RegClassID:
2573     return 512;
2574   case AMDGPU::SGPR_1024RegClassID:
2575   case AMDGPU::SReg_1024RegClassID:
2576   case AMDGPU::VReg_1024RegClassID:
2577   case AMDGPU::AReg_1024RegClassID:
2578   case AMDGPU::VReg_1024_Align2RegClassID:
2579   case AMDGPU::AReg_1024_Align2RegClassID:
2580   case AMDGPU::AV_1024RegClassID:
2581   case AMDGPU::AV_1024_Align2RegClassID:
2582     return 1024;
2583   default:
2584     llvm_unreachable("Unexpected register class");
2585   }
2586 }
2587 
2588 unsigned getRegBitWidth(const MCRegisterClass &RC) {
2589   return getRegBitWidth(RC.getID());
2590 }
2591 
2592 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2593                            unsigned OpNo) {
2594   assert(OpNo < Desc.NumOperands);
2595   unsigned RCID = Desc.operands()[OpNo].RegClass;
2596   return getRegBitWidth(RCID) / 8;
2597 }
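
// For example, an operand whose register class is VReg_64 (or
// VReg_64_Align2) has getRegBitWidth(...) == 64, so getRegOperandSize
// reports 8 bytes for it.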
2598 
2599 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2600   if (isInlinableIntLiteral(Literal))
2601     return true;
2602 
2603   uint64_t Val = static_cast<uint64_t>(Literal);
2604   return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2605          (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2606          (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2607          (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2608          (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2609          (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2610          (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2611          (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2612          (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2613          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2614 }
2615 
2616 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2617   if (isInlinableIntLiteral(Literal))
2618     return true;
2619 
2620   // The actual type of the operand does not seem to matter as long
2621   // as the bits match one of the inline immediate values.  For example:
2622   //
2623   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2624   // so it is a legal inline immediate.
2625   //
2626   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2627   // floating-point, so it is a legal inline immediate.
2628 
2629   uint32_t Val = static_cast<uint32_t>(Literal);
2630   return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2631          (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2632          (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2633          (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2634          (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2635          (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2636          (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2637          (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2638          (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2639          (Val == 0x3e22f983 && HasInv2Pi);
2640 }
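
// A few concrete checks: isInlinableLiteral32(0x3F000000 /*0.5f*/, X) and
// isInlinableLiteral32(-16, X) are true for any X, while 0x3E22F983 (1/2pi)
// is inlinable only when HasInv2Pi is set.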
2641 
2642 bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2643   if (!HasInv2Pi)
2644     return false;
2645   if (isInlinableIntLiteral(Literal))
2646     return true;
2647   uint16_t Val = static_cast<uint16_t>(Literal);
2648   return Val == 0x3F00 || // 0.5
2649          Val == 0xBF00 || // -0.5
2650          Val == 0x3F80 || // 1.0
2651          Val == 0xBF80 || // -1.0
2652          Val == 0x4000 || // 2.0
2653          Val == 0xC000 || // -2.0
2654          Val == 0x4080 || // 4.0
2655          Val == 0xC080 || // -4.0
2656          Val == 0x3E22;   // 1.0 / (2.0 * pi)
2657 }
2658 
2659 bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
2660   return isInlinableLiteral32(Literal, HasInv2Pi);
2661 }
2662 
2663 bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2664   if (!HasInv2Pi)
2665     return false;
2666   if (isInlinableIntLiteral(Literal))
2667     return true;
2668   uint16_t Val = static_cast<uint16_t>(Literal);
2669   return Val == 0x3C00 || // 1.0
2670          Val == 0xBC00 || // -1.0
2671          Val == 0x3800 || // 0.5
2672          Val == 0xB800 || // -0.5
2673          Val == 0x4000 || // 2.0
2674          Val == 0xC000 || // -2.0
2675          Val == 0x4400 || // 4.0
2676          Val == 0xC400 || // -4.0
2677          Val == 0x3118;   // 1/2pi
2678 }
2679 
2680 std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
2681   // Unfortunately, the Instruction Set Architecture Reference Guide is
2682   // misleading about how the inline operands work for (packed) 16-bit
2683   // instructions. In a nutshell, the actual HW behavior is:
2684   //
2685   //  - integer encodings (-16 .. 64) are always produced as sign-extended
2686   //    32-bit values
2687   //  - float encodings are produced as:
2688   //    - for F16 instructions: corresponding half-precision float values in
2689   //      the LSBs, 0 in the MSBs
2690   //    - for UI16 instructions: corresponding single-precision float value
2691   int32_t Signed = static_cast<int32_t>(Literal);
2692   if (Signed >= 0 && Signed <= 64)
2693     return 128 + Signed;
2694 
2695   if (Signed >= -16 && Signed <= -1)
2696     return 192 + std::abs(Signed);
2697 
2698   if (IsFloat) {
2699     // clang-format off
2700     switch (Literal) {
2701     case 0x3800: return 240; // 0.5
2702     case 0xB800: return 241; // -0.5
2703     case 0x3C00: return 242; // 1.0
2704     case 0xBC00: return 243; // -1.0
2705     case 0x4000: return 244; // 2.0
2706     case 0xC000: return 245; // -2.0
2707     case 0x4400: return 246; // 4.0
2708     case 0xC400: return 247; // -4.0
2709     case 0x3118: return 248; // 1.0 / (2.0 * pi)
2710     default: break;
2711     }
2712     // clang-format on
2713   } else {
2714     // clang-format off
2715     switch (Literal) {
2716     case 0x3F000000: return 240; // 0.5
2717     case 0xBF000000: return 241; // -0.5
2718     case 0x3F800000: return 242; // 1.0
2719     case 0xBF800000: return 243; // -1.0
2720     case 0x40000000: return 244; // 2.0
2721     case 0xC0000000: return 245; // -2.0
2722     case 0x40800000: return 246; // 4.0
2723     case 0xC0800000: return 247; // -4.0
2724     case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
2725     default: break;
2726     }
2727     // clang-format on
2728   }
2729 
2730   return {};
2731 }
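
// For instance, getInlineEncodingV216(/*IsFloat=*/true, 0x3C00) and
// getInlineEncodingV216(/*IsFloat=*/false, 0x3F800000) both return 242 (the
// inline-constant slot for 1.0), whereas a pattern such as 0x12345678 matches
// no row and yields std::nullopt.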
2732 
2733 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2734 // or nullopt.
2735 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2736   return getInlineEncodingV216(false, Literal);
2737 }
2738 
2739 // Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
2740 // or nullopt.
2741 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
2742   int32_t Signed = static_cast<int32_t>(Literal);
2743   if (Signed >= 0 && Signed <= 64)
2744     return 128 + Signed;
2745 
2746   if (Signed >= -16 && Signed <= -1)
2747     return 192 + std::abs(Signed);
2748 
2749   // clang-format off
2750   switch (Literal) {
2751   case 0x3F00: return 240; // 0.5
2752   case 0xBF00: return 241; // -0.5
2753   case 0x3F80: return 242; // 1.0
2754   case 0xBF80: return 243; // -1.0
2755   case 0x4000: return 244; // 2.0
2756   case 0xC000: return 245; // -2.0
2757   case 0x4080: return 246; // 4.0
2758   case 0xC080: return 247; // -4.0
2759   case 0x3E22: return 248; // 1.0 / (2.0 * pi)
2760   default: break;
2761   }
2762   // clang-format on
2763 
2764   return std::nullopt;
2765 }
2766 
2767 // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2768 // or nullopt.
2769 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2770   return getInlineEncodingV216(true, Literal);
2771 }
2772 
2773 // Whether the given literal can be inlined for a V_PK_* instruction.
2774 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2775   switch (OpType) {
2776   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2777   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2778   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2779     return getInlineEncodingV216(false, Literal).has_value();
2780   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2781   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2782   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2783     return getInlineEncodingV216(true, Literal).has_value();
2784   case AMDGPU::OPERAND_REG_IMM_V2BF16:
2785   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2786   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2787     return isInlinableLiteralV2BF16(Literal);
2788   default:
2789     llvm_unreachable("bad packed operand type");
2790   }
2791 }
2792 
2793 // Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2794 bool isInlinableLiteralV2I16(uint32_t Literal) {
2795   return getInlineEncodingV2I16(Literal).has_value();
2796 }
2797 
2798 // Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
2799 bool isInlinableLiteralV2BF16(uint32_t Literal) {
2800   return getInlineEncodingV2BF16(Literal).has_value();
2801 }
2802 
2803 // Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2804 bool isInlinableLiteralV2F16(uint32_t Literal) {
2805   return getInlineEncodingV2F16(Literal).has_value();
2806 }
2807 
2808 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2809   if (IsFP64)
2810     return !(Val & 0xffffffffu);
2811 
2812   return isUInt<32>(Val) || isInt<32>(Val);
2813 }
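
// In other words, a 64-bit FP constant can use the 32-bit literal slot only
// when its low dword is zero (the literal effectively supplies just the high
// half), while integer values merely need to be representable in 32 bits with
// either signedness.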
2814 
2815 bool isArgPassedInSGPR(const Argument *A) {
2816   const Function *F = A->getParent();
2817 
2818   // Arguments to compute shaders are never a source of divergence.
2819   CallingConv::ID CC = F->getCallingConv();
2820   switch (CC) {
2821   case CallingConv::AMDGPU_KERNEL:
2822   case CallingConv::SPIR_KERNEL:
2823     return true;
2824   case CallingConv::AMDGPU_VS:
2825   case CallingConv::AMDGPU_LS:
2826   case CallingConv::AMDGPU_HS:
2827   case CallingConv::AMDGPU_ES:
2828   case CallingConv::AMDGPU_GS:
2829   case CallingConv::AMDGPU_PS:
2830   case CallingConv::AMDGPU_CS:
2831   case CallingConv::AMDGPU_Gfx:
2832   case CallingConv::AMDGPU_CS_Chain:
2833   case CallingConv::AMDGPU_CS_ChainPreserve:
2834     // For non-compute shaders, SGPR inputs are marked with either inreg or
2835     // byval. Everything else is in VGPRs.
2836     return A->hasAttribute(Attribute::InReg) ||
2837            A->hasAttribute(Attribute::ByVal);
2838   default:
2839     // TODO: treat i1 as divergent?
2840     return A->hasAttribute(Attribute::InReg);
2841   }
2842 }
2843 
2844 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2845   // Arguments to compute shaders are never a source of divergence.
2846   CallingConv::ID CC = CB->getCallingConv();
2847   switch (CC) {
2848   case CallingConv::AMDGPU_KERNEL:
2849   case CallingConv::SPIR_KERNEL:
2850     return true;
2851   case CallingConv::AMDGPU_VS:
2852   case CallingConv::AMDGPU_LS:
2853   case CallingConv::AMDGPU_HS:
2854   case CallingConv::AMDGPU_ES:
2855   case CallingConv::AMDGPU_GS:
2856   case CallingConv::AMDGPU_PS:
2857   case CallingConv::AMDGPU_CS:
2858   case CallingConv::AMDGPU_Gfx:
2859   case CallingConv::AMDGPU_CS_Chain:
2860   case CallingConv::AMDGPU_CS_ChainPreserve:
2861     // For non-compute shaders, SGPR inputs are marked with either inreg or
2862     // byval. Everything else is in VGPRs.
2863     return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2864            CB->paramHasAttr(ArgNo, Attribute::ByVal);
2865   default:
2866     return CB->paramHasAttr(ArgNo, Attribute::InReg);
2867   }
2868 }
2869 
2870 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2871   return isGCN3Encoding(ST) || isGFX10Plus(ST);
2872 }
2873 
2874 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2875                                       int64_t EncodedOffset) {
2876   if (isGFX12Plus(ST))
2877     return isUInt<23>(EncodedOffset);
2878 
2879   return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2880                                : isUInt<8>(EncodedOffset);
2881 }
2882 
2883 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2884                                     int64_t EncodedOffset,
2885                                     bool IsBuffer) {
2886   if (isGFX12Plus(ST))
2887     return isInt<24>(EncodedOffset);
2888 
2889   return !IsBuffer &&
2890          hasSMRDSignedImmOffset(ST) &&
2891          isInt<21>(EncodedOffset);
2892 }
2893 
2894 static bool isDwordAligned(uint64_t ByteOffset) {
2895   return (ByteOffset & 3) == 0;
2896 }
2897 
2898 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2899                                 uint64_t ByteOffset) {
2900   if (hasSMEMByteOffset(ST))
2901     return ByteOffset;
2902 
2903   assert(isDwordAligned(ByteOffset));
2904   return ByteOffset >> 2;
2905 }
2906 
2907 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2908                                             int64_t ByteOffset, bool IsBuffer,
2909                                             bool HasSOffset) {
2910   // For unbuffered smem loads, it is illegal for the Immediate Offset to be
2911   // negative if the resulting (Offset + (M0, SOffset, or zero)) is negative.
2912   // Handle the case where SOffset is not present.
2913   if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
2914     return std::nullopt;
2915 
2916   if (isGFX12Plus(ST)) // 24 bit signed offsets
2917     return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2918                                  : std::nullopt;
2919 
2920   // The signed version is always a byte offset.
2921   if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2922     assert(hasSMEMByteOffset(ST));
2923     return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2924                                  : std::nullopt;
2925   }
2926 
2927   if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2928     return std::nullopt;
2929 
2930   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2931   return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2932              ? std::optional<int64_t>(EncodedOffset)
2933              : std::nullopt;
2934 }
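
// Rough sketch of the unit handling, assuming an SI-like target that encodes
// SMRD offsets in dwords: a byte offset of 16 is dword aligned and encodes as
// 16 >> 2 == 4, while an unaligned byte offset such as 6 produces
// std::nullopt.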
2935 
2936 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2937                                                      int64_t ByteOffset) {
2938   if (!isCI(ST) || !isDwordAligned(ByteOffset))
2939     return std::nullopt;
2940 
2941   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2942   return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2943                                    : std::nullopt;
2944 }
2945 
2946 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2947   if (AMDGPU::isGFX10(ST))
2948     return 12;
2949 
2950   if (AMDGPU::isGFX12(ST))
2951     return 24;
2952   return 13;
2953 }
2954 
2955 namespace {
2956 
2957 struct SourceOfDivergence {
2958   unsigned Intr;
2959 };
2960 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2961 
2962 struct AlwaysUniform {
2963   unsigned Intr;
2964 };
2965 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2966 
2967 #define GET_SourcesOfDivergence_IMPL
2968 #define GET_UniformIntrinsics_IMPL
2969 #define GET_Gfx9BufferFormat_IMPL
2970 #define GET_Gfx10BufferFormat_IMPL
2971 #define GET_Gfx11PlusBufferFormat_IMPL
2972 
2973 #include "AMDGPUGenSearchableTables.inc"
2974 
2975 } // end anonymous namespace
2976 
2977 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2978   return lookupSourceOfDivergence(IntrID);
2979 }
2980 
2981 bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2982   return lookupAlwaysUniform(IntrID);
2983 }
2984 
2985 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2986                                                   uint8_t NumComponents,
2987                                                   uint8_t NumFormat,
2988                                                   const MCSubtargetInfo &STI) {
2989   return isGFX11Plus(STI)
2990              ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2991                                             NumFormat)
2992              : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2993                                                        NumComponents, NumFormat)
2994                             : getGfx9BufferFormatInfo(BitsPerComp,
2995                                                       NumComponents, NumFormat);
2996 }
2997 
2998 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2999                                                   const MCSubtargetInfo &STI) {
3000   return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3001                           : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3002                                          : getGfx9BufferFormatInfo(Format);
3003 }
3004 
3005 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
3006   for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
3007                        OpName::src2 }) {
3008     int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3009     if (Idx == -1)
3010       continue;
3011 
3012     if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
3013         OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
3014       return true;
3015   }
3016 
3017   return false;
3018 }
3019 
3020 bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
3021   return hasAny64BitVGPROperands(OpDesc);
3022 }
3023 
3024 unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3025   // Currently this is 128 for all subtargets
3026   return 128;
3027 }
3028 
3029 } // namespace AMDGPU
3030 
3031 raw_ostream &operator<<(raw_ostream &OS,
3032                         const AMDGPU::IsaInfo::TargetIDSetting S) {
3033   switch (S) {
3034   case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
3035     OS << "Unsupported";
3036     break;
3037   case (AMDGPU::IsaInfo::TargetIDSetting::Any):
3038     OS << "Any";
3039     break;
3040   case (AMDGPU::IsaInfo::TargetIDSetting::Off):
3041     OS << "Off";
3042     break;
3043   case (AMDGPU::IsaInfo::TargetIDSetting::On):
3044     OS << "On";
3045     break;
3046   }
3047   return OS;
3048 }
3049 
3050 } // namespace llvm
3051