Lines Matching +full:node +full:- +full:version

1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
36 "amdhsa-code-object-version", llvm::cl::Hidden,
38 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
45 return ((1 << Width) - 1) << Shift;
175 return (unsigned)Ver->getZExtValue() / 100;
210 report_fatal_error("Unsupported AMDHSA Code Object Version " +
277 return Info ? Info->Opcode : -1;
282 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
288 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
289 NewChannels, OrigInfo->VAddrDwords);
290 return NewInfo ? NewInfo->Opcode : -1;
296 unsigned AddrWords = BaseOpcode->NumExtraArgs;
297 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
298 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
309 if (BaseOpcode->Gradients) {
310 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
313 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
314 AddrWords += alignTo<2>(Dim->NumGradients / 2);
316 AddrWords += Dim->NumGradients;
434 return Info ? Info->BaseOpcode : -1;
439 return Info ? Info->Opcode : -1;
444 return Info ? Info->elements : 0;
449 return Info ? Info->has_vaddr : false;
454 return Info ? Info->has_srsrc : false;
459 return Info ? Info->has_soffset : false;
464 return Info ? Info->BaseOpcode : -1;
469 return Info ? Info->Opcode : -1;
474 return Info ? Info->elements : 0;
479 return Info ? Info->has_vaddr : false;
484 return Info ? Info->has_srsrc : false;
489 return Info ? Info->has_soffset : false;
494 return Info ? Info->IsBufferInv : false;
499 return Info ? Info->tfe : false;
504 return Info ? Info->IsBuffer : false;
509 return Info ? Info->IsSingle : true;
514 return Info ? Info->IsSingle : true;
519 return Info ? Info->IsSingle : true;
530 return Info ? Info->is_dgemm : false;
535 return Info ? Info->is_gfx940_xdl : false;
573 return {Info->CanBeVOPDX, true};
579 return Info ? Info->VOPDOp : ~0u;
656 return Info ? Info->IsTrue16 : false;
663 if (Info->HasFP8DstByteSel)
665 if (Info->HasFP4DstByteSel)
673 return Info ? Info->Opcode3Addr : ~0u;
678 return Info ? Info->Opcode2Addr : ~0u;
691 return Info ? Info->Opcode : -1;
697 const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
698 const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
700 return {OpX->BaseVOP, OpY->BaseVOP};
708 assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
709 assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
711 assert(TiedIdx == -1 || TiedIdx == Component::DST);
712 HasSrc2Acc = TiedIdx != -1;
714 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
733 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
779 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
797 const auto &OpXDesc = InstrInfo->get(OpX);
798 const auto &OpYDesc = InstrInfo->get(OpY);
826 else if (Feature == "-xnack")
830 else if (Feature == "-sramecc")
875 if (FeatureString.ends_with("-"))
900 auto Version = getIsaVersion(STI.getCPU());
902 StreamRep << TargetTriple.getArchName() << '-'
903 << TargetTriple.getVendorName() << '-'
904 << TargetTriple.getOSName() << '-'
905 << TargetTriple.getEnvironmentName() << '-';
911 if (Version.Major >= 9)
914 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
915 Twine(Version.Stepping))
922 Features += ":sramecc-";
927 Features += ":xnack-";
938 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
940 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
952 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
959 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
961 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
963 if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
972 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
974 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
982 if (STI->getTargetTriple().getArch() != Triple::amdgcn)
987 // Single-wave workgroups don't consume barrier resources.
992 if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
1032 IsaVersion Version = getIsaVersion(STI->getCPU());
1033 if (Version.Major >= 10)
1035 if (Version.Major >= 8)
1045 IsaVersion Version = getIsaVersion(STI->getCPU());
1046 if (Version.Major >= 8)
1052 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
1055 IsaVersion Version = getIsaVersion(STI->getCPU());
1056 if (Version.Major >= 10)
1058 if (Version.Major >= 8)
1066 IsaVersion Version = getIsaVersion(STI->getCPU());
1067 if (Version.Major >= 10)
1074 if (STI->getFeatureBits().test(FeatureTrapHandler))
1075 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1085 IsaVersion Version = getIsaVersion(STI->getCPU());
1086 if (Version.Major >= 10)
1088 if (Version.Major >= 8 && !Addressable)
1091 if (STI->getFeatureBits().test(FeatureTrapHandler))
1092 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
1103 IsaVersion Version = getIsaVersion(STI->getCPU());
1104 if (Version.Major >= 10)
1107 if (Version.Major < 8) {
1115 STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
1125 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
1135 return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
1141 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1146 STI->getFeatureBits().test(FeatureWavefrontSize32);
1148 if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1159 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1164 STI->getFeatureBits().test(FeatureWavefrontSize32);
1170 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1174 bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1175 if (STI->getFeatureBits().test(Feature1_5xVGPRs))
1183 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1251 unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1267 NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1281 IsaVersion Version = getIsaVersion(STI->getCPU());
1285 KernelCode.amd_machine_version_major = Version.Major;
1286 KernelCode.amd_machine_version_minor = Version.Minor;
1287 KernelCode.amd_machine_version_stepping = Version.Stepping;
1289 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1298 KernelCode.call_convention = -1;
1306 if (Version.Major >= 10) {
1308 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1314 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1318 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1322 unsigned AS = GV->getAddressSpace();
1336 return {Attr->first, Attr->second ? *(Attr->second) : Default.second};
1404 unsigned getVmcntBitMask(const IsaVersion &Version) {
1405 return (1 << (getVmcntBitWidthLo(Version.Major) +
1406 getVmcntBitWidthHi(Version.Major))) -
1410 unsigned getLoadcntBitMask(const IsaVersion &Version) {
1411 return (1 << getLoadcntBitWidth(Version.Major)) - 1;
1414 unsigned getSamplecntBitMask(const IsaVersion &Version) {
1415 return (1 << getSamplecntBitWidth(Version.Major)) - 1;
1418 unsigned getBvhcntBitMask(const IsaVersion &Version) {
1419 return (1 << getBvhcntBitWidth(Version.Major)) - 1;
1422 unsigned getExpcntBitMask(const IsaVersion &Version) {
1423 return (1 << getExpcntBitWidth(Version.Major)) - 1;
1426 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1427 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1430 unsigned getDscntBitMask(const IsaVersion &Version) {
1431 return (1 << getDscntBitWidth(Version.Major)) - 1;
1434 unsigned getKmcntBitMask(const IsaVersion &Version) {
1435 return (1 << getKmcntBitWidth(Version.Major)) - 1;
1438 unsigned getStorecntBitMask(const IsaVersion &Version) {
1439 return (1 << getStorecntBitWidth(Version.Major)) - 1;
1442 unsigned getWaitcntBitMask(const IsaVersion &Version) {
1443 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1444 getVmcntBitWidthLo(Version.Major));
1445 unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1446 getExpcntBitWidth(Version.Major));
1447 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1448 getLgkmcntBitWidth(Version.Major));
1449 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1450 getVmcntBitWidthHi(Version.Major));
1454 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1455 unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1456 getVmcntBitWidthLo(Version.Major));
1457 unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1458 getVmcntBitWidthHi(Version.Major));
1459 return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1462 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1463 return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1464 getExpcntBitWidth(Version.Major));
1467 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1468 return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1469 getLgkmcntBitWidth(Version.Major));
1472 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1474 Vmcnt = decodeVmcnt(Version, Waitcnt);
1475 Expcnt = decodeExpcnt(Version, Waitcnt);
1476 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1479 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1481 Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
1482 Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1483 Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
1487 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1489 Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1490 getVmcntBitWidthLo(Version.Major));
1491 return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1492 getVmcntBitShiftHi(Version.Major),
1493 getVmcntBitWidthHi(Version.Major));
1496 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1498 return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1499 getExpcntBitWidth(Version.Major));
1502 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1504 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1505 getLgkmcntBitWidth(Version.Major));
1508 unsigned encodeWaitcnt(const IsaVersion &Version,
1510 unsigned Waitcnt = getWaitcntBitMask(Version);
1511 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1512 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1513 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1517 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1518 return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
1521 static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1523 unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
1524 getDscntBitWidth(Version.Major));
1526 unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1527 getStorecntBitWidth(Version.Major));
1530 unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
1531 getLoadcntBitWidth(Version.Major));
1535 Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1538 unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
1539 getLoadcntBitWidth(Version.Major));
1540 Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
1541 getDscntBitWidth(Version.Major));
1545 Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1548 unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
1549 getStorecntBitWidth(Version.Major));
1550 Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
1551 getDscntBitWidth(Version.Major));
1555 static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1557 return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1558 getLoadcntBitWidth(Version.Major));
1561 static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1563 return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
1564 getStorecntBitWidth(Version.Major));
1567 static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1569 return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
1570 getDscntBitWidth(Version.Major));
1573 static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1575 unsigned Waitcnt = getCombinedCountBitMask(Version, false);
1576 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1577 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1581 unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1582 return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
1585 static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1587 unsigned Waitcnt = getCombinedCountBitMask(Version, true);
1588 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1589 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1593 unsigned encodeStorecntDscnt(const IsaVersion &Version,
1595 return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
1598 //===----------------------------------------------------------------------===//
1600 //===----------------------------------------------------------------------===//
1679 //===----------------------------------------------------------------------===//
1681 //===----------------------------------------------------------------------===//
1686 static int Default = -1;
1687 if (Default == -1)
1748 //===----------------------------------------------------------------------===//
1750 //===----------------------------------------------------------------------===//
1773 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1823 //===----------------------------------------------------------------------===//
1825 //===----------------------------------------------------------------------===//
1938 //===----------------------------------------------------------------------===//
1940 //===----------------------------------------------------------------------===//
2022 //===----------------------------------------------------------------------===//
2024 //===----------------------------------------------------------------------===//
2033 "amdgpu-color-export",
2038 return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2103 return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2136 auto Version = getIsaVersion(STI.getCPU());
2137 if (Version.Major == 10)
2138 return Version.Minor >= 3 ? 13 : 5;
2139 if (Version.Major == 11)
2141 if (Version.Major >= 12)
2282 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2283 const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2336 #define CASE_CI_VI(node) \
2338 case node: return isCI(STI) ? node##_ci : node##_vi;
2340 #define CASE_VI_GFX9PLUS(node) \
2341 case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2343 #define CASE_GFXPRE11_GFX11PLUS(node) \
2344 case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2346 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2347 case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2360 #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
2361 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2362 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2363 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2606 (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2608 (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2610 (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2612 (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2623 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2627 // floating-point, so it is a legal inline immediate.
2632 (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2634 (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2636 (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2638 (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2649 Val == 0xBF00 || // -0.5
2651 Val == 0xBF80 || // -1.0
2653 Val == 0xC000 || // -2.0
2655 Val == 0xC080 || // -4.0
2670 Val == 0xBC00 || // -1.0
2672 Val == 0xB800 || // -0.5
2674 Val == 0xC000 || // -2.0
2676 Val == 0xC400 || // -4.0
2682 // misleading about how the inline operands work for (packed) 16-bit
2685 // - integer encodings (-16 .. 64) are always produced as sign-extended
2686 // 32-bit values
2687 // - float encodings are produced as:
2688 // - for F16 instructions: corresponding half-precision float values in
2690 // - for UI16 instructions: corresponding single-precision float value
2695 if (Signed >= -16 && Signed <= -1)
2699 // clang-format off
2702 case 0xB800: return 241; // -0.5
2704 case 0xBC00: return 243; // -1.0
2706 case 0xC000: return 245; // -2.0
2708 case 0xC400: return 247; // -4.0
2712 // clang-format on
2714 // clang-format off
2717 case 0xBF000000: return 241; // -0.5
2719 case 0xBF800000: return 243; // -1.0
2721 case 0xC0000000: return 245; // -2.0
2723 case 0xC0800000: return 247; // -4.0
2727 // clang-format on
2746 if (Signed >= -16 && Signed <= -1)
2749 // clang-format off
2752 case 0xBF00: return 241; // -0.5
2754 case 0xBF80: return 243; // -1.0
2756 case 0xC000: return 245; // -2.0
2758 case 0xC080: return 247; // -4.0
2762 // clang-format on
2816 const Function *F = A->getParent();
2819 CallingConv::ID CC = F->getCallingConv();
2834 // For non-compute shaders, SGPR inputs are marked with either inreg or
2836 return A->hasAttribute(Attribute::InReg) ||
2837 A->hasAttribute(Attribute::ByVal);
2840 return A->hasAttribute(Attribute::InReg);
2846 CallingConv::ID CC = CB->getCallingConv();
2861 // For non-compute shaders, SGPR inputs are marked with either inreg or
2863 return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2864 CB->paramHasAttr(ArgNo, Attribute::ByVal);
2866 return CB->paramHasAttr(ArgNo, Attribute::InReg);
2920 // The signed version is always a byte offset.
3009 if (Idx == -1)