//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file provides AMDGPU specific target streamer methods.
1009467b48Spatrick //
1109467b48Spatrick //===----------------------------------------------------------------------===//
1209467b48Spatrick
1309467b48Spatrick #include "AMDGPUTargetStreamer.h"
1473471bf0Spatrick #include "AMDGPUPTNote.h"
1573471bf0Spatrick #include "AMDKernelCodeT.h"
1609467b48Spatrick #include "Utils/AMDGPUBaseInfo.h"
1709467b48Spatrick #include "Utils/AMDKernelCodeTUtils.h"
1809467b48Spatrick #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
1909467b48Spatrick #include "llvm/BinaryFormat/ELF.h"
20*d415bd75Srobert #include "llvm/MC/MCAssembler.h"
2109467b48Spatrick #include "llvm/MC/MCContext.h"
2209467b48Spatrick #include "llvm/MC/MCELFStreamer.h"
2309467b48Spatrick #include "llvm/MC/MCSectionELF.h"
24*d415bd75Srobert #include "llvm/MC/MCSubtargetInfo.h"
2573471bf0Spatrick #include "llvm/Support/AMDGPUMetadata.h"
2673471bf0Spatrick #include "llvm/Support/AMDHSAKernelDescriptor.h"
27*d415bd75Srobert #include "llvm/Support/Casting.h"
2809467b48Spatrick #include "llvm/Support/FormattedStream.h"
29*d415bd75Srobert #include "llvm/Support/TargetParser.h"
3009467b48Spatrick
3109467b48Spatrick using namespace llvm;
3209467b48Spatrick using namespace llvm::AMDGPU;
3309467b48Spatrick
3409467b48Spatrick //===----------------------------------------------------------------------===//
3509467b48Spatrick // AMDGPUTargetStreamer
3609467b48Spatrick //===----------------------------------------------------------------------===//
3709467b48Spatrick
/// Adjusts an ISA version triple in place.
///
/// When the version is 9.0.x with x in {0, 2, 4, 6} and \p Xnack is set,
/// \p Stepping is advanced by one. Every other input is left untouched.
/// \p Major and \p Minor are never modified, and \p Sramecc is accepted but
/// not consulted.
static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor,
                                uint32_t &Stepping, bool Sramecc, bool Xnack) {
  // Nothing to do outside the 9.0 family or when XNACK is not requested.
  if (Major != 9 || Minor != 0 || !Xnack)
    return;

  // Only the even steppings 0, 2, 4 and 6 are bumped to their successor.
  const bool IsBumpedStepping =
      Stepping == 0 || Stepping == 2 || Stepping == 4 || Stepping == 6;
  if (IsBumpedStepping)
    ++Stepping;
}
5173471bf0Spatrick
EmitHSAMetadataV2(StringRef HSAMetadataString)5209467b48Spatrick bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
5309467b48Spatrick HSAMD::Metadata HSAMetadata;
5473471bf0Spatrick if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
5509467b48Spatrick return false;
5609467b48Spatrick return EmitHSAMetadata(HSAMetadata);
5709467b48Spatrick }
5809467b48Spatrick
EmitHSAMetadataV3(StringRef HSAMetadataString)5909467b48Spatrick bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
6009467b48Spatrick msgpack::Document HSAMetadataDoc;
6109467b48Spatrick if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
6209467b48Spatrick return false;
6309467b48Spatrick return EmitHSAMetadata(HSAMetadataDoc, false);
6409467b48Spatrick }
6509467b48Spatrick
getArchNameFromElfMach(unsigned ElfMach)6609467b48Spatrick StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
6709467b48Spatrick AMDGPU::GPUKind AK;
6809467b48Spatrick
6909467b48Spatrick switch (ElfMach) {
7009467b48Spatrick default: llvm_unreachable("Unhandled ELF::EF_AMDGPU type");
7109467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
7209467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
7309467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
7409467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
7509467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
7609467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
7709467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
7809467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
7909467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
8009467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
8109467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
8209467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
8309467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
8409467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
8509467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
8609467b48Spatrick case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
8709467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
8809467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
8973471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
9009467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
9109467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
9209467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
9309467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
9409467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
9573471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
9609467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
9709467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
9809467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
9973471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
10009467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
10109467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
10209467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
10309467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
10409467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
10509467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
10609467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
10773471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
10873471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
109*d415bd75Srobert case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
11009467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
11109467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
11209467b48Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
11373471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
114097a140dSpatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
11573471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
11673471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
11773471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
11873471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
11973471bf0Spatrick case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
120*d415bd75Srobert case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
121*d415bd75Srobert case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
122*d415bd75Srobert case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
123*d415bd75Srobert case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
124*d415bd75Srobert case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
12509467b48Spatrick case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
12609467b48Spatrick }
12709467b48Spatrick
12809467b48Spatrick StringRef GPUName = getArchNameAMDGCN(AK);
12909467b48Spatrick if (GPUName != "")
13009467b48Spatrick return GPUName;
13109467b48Spatrick return getArchNameR600(AK);
13209467b48Spatrick }
13309467b48Spatrick
getElfMach(StringRef GPU)13409467b48Spatrick unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
13509467b48Spatrick AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
13609467b48Spatrick if (AK == AMDGPU::GPUKind::GK_NONE)
13709467b48Spatrick AK = parseArchR600(GPU);
13809467b48Spatrick
13909467b48Spatrick switch (AK) {
14009467b48Spatrick case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
14109467b48Spatrick case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
14209467b48Spatrick case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
14309467b48Spatrick case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
14409467b48Spatrick case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
14509467b48Spatrick case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
14609467b48Spatrick case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
14709467b48Spatrick case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
14809467b48Spatrick case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
14909467b48Spatrick case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
15009467b48Spatrick case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
15109467b48Spatrick case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
15209467b48Spatrick case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
15309467b48Spatrick case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
15409467b48Spatrick case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
15509467b48Spatrick case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
15609467b48Spatrick case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
15709467b48Spatrick case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
15873471bf0Spatrick case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
15909467b48Spatrick case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
16009467b48Spatrick case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
16109467b48Spatrick case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
16209467b48Spatrick case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
16309467b48Spatrick case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
16473471bf0Spatrick case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
16509467b48Spatrick case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
16609467b48Spatrick case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
16709467b48Spatrick case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
16873471bf0Spatrick case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
16909467b48Spatrick case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
17009467b48Spatrick case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
17109467b48Spatrick case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
17209467b48Spatrick case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
17309467b48Spatrick case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
17409467b48Spatrick case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
17509467b48Spatrick case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
17673471bf0Spatrick case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
17773471bf0Spatrick case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
178*d415bd75Srobert case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
17909467b48Spatrick case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
18009467b48Spatrick case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
18109467b48Spatrick case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
18273471bf0Spatrick case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
183097a140dSpatrick case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
18473471bf0Spatrick case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
18573471bf0Spatrick case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
18673471bf0Spatrick case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
18773471bf0Spatrick case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
18873471bf0Spatrick case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
189*d415bd75Srobert case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
190*d415bd75Srobert case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
191*d415bd75Srobert case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
192*d415bd75Srobert case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
193*d415bd75Srobert case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
19409467b48Spatrick case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
19509467b48Spatrick }
19609467b48Spatrick
19709467b48Spatrick llvm_unreachable("unknown GPU");
19809467b48Spatrick }
19909467b48Spatrick
20009467b48Spatrick //===----------------------------------------------------------------------===//
20109467b48Spatrick // AMDGPUTargetAsmStreamer
20209467b48Spatrick //===----------------------------------------------------------------------===//
20309467b48Spatrick
AMDGPUTargetAsmStreamer(MCStreamer & S,formatted_raw_ostream & OS)20409467b48Spatrick AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
20509467b48Spatrick formatted_raw_ostream &OS)
20609467b48Spatrick : AMDGPUTargetStreamer(S), OS(OS) { }
20709467b48Spatrick
20809467b48Spatrick // A hook for emitting stuff at the end.
20909467b48Spatrick // We use it for emitting the accumulated PAL metadata as directives.
21073471bf0Spatrick // The PAL metadata is reset after it is emitted.
finish()21109467b48Spatrick void AMDGPUTargetAsmStreamer::finish() {
21209467b48Spatrick std::string S;
21309467b48Spatrick getPALMetadata()->toString(S);
21409467b48Spatrick OS << S;
21573471bf0Spatrick
21673471bf0Spatrick // Reset the pal metadata so its data will not affect a compilation that
21773471bf0Spatrick // reuses this object.
21873471bf0Spatrick getPALMetadata()->reset();
21909467b48Spatrick }
22009467b48Spatrick
EmitDirectiveAMDGCNTarget()22173471bf0Spatrick void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
22273471bf0Spatrick OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
22309467b48Spatrick }
22409467b48Spatrick
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)22509467b48Spatrick void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
22609467b48Spatrick uint32_t Major, uint32_t Minor) {
22709467b48Spatrick OS << "\t.hsa_code_object_version " <<
22809467b48Spatrick Twine(Major) << "," << Twine(Minor) << '\n';
22909467b48Spatrick }
23009467b48Spatrick
23109467b48Spatrick void
EmitDirectiveHSACodeObjectISAV2(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)23273471bf0Spatrick AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
23309467b48Spatrick uint32_t Minor,
23409467b48Spatrick uint32_t Stepping,
23509467b48Spatrick StringRef VendorName,
23609467b48Spatrick StringRef ArchName) {
23773471bf0Spatrick convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
23873471bf0Spatrick OS << "\t.hsa_code_object_isa " << Twine(Major) << "," << Twine(Minor) << ","
23973471bf0Spatrick << Twine(Stepping) << ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
24009467b48Spatrick }
24109467b48Spatrick
24209467b48Spatrick void
EmitAMDKernelCodeT(const amd_kernel_code_t & Header)24309467b48Spatrick AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
24409467b48Spatrick OS << "\t.amd_kernel_code_t\n";
24509467b48Spatrick dumpAmdKernelCode(&Header, OS, "\t\t");
24609467b48Spatrick OS << "\t.end_amd_kernel_code_t\n";
24709467b48Spatrick }
24809467b48Spatrick
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)24909467b48Spatrick void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
25009467b48Spatrick unsigned Type) {
25109467b48Spatrick switch (Type) {
25209467b48Spatrick default: llvm_unreachable("Invalid AMDGPU symbol type");
25309467b48Spatrick case ELF::STT_AMDGPU_HSA_KERNEL:
25409467b48Spatrick OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
25509467b48Spatrick break;
25609467b48Spatrick }
25709467b48Spatrick }
25809467b48Spatrick
emitAMDGPULDS(MCSymbol * Symbol,unsigned Size,Align Alignment)25909467b48Spatrick void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
260097a140dSpatrick Align Alignment) {
261097a140dSpatrick OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
262097a140dSpatrick << Alignment.value() << '\n';
26309467b48Spatrick }
26409467b48Spatrick
EmitISAVersion()26573471bf0Spatrick bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
26673471bf0Spatrick OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
26709467b48Spatrick return true;
26809467b48Spatrick }
26909467b48Spatrick
EmitHSAMetadata(const AMDGPU::HSAMD::Metadata & HSAMetadata)27009467b48Spatrick bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
27109467b48Spatrick const AMDGPU::HSAMD::Metadata &HSAMetadata) {
27209467b48Spatrick std::string HSAMetadataString;
27309467b48Spatrick if (HSAMD::toString(HSAMetadata, HSAMetadataString))
27409467b48Spatrick return false;
27509467b48Spatrick
27673471bf0Spatrick OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
27709467b48Spatrick OS << HSAMetadataString << '\n';
27873471bf0Spatrick OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
27909467b48Spatrick return true;
28009467b48Spatrick }
28109467b48Spatrick
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)28209467b48Spatrick bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
28309467b48Spatrick msgpack::Document &HSAMetadataDoc, bool Strict) {
28473471bf0Spatrick HSAMD::V3::MetadataVerifier Verifier(Strict);
28509467b48Spatrick if (!Verifier.verify(HSAMetadataDoc.getRoot()))
28609467b48Spatrick return false;
28709467b48Spatrick
28809467b48Spatrick std::string HSAMetadataString;
28909467b48Spatrick raw_string_ostream StrOS(HSAMetadataString);
29009467b48Spatrick HSAMetadataDoc.toYAML(StrOS);
29109467b48Spatrick
29273471bf0Spatrick OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
29309467b48Spatrick OS << StrOS.str() << '\n';
29473471bf0Spatrick OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
29509467b48Spatrick return true;
29609467b48Spatrick }
29709467b48Spatrick
EmitCodeEnd(const MCSubtargetInfo & STI)29873471bf0Spatrick bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
29909467b48Spatrick const uint32_t Encoded_s_code_end = 0xbf9f0000;
30073471bf0Spatrick const uint32_t Encoded_s_nop = 0xbf800000;
30173471bf0Spatrick uint32_t Encoded_pad = Encoded_s_code_end;
30273471bf0Spatrick
30373471bf0Spatrick // Instruction cache line size in bytes.
304*d415bd75Srobert const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
30573471bf0Spatrick const unsigned CacheLineSize = 1u << Log2CacheLineSize;
30673471bf0Spatrick
30773471bf0Spatrick // Extra padding amount in bytes to support prefetch mode 3.
30873471bf0Spatrick unsigned FillSize = 3 * CacheLineSize;
30973471bf0Spatrick
31073471bf0Spatrick if (AMDGPU::isGFX90A(STI)) {
31173471bf0Spatrick Encoded_pad = Encoded_s_nop;
31273471bf0Spatrick FillSize = 16 * CacheLineSize;
31373471bf0Spatrick }
31473471bf0Spatrick
31573471bf0Spatrick OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
31673471bf0Spatrick OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
31709467b48Spatrick return true;
31809467b48Spatrick }
31909467b48Spatrick
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const amdhsa::kernel_descriptor_t & KD,uint64_t NextVGPR,uint64_t NextSGPR,bool ReserveVCC,bool ReserveFlatScr)32009467b48Spatrick void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
32109467b48Spatrick const MCSubtargetInfo &STI, StringRef KernelName,
32209467b48Spatrick const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
32373471bf0Spatrick bool ReserveVCC, bool ReserveFlatScr) {
32409467b48Spatrick IsaVersion IVersion = getIsaVersion(STI.getCPU());
32509467b48Spatrick
32609467b48Spatrick OS << "\t.amdhsa_kernel " << KernelName << '\n';
32709467b48Spatrick
32809467b48Spatrick #define PRINT_FIELD(STREAM, DIRECTIVE, KERNEL_DESC, MEMBER_NAME, FIELD_NAME) \
32909467b48Spatrick STREAM << "\t\t" << DIRECTIVE << " " \
33009467b48Spatrick << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
33109467b48Spatrick
33209467b48Spatrick OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
33309467b48Spatrick << '\n';
33409467b48Spatrick OS << "\t\t.amdhsa_private_segment_fixed_size "
33509467b48Spatrick << KD.private_segment_fixed_size << '\n';
33673471bf0Spatrick OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
33709467b48Spatrick
338*d415bd75Srobert PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
339*d415bd75Srobert compute_pgm_rsrc2,
340*d415bd75Srobert amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
341*d415bd75Srobert
34273471bf0Spatrick if (!hasArchitectedFlatScratch(STI))
34373471bf0Spatrick PRINT_FIELD(
34473471bf0Spatrick OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
34509467b48Spatrick kernel_code_properties,
34609467b48Spatrick amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
34709467b48Spatrick PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD,
34809467b48Spatrick kernel_code_properties,
34909467b48Spatrick amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
35009467b48Spatrick PRINT_FIELD(OS, ".amdhsa_user_sgpr_queue_ptr", KD,
35109467b48Spatrick kernel_code_properties,
35209467b48Spatrick amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
35309467b48Spatrick PRINT_FIELD(OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD,
35409467b48Spatrick kernel_code_properties,
35509467b48Spatrick amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
35609467b48Spatrick PRINT_FIELD(OS, ".amdhsa_user_sgpr_dispatch_id", KD,
35709467b48Spatrick kernel_code_properties,
35809467b48Spatrick amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
35973471bf0Spatrick if (!hasArchitectedFlatScratch(STI))
36009467b48Spatrick PRINT_FIELD(OS, ".amdhsa_user_sgpr_flat_scratch_init", KD,
36109467b48Spatrick kernel_code_properties,
36209467b48Spatrick amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
36309467b48Spatrick PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
36409467b48Spatrick kernel_code_properties,
36509467b48Spatrick amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
36609467b48Spatrick if (IVersion.Major >= 10)
36709467b48Spatrick PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
36809467b48Spatrick kernel_code_properties,
36909467b48Spatrick amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
370*d415bd75Srobert if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5)
371*d415bd75Srobert PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties,
372*d415bd75Srobert amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
37373471bf0Spatrick PRINT_FIELD(OS,
37473471bf0Spatrick (hasArchitectedFlatScratch(STI)
37573471bf0Spatrick ? ".amdhsa_enable_private_segment"
37673471bf0Spatrick : ".amdhsa_system_sgpr_private_segment_wavefront_offset"),
37773471bf0Spatrick KD, compute_pgm_rsrc2,
37873471bf0Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
37909467b48Spatrick PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD,
38009467b48Spatrick compute_pgm_rsrc2,
38109467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
38209467b48Spatrick PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD,
38309467b48Spatrick compute_pgm_rsrc2,
38409467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
38509467b48Spatrick PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD,
38609467b48Spatrick compute_pgm_rsrc2,
38709467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
38809467b48Spatrick PRINT_FIELD(OS, ".amdhsa_system_sgpr_workgroup_info", KD,
38909467b48Spatrick compute_pgm_rsrc2,
39009467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
39109467b48Spatrick PRINT_FIELD(OS, ".amdhsa_system_vgpr_workitem_id", KD,
39209467b48Spatrick compute_pgm_rsrc2,
39309467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
39409467b48Spatrick
39509467b48Spatrick // These directives are required.
39609467b48Spatrick OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
39709467b48Spatrick OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
39809467b48Spatrick
39973471bf0Spatrick if (AMDGPU::isGFX90A(STI))
40073471bf0Spatrick OS << "\t\t.amdhsa_accum_offset " <<
40173471bf0Spatrick (AMDHSA_BITS_GET(KD.compute_pgm_rsrc3,
40273471bf0Spatrick amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
40373471bf0Spatrick << '\n';
40473471bf0Spatrick
40509467b48Spatrick if (!ReserveVCC)
40609467b48Spatrick OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
40773471bf0Spatrick if (IVersion.Major >= 7 && !ReserveFlatScr && !hasArchitectedFlatScratch(STI))
40809467b48Spatrick OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
40973471bf0Spatrick
410*d415bd75Srobert if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
41173471bf0Spatrick switch (*HsaAbiVer) {
41273471bf0Spatrick default:
41373471bf0Spatrick break;
41473471bf0Spatrick case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
41573471bf0Spatrick break;
41673471bf0Spatrick case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
41773471bf0Spatrick case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
418*d415bd75Srobert case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
41973471bf0Spatrick if (getTargetID()->isXnackSupported())
42073471bf0Spatrick OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
42173471bf0Spatrick break;
42273471bf0Spatrick }
42373471bf0Spatrick }
42409467b48Spatrick
42509467b48Spatrick PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
42609467b48Spatrick compute_pgm_rsrc1,
42709467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
42809467b48Spatrick PRINT_FIELD(OS, ".amdhsa_float_round_mode_16_64", KD,
42909467b48Spatrick compute_pgm_rsrc1,
43009467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
43109467b48Spatrick PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_32", KD,
43209467b48Spatrick compute_pgm_rsrc1,
43309467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
43409467b48Spatrick PRINT_FIELD(OS, ".amdhsa_float_denorm_mode_16_64", KD,
43509467b48Spatrick compute_pgm_rsrc1,
43609467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
43709467b48Spatrick PRINT_FIELD(OS, ".amdhsa_dx10_clamp", KD,
43809467b48Spatrick compute_pgm_rsrc1,
43909467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
44009467b48Spatrick PRINT_FIELD(OS, ".amdhsa_ieee_mode", KD,
44109467b48Spatrick compute_pgm_rsrc1,
44209467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
44309467b48Spatrick if (IVersion.Major >= 9)
44409467b48Spatrick PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
44509467b48Spatrick compute_pgm_rsrc1,
44609467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
44773471bf0Spatrick if (AMDGPU::isGFX90A(STI))
44873471bf0Spatrick PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
44973471bf0Spatrick compute_pgm_rsrc3,
45073471bf0Spatrick amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
45109467b48Spatrick if (IVersion.Major >= 10) {
45209467b48Spatrick PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
45309467b48Spatrick compute_pgm_rsrc1,
45409467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
45509467b48Spatrick PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
45609467b48Spatrick compute_pgm_rsrc1,
45709467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
45809467b48Spatrick PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
45909467b48Spatrick compute_pgm_rsrc1,
46009467b48Spatrick amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
461*d415bd75Srobert PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
462*d415bd75Srobert amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
46309467b48Spatrick }
46409467b48Spatrick PRINT_FIELD(
46509467b48Spatrick OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
46609467b48Spatrick compute_pgm_rsrc2,
46709467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
46809467b48Spatrick PRINT_FIELD(OS, ".amdhsa_exception_fp_denorm_src", KD,
46909467b48Spatrick compute_pgm_rsrc2,
47009467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
47109467b48Spatrick PRINT_FIELD(
47209467b48Spatrick OS, ".amdhsa_exception_fp_ieee_div_zero", KD,
47309467b48Spatrick compute_pgm_rsrc2,
47409467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
47509467b48Spatrick PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_overflow", KD,
47609467b48Spatrick compute_pgm_rsrc2,
47709467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
47809467b48Spatrick PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_underflow", KD,
47909467b48Spatrick compute_pgm_rsrc2,
48009467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
48109467b48Spatrick PRINT_FIELD(OS, ".amdhsa_exception_fp_ieee_inexact", KD,
48209467b48Spatrick compute_pgm_rsrc2,
48309467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
48409467b48Spatrick PRINT_FIELD(OS, ".amdhsa_exception_int_div_zero", KD,
48509467b48Spatrick compute_pgm_rsrc2,
48609467b48Spatrick amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
48709467b48Spatrick #undef PRINT_FIELD
48809467b48Spatrick
48909467b48Spatrick OS << "\t.end_amdhsa_kernel\n";
49009467b48Spatrick }
49109467b48Spatrick
49209467b48Spatrick //===----------------------------------------------------------------------===//
49309467b48Spatrick // AMDGPUTargetELFStreamer
49409467b48Spatrick //===----------------------------------------------------------------------===//
49509467b48Spatrick
AMDGPUTargetELFStreamer(MCStreamer & S,const MCSubtargetInfo & STI)496097a140dSpatrick AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
497097a140dSpatrick const MCSubtargetInfo &STI)
49873471bf0Spatrick : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
49909467b48Spatrick
getStreamer()50009467b48Spatrick MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
50109467b48Spatrick return static_cast<MCELFStreamer &>(Streamer);
50209467b48Spatrick }
50309467b48Spatrick
50409467b48Spatrick // A hook for emitting stuff at the end.
50509467b48Spatrick // We use it for emitting the accumulated PAL metadata as a .note record.
50673471bf0Spatrick // The PAL metadata is reset after it is emitted.
finish()50709467b48Spatrick void AMDGPUTargetELFStreamer::finish() {
50873471bf0Spatrick MCAssembler &MCA = getStreamer().getAssembler();
50973471bf0Spatrick MCA.setELFHeaderEFlags(getEFlags());
51073471bf0Spatrick
51109467b48Spatrick std::string Blob;
51209467b48Spatrick const char *Vendor = getPALMetadata()->getVendor();
51309467b48Spatrick unsigned Type = getPALMetadata()->getType();
51409467b48Spatrick getPALMetadata()->toBlob(Type, Blob);
51509467b48Spatrick if (Blob.empty())
51609467b48Spatrick return;
51709467b48Spatrick EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
518097a140dSpatrick [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });
51973471bf0Spatrick
52073471bf0Spatrick // Reset the pal metadata so its data will not affect a compilation that
52173471bf0Spatrick // reuses this object.
52273471bf0Spatrick getPALMetadata()->reset();
52309467b48Spatrick }
52409467b48Spatrick
EmitNote(StringRef Name,const MCExpr * DescSZ,unsigned NoteType,function_ref<void (MCELFStreamer &)> EmitDesc)52509467b48Spatrick void AMDGPUTargetELFStreamer::EmitNote(
52609467b48Spatrick StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
52709467b48Spatrick function_ref<void(MCELFStreamer &)> EmitDesc) {
52809467b48Spatrick auto &S = getStreamer();
52909467b48Spatrick auto &Context = S.getContext();
53009467b48Spatrick
53109467b48Spatrick auto NameSZ = Name.size() + 1;
53209467b48Spatrick
533097a140dSpatrick unsigned NoteFlags = 0;
534097a140dSpatrick // TODO Apparently, this is currently needed for OpenCL as mentioned in
535097a140dSpatrick // https://reviews.llvm.org/D74995
53673471bf0Spatrick if (STI.getTargetTriple().getOS() == Triple::AMDHSA)
537097a140dSpatrick NoteFlags = ELF::SHF_ALLOC;
538097a140dSpatrick
539*d415bd75Srobert S.pushSection();
540*d415bd75Srobert S.switchSection(
541097a140dSpatrick Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
542097a140dSpatrick S.emitInt32(NameSZ); // namesz
543097a140dSpatrick S.emitValue(DescSZ, 4); // descz
544097a140dSpatrick S.emitInt32(NoteType); // type
545097a140dSpatrick S.emitBytes(Name); // name
546*d415bd75Srobert S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
54709467b48Spatrick EmitDesc(S); // desc
548*d415bd75Srobert S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
549*d415bd75Srobert S.popSection();
55009467b48Spatrick }
55109467b48Spatrick
getEFlags()55273471bf0Spatrick unsigned AMDGPUTargetELFStreamer::getEFlags() {
55373471bf0Spatrick switch (STI.getTargetTriple().getArch()) {
55473471bf0Spatrick default:
55573471bf0Spatrick llvm_unreachable("Unsupported Arch");
55673471bf0Spatrick case Triple::r600:
55773471bf0Spatrick return getEFlagsR600();
55873471bf0Spatrick case Triple::amdgcn:
55973471bf0Spatrick return getEFlagsAMDGCN();
56073471bf0Spatrick }
56173471bf0Spatrick }
56273471bf0Spatrick
getEFlagsR600()56373471bf0Spatrick unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
56473471bf0Spatrick assert(STI.getTargetTriple().getArch() == Triple::r600);
56573471bf0Spatrick
56673471bf0Spatrick return getElfMach(STI.getCPU());
56773471bf0Spatrick }
56873471bf0Spatrick
getEFlagsAMDGCN()56973471bf0Spatrick unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
57073471bf0Spatrick assert(STI.getTargetTriple().getArch() == Triple::amdgcn);
57173471bf0Spatrick
57273471bf0Spatrick switch (STI.getTargetTriple().getOS()) {
57373471bf0Spatrick default:
57473471bf0Spatrick // TODO: Why are some tests have "mingw" listed as OS?
57573471bf0Spatrick // llvm_unreachable("Unsupported OS");
57673471bf0Spatrick case Triple::UnknownOS:
57773471bf0Spatrick return getEFlagsUnknownOS();
57873471bf0Spatrick case Triple::AMDHSA:
57973471bf0Spatrick return getEFlagsAMDHSA();
58073471bf0Spatrick case Triple::AMDPAL:
58173471bf0Spatrick return getEFlagsAMDPAL();
58273471bf0Spatrick case Triple::Mesa3D:
58373471bf0Spatrick return getEFlagsMesa3D();
58473471bf0Spatrick }
58573471bf0Spatrick }
58673471bf0Spatrick
getEFlagsUnknownOS()58773471bf0Spatrick unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
58873471bf0Spatrick // TODO: Why are some tests have "mingw" listed as OS?
58973471bf0Spatrick // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
59073471bf0Spatrick
59173471bf0Spatrick return getEFlagsV3();
59273471bf0Spatrick }
59373471bf0Spatrick
getEFlagsAMDHSA()59473471bf0Spatrick unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
59573471bf0Spatrick assert(STI.getTargetTriple().getOS() == Triple::AMDHSA);
59673471bf0Spatrick
597*d415bd75Srobert if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
59873471bf0Spatrick switch (*HsaAbiVer) {
59973471bf0Spatrick case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
60073471bf0Spatrick case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
60173471bf0Spatrick return getEFlagsV3();
60273471bf0Spatrick case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
603*d415bd75Srobert case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
60473471bf0Spatrick return getEFlagsV4();
60573471bf0Spatrick }
60673471bf0Spatrick }
60773471bf0Spatrick
60873471bf0Spatrick llvm_unreachable("HSA OS ABI Version identification must be defined");
60973471bf0Spatrick }
61073471bf0Spatrick
getEFlagsAMDPAL()61173471bf0Spatrick unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
61273471bf0Spatrick assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
61373471bf0Spatrick
61473471bf0Spatrick return getEFlagsV3();
61573471bf0Spatrick }
61673471bf0Spatrick
getEFlagsMesa3D()61773471bf0Spatrick unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
61873471bf0Spatrick assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
61973471bf0Spatrick
62073471bf0Spatrick return getEFlagsV3();
62173471bf0Spatrick }
62273471bf0Spatrick
getEFlagsV3()62373471bf0Spatrick unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
62473471bf0Spatrick unsigned EFlagsV3 = 0;
62573471bf0Spatrick
62673471bf0Spatrick // mach.
62773471bf0Spatrick EFlagsV3 |= getElfMach(STI.getCPU());
62873471bf0Spatrick
62973471bf0Spatrick // xnack.
63073471bf0Spatrick if (getTargetID()->isXnackOnOrAny())
63173471bf0Spatrick EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
63273471bf0Spatrick // sramecc.
63373471bf0Spatrick if (getTargetID()->isSramEccOnOrAny())
63473471bf0Spatrick EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
63573471bf0Spatrick
63673471bf0Spatrick return EFlagsV3;
63773471bf0Spatrick }
63873471bf0Spatrick
getEFlagsV4()63973471bf0Spatrick unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
64073471bf0Spatrick unsigned EFlagsV4 = 0;
64173471bf0Spatrick
64273471bf0Spatrick // mach.
64373471bf0Spatrick EFlagsV4 |= getElfMach(STI.getCPU());
64473471bf0Spatrick
64573471bf0Spatrick // xnack.
64673471bf0Spatrick switch (getTargetID()->getXnackSetting()) {
64773471bf0Spatrick case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
64873471bf0Spatrick EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
64973471bf0Spatrick break;
65073471bf0Spatrick case AMDGPU::IsaInfo::TargetIDSetting::Any:
65173471bf0Spatrick EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
65273471bf0Spatrick break;
65373471bf0Spatrick case AMDGPU::IsaInfo::TargetIDSetting::Off:
65473471bf0Spatrick EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
65573471bf0Spatrick break;
65673471bf0Spatrick case AMDGPU::IsaInfo::TargetIDSetting::On:
65773471bf0Spatrick EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
65873471bf0Spatrick break;
65973471bf0Spatrick }
66073471bf0Spatrick // sramecc.
66173471bf0Spatrick switch (getTargetID()->getSramEccSetting()) {
66273471bf0Spatrick case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
66373471bf0Spatrick EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
66473471bf0Spatrick break;
66573471bf0Spatrick case AMDGPU::IsaInfo::TargetIDSetting::Any:
66673471bf0Spatrick EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
66773471bf0Spatrick break;
66873471bf0Spatrick case AMDGPU::IsaInfo::TargetIDSetting::Off:
66973471bf0Spatrick EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
67073471bf0Spatrick break;
67173471bf0Spatrick case AMDGPU::IsaInfo::TargetIDSetting::On:
67273471bf0Spatrick EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
67373471bf0Spatrick break;
67473471bf0Spatrick }
67573471bf0Spatrick
67673471bf0Spatrick return EFlagsV4;
67773471bf0Spatrick }
67873471bf0Spatrick
EmitDirectiveAMDGCNTarget()67973471bf0Spatrick void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
68009467b48Spatrick
EmitDirectiveHSACodeObjectVersion(uint32_t Major,uint32_t Minor)68109467b48Spatrick void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
68209467b48Spatrick uint32_t Major, uint32_t Minor) {
68309467b48Spatrick
68409467b48Spatrick EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
68573471bf0Spatrick ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
686097a140dSpatrick OS.emitInt32(Major);
687097a140dSpatrick OS.emitInt32(Minor);
68809467b48Spatrick });
68909467b48Spatrick }
69009467b48Spatrick
69109467b48Spatrick void
EmitDirectiveHSACodeObjectISAV2(uint32_t Major,uint32_t Minor,uint32_t Stepping,StringRef VendorName,StringRef ArchName)69273471bf0Spatrick AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISAV2(uint32_t Major,
69309467b48Spatrick uint32_t Minor,
69409467b48Spatrick uint32_t Stepping,
69509467b48Spatrick StringRef VendorName,
69609467b48Spatrick StringRef ArchName) {
69709467b48Spatrick uint16_t VendorNameSize = VendorName.size() + 1;
69809467b48Spatrick uint16_t ArchNameSize = ArchName.size() + 1;
69909467b48Spatrick
70009467b48Spatrick unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
70109467b48Spatrick sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
70209467b48Spatrick VendorNameSize + ArchNameSize;
70309467b48Spatrick
70473471bf0Spatrick convertIsaVersionV2(Major, Minor, Stepping, TargetID->isSramEccOnOrAny(), TargetID->isXnackOnOrAny());
70509467b48Spatrick EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
70673471bf0Spatrick ELF::NT_AMD_HSA_ISA_VERSION, [&](MCELFStreamer &OS) {
707097a140dSpatrick OS.emitInt16(VendorNameSize);
708097a140dSpatrick OS.emitInt16(ArchNameSize);
709097a140dSpatrick OS.emitInt32(Major);
710097a140dSpatrick OS.emitInt32(Minor);
711097a140dSpatrick OS.emitInt32(Stepping);
712097a140dSpatrick OS.emitBytes(VendorName);
713097a140dSpatrick OS.emitInt8(0); // NULL terminate VendorName
714097a140dSpatrick OS.emitBytes(ArchName);
715*d415bd75Srobert OS.emitInt8(0); // NULL terminate ArchName
71609467b48Spatrick });
71709467b48Spatrick }
71809467b48Spatrick
71909467b48Spatrick void
EmitAMDKernelCodeT(const amd_kernel_code_t & Header)72009467b48Spatrick AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
72109467b48Spatrick
72209467b48Spatrick MCStreamer &OS = getStreamer();
723*d415bd75Srobert OS.pushSection();
724097a140dSpatrick OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
725*d415bd75Srobert OS.popSection();
72609467b48Spatrick }
72709467b48Spatrick
EmitAMDGPUSymbolType(StringRef SymbolName,unsigned Type)72809467b48Spatrick void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
72909467b48Spatrick unsigned Type) {
73009467b48Spatrick MCSymbolELF *Symbol = cast<MCSymbolELF>(
73109467b48Spatrick getStreamer().getContext().getOrCreateSymbol(SymbolName));
73209467b48Spatrick Symbol->setType(Type);
73309467b48Spatrick }
73409467b48Spatrick
emitAMDGPULDS(MCSymbol * Symbol,unsigned Size,Align Alignment)73509467b48Spatrick void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
736097a140dSpatrick Align Alignment) {
73709467b48Spatrick MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
73809467b48Spatrick SymbolELF->setType(ELF::STT_OBJECT);
73909467b48Spatrick
74009467b48Spatrick if (!SymbolELF->isBindingSet()) {
74109467b48Spatrick SymbolELF->setBinding(ELF::STB_GLOBAL);
74209467b48Spatrick SymbolELF->setExternal(true);
74309467b48Spatrick }
74409467b48Spatrick
745*d415bd75Srobert if (SymbolELF->declareCommon(Size, Alignment, true)) {
74609467b48Spatrick report_fatal_error("Symbol: " + Symbol->getName() +
74709467b48Spatrick " redeclared as different type");
74809467b48Spatrick }
74909467b48Spatrick
75009467b48Spatrick SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
75109467b48Spatrick SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
75209467b48Spatrick }
75309467b48Spatrick
EmitISAVersion()75473471bf0Spatrick bool AMDGPUTargetELFStreamer::EmitISAVersion() {
75509467b48Spatrick // Create two labels to mark the beginning and end of the desc field
75609467b48Spatrick // and a MCExpr to calculate the size of the desc field.
75709467b48Spatrick auto &Context = getContext();
75809467b48Spatrick auto *DescBegin = Context.createTempSymbol();
75909467b48Spatrick auto *DescEnd = Context.createTempSymbol();
76009467b48Spatrick auto *DescSZ = MCBinaryExpr::createSub(
76109467b48Spatrick MCSymbolRefExpr::create(DescEnd, Context),
76209467b48Spatrick MCSymbolRefExpr::create(DescBegin, Context), Context);
76309467b48Spatrick
76473471bf0Spatrick EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
76509467b48Spatrick [&](MCELFStreamer &OS) {
766097a140dSpatrick OS.emitLabel(DescBegin);
76773471bf0Spatrick OS.emitBytes(getTargetID()->toString());
768097a140dSpatrick OS.emitLabel(DescEnd);
76909467b48Spatrick });
77009467b48Spatrick return true;
77109467b48Spatrick }
77209467b48Spatrick
EmitHSAMetadata(msgpack::Document & HSAMetadataDoc,bool Strict)77309467b48Spatrick bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
77409467b48Spatrick bool Strict) {
77573471bf0Spatrick HSAMD::V3::MetadataVerifier Verifier(Strict);
77609467b48Spatrick if (!Verifier.verify(HSAMetadataDoc.getRoot()))
77709467b48Spatrick return false;
77809467b48Spatrick
77909467b48Spatrick std::string HSAMetadataString;
78009467b48Spatrick HSAMetadataDoc.writeToBlob(HSAMetadataString);
78109467b48Spatrick
78209467b48Spatrick // Create two labels to mark the beginning and end of the desc field
78309467b48Spatrick // and a MCExpr to calculate the size of the desc field.
78409467b48Spatrick auto &Context = getContext();
78509467b48Spatrick auto *DescBegin = Context.createTempSymbol();
78609467b48Spatrick auto *DescEnd = Context.createTempSymbol();
78709467b48Spatrick auto *DescSZ = MCBinaryExpr::createSub(
78809467b48Spatrick MCSymbolRefExpr::create(DescEnd, Context),
78909467b48Spatrick MCSymbolRefExpr::create(DescBegin, Context), Context);
79009467b48Spatrick
79109467b48Spatrick EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
79209467b48Spatrick [&](MCELFStreamer &OS) {
793097a140dSpatrick OS.emitLabel(DescBegin);
794097a140dSpatrick OS.emitBytes(HSAMetadataString);
795097a140dSpatrick OS.emitLabel(DescEnd);
79609467b48Spatrick });
79709467b48Spatrick return true;
79809467b48Spatrick }
79909467b48Spatrick
EmitHSAMetadata(const AMDGPU::HSAMD::Metadata & HSAMetadata)80009467b48Spatrick bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
80109467b48Spatrick const AMDGPU::HSAMD::Metadata &HSAMetadata) {
80209467b48Spatrick std::string HSAMetadataString;
80309467b48Spatrick if (HSAMD::toString(HSAMetadata, HSAMetadataString))
80409467b48Spatrick return false;
80509467b48Spatrick
80609467b48Spatrick // Create two labels to mark the beginning and end of the desc field
80709467b48Spatrick // and a MCExpr to calculate the size of the desc field.
80809467b48Spatrick auto &Context = getContext();
80909467b48Spatrick auto *DescBegin = Context.createTempSymbol();
81009467b48Spatrick auto *DescEnd = Context.createTempSymbol();
81109467b48Spatrick auto *DescSZ = MCBinaryExpr::createSub(
81209467b48Spatrick MCSymbolRefExpr::create(DescEnd, Context),
81309467b48Spatrick MCSymbolRefExpr::create(DescBegin, Context), Context);
81409467b48Spatrick
81573471bf0Spatrick EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_METADATA,
81609467b48Spatrick [&](MCELFStreamer &OS) {
817097a140dSpatrick OS.emitLabel(DescBegin);
818097a140dSpatrick OS.emitBytes(HSAMetadataString);
819097a140dSpatrick OS.emitLabel(DescEnd);
82009467b48Spatrick });
82109467b48Spatrick return true;
82209467b48Spatrick }
82309467b48Spatrick
EmitCodeEnd(const MCSubtargetInfo & STI)82473471bf0Spatrick bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
82509467b48Spatrick const uint32_t Encoded_s_code_end = 0xbf9f0000;
82673471bf0Spatrick const uint32_t Encoded_s_nop = 0xbf800000;
82773471bf0Spatrick uint32_t Encoded_pad = Encoded_s_code_end;
82873471bf0Spatrick
82973471bf0Spatrick // Instruction cache line size in bytes.
830*d415bd75Srobert const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
83173471bf0Spatrick const unsigned CacheLineSize = 1u << Log2CacheLineSize;
83273471bf0Spatrick
83373471bf0Spatrick // Extra padding amount in bytes to support prefetch mode 3.
83473471bf0Spatrick unsigned FillSize = 3 * CacheLineSize;
83573471bf0Spatrick
83673471bf0Spatrick if (AMDGPU::isGFX90A(STI)) {
83773471bf0Spatrick Encoded_pad = Encoded_s_nop;
83873471bf0Spatrick FillSize = 16 * CacheLineSize;
83973471bf0Spatrick }
84009467b48Spatrick
84109467b48Spatrick MCStreamer &OS = getStreamer();
842*d415bd75Srobert OS.pushSection();
843*d415bd75Srobert OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4);
84473471bf0Spatrick for (unsigned I = 0; I < FillSize; I += 4)
84573471bf0Spatrick OS.emitInt32(Encoded_pad);
846*d415bd75Srobert OS.popSection();
84709467b48Spatrick return true;
84809467b48Spatrick }
84909467b48Spatrick
EmitAmdhsaKernelDescriptor(const MCSubtargetInfo & STI,StringRef KernelName,const amdhsa::kernel_descriptor_t & KernelDescriptor,uint64_t NextVGPR,uint64_t NextSGPR,bool ReserveVCC,bool ReserveFlatScr)85009467b48Spatrick void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
85109467b48Spatrick const MCSubtargetInfo &STI, StringRef KernelName,
85209467b48Spatrick const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
85373471bf0Spatrick uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
85409467b48Spatrick auto &Streamer = getStreamer();
85509467b48Spatrick auto &Context = Streamer.getContext();
85609467b48Spatrick
85709467b48Spatrick MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
85809467b48Spatrick Context.getOrCreateSymbol(Twine(KernelName)));
85909467b48Spatrick MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
86009467b48Spatrick Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
86109467b48Spatrick
86209467b48Spatrick // Copy kernel descriptor symbol's binding, other and visibility from the
86309467b48Spatrick // kernel code symbol.
86409467b48Spatrick KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
86509467b48Spatrick KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
86609467b48Spatrick KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
86709467b48Spatrick // Kernel descriptor symbol's type and size are fixed.
86809467b48Spatrick KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
86909467b48Spatrick KernelDescriptorSymbol->setSize(
87009467b48Spatrick MCConstantExpr::create(sizeof(KernelDescriptor), Context));
87109467b48Spatrick
87209467b48Spatrick // The visibility of the kernel code symbol must be protected or less to allow
87309467b48Spatrick // static relocations from the kernel descriptor to be used.
87409467b48Spatrick if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
87509467b48Spatrick KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
87609467b48Spatrick
877097a140dSpatrick Streamer.emitLabel(KernelDescriptorSymbol);
87873471bf0Spatrick Streamer.emitInt32(KernelDescriptor.group_segment_fixed_size);
87973471bf0Spatrick Streamer.emitInt32(KernelDescriptor.private_segment_fixed_size);
88073471bf0Spatrick Streamer.emitInt32(KernelDescriptor.kernarg_size);
88173471bf0Spatrick
88273471bf0Spatrick for (uint8_t Res : KernelDescriptor.reserved0)
88373471bf0Spatrick Streamer.emitInt8(Res);
88473471bf0Spatrick
88509467b48Spatrick // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
88609467b48Spatrick // expression being created is:
88709467b48Spatrick // (start of kernel code) - (start of kernel descriptor)
88809467b48Spatrick // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
889097a140dSpatrick Streamer.emitValue(MCBinaryExpr::createSub(
89009467b48Spatrick MCSymbolRefExpr::create(
89109467b48Spatrick KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
89209467b48Spatrick MCSymbolRefExpr::create(
89309467b48Spatrick KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
89409467b48Spatrick Context),
89509467b48Spatrick sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
89673471bf0Spatrick for (uint8_t Res : KernelDescriptor.reserved1)
89773471bf0Spatrick Streamer.emitInt8(Res);
89873471bf0Spatrick Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc3);
89973471bf0Spatrick Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc1);
90073471bf0Spatrick Streamer.emitInt32(KernelDescriptor.compute_pgm_rsrc2);
90173471bf0Spatrick Streamer.emitInt16(KernelDescriptor.kernel_code_properties);
90273471bf0Spatrick for (uint8_t Res : KernelDescriptor.reserved2)
90373471bf0Spatrick Streamer.emitInt8(Res);
90409467b48Spatrick }
905