1 //===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides AMDGPU specific target streamer methods. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPUTargetStreamer.h" 14 #include "AMDGPUMCExpr.h" 15 #include "AMDGPUMCKernelDescriptor.h" 16 #include "AMDGPUPTNote.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" 20 #include "llvm/BinaryFormat/ELF.h" 21 #include "llvm/MC/MCAssembler.h" 22 #include "llvm/MC/MCContext.h" 23 #include "llvm/MC/MCELFObjectWriter.h" 24 #include "llvm/MC/MCELFStreamer.h" 25 #include "llvm/MC/MCSubtargetInfo.h" 26 #include "llvm/Support/AMDGPUMetadata.h" 27 #include "llvm/Support/AMDHSAKernelDescriptor.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/FormattedStream.h" 31 #include "llvm/TargetParser/TargetParser.h" 32 33 using namespace llvm; 34 using namespace llvm::AMDGPU; 35 36 //===----------------------------------------------------------------------===// 37 // AMDGPUTargetStreamer 38 //===----------------------------------------------------------------------===// 39 40 static cl::opt<unsigned> 41 ForceGenericVersion("amdgpu-force-generic-version", 42 cl::desc("Force a specific generic_v<N> flag to be " 43 "added. For testing purposes only."), 44 cl::ReallyHidden, cl::init(0)); 45 46 bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) { 47 msgpack::Document HSAMetadataDoc; 48 if (!HSAMetadataDoc.fromYAML(HSAMetadataString)) 49 return false; 50 return EmitHSAMetadata(HSAMetadataDoc, false); 51 } 52 53 StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { 54 AMDGPU::GPUKind AK; 55 56 // clang-format off 57 switch (ElfMach) { 58 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break; 59 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break; 60 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break; 61 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break; 62 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break; 63 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break; 64 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break; 65 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break; 66 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break; 67 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break; 68 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break; 69 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break; 70 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break; 71 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break; 72 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break; 73 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break; 74 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break; 75 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break; 76 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break; 77 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break; 78 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break; 79 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break; 80 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break; 81 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break; 82 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break; 83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break; 84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break; 85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break; 86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break; 87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break; 88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break; 89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break; 90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break; 91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break; 92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break; 93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break; 94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break; 95 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break; 96 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break; 97 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break; 98 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break; 99 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break; 100 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break; 101 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break; 102 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break; 103 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break; 104 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break; 105 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break; 106 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break; 107 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break; 108 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break; 109 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break; 110 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break; 111 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break; 112 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break; 113 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break; 114 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break; 115 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break; 116 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break; 117 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break; 118 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: AK = GK_GFX1153; break; 119 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break; 120 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break; 121 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break; 122 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC; break; 123 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break; 124 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break; 125 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break; 126 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC; break; 127 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break; 128 default: AK = GK_NONE; break; 129 } 130 // clang-format on 131 132 StringRef GPUName = getArchNameAMDGCN(AK); 133 if (GPUName != "") 134 return GPUName; 135 return getArchNameR600(AK); 136 } 137 138 unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { 139 AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); 140 if (AK == AMDGPU::GPUKind::GK_NONE) 141 AK = parseArchR600(GPU); 142 143 // clang-format off 144 switch (AK) { 145 case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600; 146 case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630; 147 case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880; 148 case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670; 149 case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710; 150 case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730; 151 case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770; 152 case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR; 153 case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS; 154 case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER; 155 case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD; 156 case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO; 157 case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS; 158 case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS; 159 case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN; 160 case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS; 161 case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600; 162 case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601; 163 case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602; 164 case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700; 165 case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701; 166 case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702; 167 case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703; 168 case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704; 169 case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705; 170 case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801; 171 case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802; 172 case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803; 173 case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805; 174 case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810; 175 case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900; 176 case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902; 177 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904; 178 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906; 179 case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908; 180 case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909; 181 case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A; 182 case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C; 183 case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940; 184 case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941; 185 case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942; 186 case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950; 187 case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010; 188 case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011; 189 case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012; 190 case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013; 191 case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030; 192 case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031; 193 case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032; 194 case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033; 195 case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034; 196 case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035; 197 case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036; 198 case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100; 199 case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101; 200 case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102; 201 case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103; 202 case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150; 203 case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151; 204 case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152; 205 case GK_GFX1153: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153; 206 case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200; 207 case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201; 208 case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC; 209 case GK_GFX9_4_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC; 210 case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC; 211 case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC; 212 case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC; 213 case GK_GFX12_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC; 214 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE; 215 } 216 // clang-format on 217 218 llvm_unreachable("unknown GPU"); 219 } 220 221 //===----------------------------------------------------------------------===// 222 // AMDGPUTargetAsmStreamer 223 //===----------------------------------------------------------------------===// 224 225 AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S, 226 formatted_raw_ostream &OS) 227 : AMDGPUTargetStreamer(S), OS(OS) { } 228 229 // A hook for emitting stuff at the end. 230 // We use it for emitting the accumulated PAL metadata as directives. 231 // The PAL metadata is reset after it is emitted. 232 void AMDGPUTargetAsmStreamer::finish() { 233 std::string S; 234 getPALMetadata()->toString(S); 235 OS << S; 236 237 // Reset the pal metadata so its data will not affect a compilation that 238 // reuses this object. 239 getPALMetadata()->reset(); 240 } 241 242 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() { 243 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n"; 244 } 245 246 void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion( 247 unsigned COV) { 248 AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV); 249 OS << "\t.amdhsa_code_object_version " << COV << '\n'; 250 } 251 252 void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) { 253 auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS, 254 const MCAsmInfo *MAI) { 255 printAMDGPUMCExpr(foldAMDGPUMCExpr(Expr, getContext()), OS, MAI); 256 }; 257 258 OS << "\t.amd_kernel_code_t\n"; 259 Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint); 260 OS << "\t.end_amd_kernel_code_t\n"; 261 } 262 263 void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, 264 unsigned Type) { 265 switch (Type) { 266 default: llvm_unreachable("Invalid AMDGPU symbol type"); 267 case ELF::STT_AMDGPU_HSA_KERNEL: 268 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ; 269 break; 270 } 271 } 272 273 void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, 274 Align Alignment) { 275 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " 276 << Alignment.value() << '\n'; 277 } 278 279 void AMDGPUTargetAsmStreamer::EmitMCResourceInfo( 280 const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, 281 const MCSymbol *NumExplicitSGPR, const MCSymbol *PrivateSegmentSize, 282 const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, 283 const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, 284 const MCSymbol *HasIndirectCall) { 285 #define PRINT_RES_INFO(ARG) \ 286 OS << "\t.set "; \ 287 ARG->print(OS, getContext().getAsmInfo()); \ 288 OS << ", "; \ 289 ARG->getVariableValue()->print(OS, getContext().getAsmInfo()); \ 290 Streamer.addBlankLine(); 291 292 PRINT_RES_INFO(NumVGPR); 293 PRINT_RES_INFO(NumAGPR); 294 PRINT_RES_INFO(NumExplicitSGPR); 295 PRINT_RES_INFO(PrivateSegmentSize); 296 PRINT_RES_INFO(UsesVCC); 297 PRINT_RES_INFO(UsesFlatScratch); 298 PRINT_RES_INFO(HasDynamicallySizedStack); 299 PRINT_RES_INFO(HasRecursion); 300 PRINT_RES_INFO(HasIndirectCall); 301 #undef PRINT_RES_INFO 302 } 303 304 void AMDGPUTargetAsmStreamer::EmitMCResourceMaximums(const MCSymbol *MaxVGPR, 305 const MCSymbol *MaxAGPR, 306 const MCSymbol *MaxSGPR) { 307 #define PRINT_RES_INFO(ARG) \ 308 OS << "\t.set "; \ 309 ARG->print(OS, getContext().getAsmInfo()); \ 310 OS << ", "; \ 311 ARG->getVariableValue()->print(OS, getContext().getAsmInfo()); \ 312 Streamer.addBlankLine(); 313 314 PRINT_RES_INFO(MaxVGPR); 315 PRINT_RES_INFO(MaxAGPR); 316 PRINT_RES_INFO(MaxSGPR); 317 #undef PRINT_RES_INFO 318 } 319 320 bool AMDGPUTargetAsmStreamer::EmitISAVersion() { 321 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n"; 322 return true; 323 } 324 325 bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( 326 msgpack::Document &HSAMetadataDoc, bool Strict) { 327 HSAMD::V3::MetadataVerifier Verifier(Strict); 328 if (!Verifier.verify(HSAMetadataDoc.getRoot())) 329 return false; 330 331 std::string HSAMetadataString; 332 raw_string_ostream StrOS(HSAMetadataString); 333 HSAMetadataDoc.toYAML(StrOS); 334 335 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n'; 336 OS << StrOS.str() << '\n'; 337 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n'; 338 return true; 339 } 340 341 bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { 342 const uint32_t Encoded_s_code_end = 0xbf9f0000; 343 const uint32_t Encoded_s_nop = 0xbf800000; 344 uint32_t Encoded_pad = Encoded_s_code_end; 345 346 // Instruction cache line size in bytes. 347 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6; 348 const unsigned CacheLineSize = 1u << Log2CacheLineSize; 349 350 // Extra padding amount in bytes to support prefetch mode 3. 351 unsigned FillSize = 3 * CacheLineSize; 352 353 if (AMDGPU::isGFX90A(STI)) { 354 Encoded_pad = Encoded_s_nop; 355 FillSize = 16 * CacheLineSize; 356 } 357 358 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n'; 359 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n'; 360 return true; 361 } 362 363 void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( 364 const MCSubtargetInfo &STI, StringRef KernelName, 365 const MCKernelDescriptor &KD, const MCExpr *NextVGPR, 366 const MCExpr *NextSGPR, const MCExpr *ReserveVCC, 367 const MCExpr *ReserveFlatScr) { 368 IsaVersion IVersion = getIsaVersion(STI.getCPU()); 369 const MCAsmInfo *MAI = getContext().getAsmInfo(); 370 371 OS << "\t.amdhsa_kernel " << KernelName << '\n'; 372 373 auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask, 374 StringRef Directive) { 375 OS << "\t\t" << Directive << ' '; 376 const MCExpr *ShiftedAndMaskedExpr = 377 MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext()); 378 const MCExpr *New = foldAMDGPUMCExpr(ShiftedAndMaskedExpr, getContext()); 379 printAMDGPUMCExpr(New, OS, MAI); 380 OS << '\n'; 381 }; 382 383 auto EmitMCExpr = [&](const MCExpr *Value) { 384 const MCExpr *NewExpr = foldAMDGPUMCExpr(Value, getContext()); 385 printAMDGPUMCExpr(NewExpr, OS, MAI); 386 }; 387 388 OS << "\t\t.amdhsa_group_segment_fixed_size "; 389 EmitMCExpr(KD.group_segment_fixed_size); 390 OS << '\n'; 391 392 OS << "\t\t.amdhsa_private_segment_fixed_size "; 393 EmitMCExpr(KD.private_segment_fixed_size); 394 OS << '\n'; 395 396 OS << "\t\t.amdhsa_kernarg_size "; 397 EmitMCExpr(KD.kernarg_size); 398 OS << '\n'; 399 400 PrintField( 401 KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, 402 amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count"); 403 404 if (!hasArchitectedFlatScratch(STI)) 405 PrintField( 406 KD.kernel_code_properties, 407 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT, 408 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 409 ".amdhsa_user_sgpr_private_segment_buffer"); 410 PrintField(KD.kernel_code_properties, 411 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT, 412 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, 413 ".amdhsa_user_sgpr_dispatch_ptr"); 414 PrintField(KD.kernel_code_properties, 415 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT, 416 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, 417 ".amdhsa_user_sgpr_queue_ptr"); 418 PrintField(KD.kernel_code_properties, 419 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT, 420 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 421 ".amdhsa_user_sgpr_kernarg_segment_ptr"); 422 PrintField(KD.kernel_code_properties, 423 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT, 424 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, 425 ".amdhsa_user_sgpr_dispatch_id"); 426 if (!hasArchitectedFlatScratch(STI)) 427 PrintField(KD.kernel_code_properties, 428 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT, 429 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, 430 ".amdhsa_user_sgpr_flat_scratch_init"); 431 if (hasKernargPreload(STI)) { 432 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT, 433 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH, 434 ".amdhsa_user_sgpr_kernarg_preload_length"); 435 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT, 436 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET, 437 ".amdhsa_user_sgpr_kernarg_preload_offset"); 438 } 439 PrintField( 440 KD.kernel_code_properties, 441 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT, 442 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 443 ".amdhsa_user_sgpr_private_segment_size"); 444 if (IVersion.Major >= 10) 445 PrintField(KD.kernel_code_properties, 446 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT, 447 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 448 ".amdhsa_wavefront_size32"); 449 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) 450 PrintField(KD.kernel_code_properties, 451 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, 452 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, 453 ".amdhsa_uses_dynamic_stack"); 454 PrintField(KD.compute_pgm_rsrc2, 455 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT, 456 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, 457 (hasArchitectedFlatScratch(STI) 458 ? ".amdhsa_enable_private_segment" 459 : ".amdhsa_system_sgpr_private_segment_wavefront_offset")); 460 PrintField(KD.compute_pgm_rsrc2, 461 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT, 462 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 463 ".amdhsa_system_sgpr_workgroup_id_x"); 464 PrintField(KD.compute_pgm_rsrc2, 465 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT, 466 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, 467 ".amdhsa_system_sgpr_workgroup_id_y"); 468 PrintField(KD.compute_pgm_rsrc2, 469 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT, 470 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, 471 ".amdhsa_system_sgpr_workgroup_id_z"); 472 PrintField(KD.compute_pgm_rsrc2, 473 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT, 474 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, 475 ".amdhsa_system_sgpr_workgroup_info"); 476 PrintField(KD.compute_pgm_rsrc2, 477 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT, 478 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, 479 ".amdhsa_system_vgpr_workitem_id"); 480 481 // These directives are required. 482 OS << "\t\t.amdhsa_next_free_vgpr "; 483 EmitMCExpr(NextVGPR); 484 OS << '\n'; 485 486 OS << "\t\t.amdhsa_next_free_sgpr "; 487 EmitMCExpr(NextSGPR); 488 OS << '\n'; 489 490 if (AMDGPU::isGFX90A(STI)) { 491 // MCExpr equivalent of taking the (accum_offset + 1) * 4. 492 const MCExpr *accum_bits = MCKernelDescriptor::bits_get( 493 KD.compute_pgm_rsrc3, 494 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, 495 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext()); 496 accum_bits = MCBinaryExpr::createAdd( 497 accum_bits, MCConstantExpr::create(1, getContext()), getContext()); 498 accum_bits = MCBinaryExpr::createMul( 499 accum_bits, MCConstantExpr::create(4, getContext()), getContext()); 500 OS << "\t\t.amdhsa_accum_offset "; 501 const MCExpr *New = foldAMDGPUMCExpr(accum_bits, getContext()); 502 printAMDGPUMCExpr(New, OS, MAI); 503 OS << '\n'; 504 } 505 506 OS << "\t\t.amdhsa_reserve_vcc "; 507 EmitMCExpr(ReserveVCC); 508 OS << '\n'; 509 510 if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) { 511 OS << "\t\t.amdhsa_reserve_flat_scratch "; 512 EmitMCExpr(ReserveFlatScr); 513 OS << '\n'; 514 } 515 516 switch (CodeObjectVersion) { 517 default: 518 break; 519 case AMDGPU::AMDHSA_COV4: 520 case AMDGPU::AMDHSA_COV5: 521 if (getTargetID()->isXnackSupported()) 522 OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n'; 523 break; 524 } 525 526 PrintField(KD.compute_pgm_rsrc1, 527 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT, 528 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, 529 ".amdhsa_float_round_mode_32"); 530 PrintField(KD.compute_pgm_rsrc1, 531 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT, 532 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, 533 ".amdhsa_float_round_mode_16_64"); 534 PrintField(KD.compute_pgm_rsrc1, 535 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT, 536 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, 537 ".amdhsa_float_denorm_mode_32"); 538 PrintField(KD.compute_pgm_rsrc1, 539 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, 540 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, 541 ".amdhsa_float_denorm_mode_16_64"); 542 if (IVersion.Major < 12) { 543 PrintField(KD.compute_pgm_rsrc1, 544 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, 545 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 546 ".amdhsa_dx10_clamp"); 547 PrintField(KD.compute_pgm_rsrc1, 548 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT, 549 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 550 ".amdhsa_ieee_mode"); 551 } 552 if (IVersion.Major >= 9) { 553 PrintField(KD.compute_pgm_rsrc1, 554 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT, 555 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, 556 ".amdhsa_fp16_overflow"); 557 } 558 if (AMDGPU::isGFX90A(STI)) 559 PrintField(KD.compute_pgm_rsrc3, 560 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, 561 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split"); 562 if (IVersion.Major >= 10) { 563 PrintField(KD.compute_pgm_rsrc1, 564 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT, 565 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, 566 ".amdhsa_workgroup_processor_mode"); 567 PrintField(KD.compute_pgm_rsrc1, 568 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT, 569 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 570 ".amdhsa_memory_ordered"); 571 PrintField(KD.compute_pgm_rsrc1, 572 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT, 573 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, 574 ".amdhsa_forward_progress"); 575 } 576 if (IVersion.Major >= 10 && IVersion.Major < 12) { 577 PrintField(KD.compute_pgm_rsrc3, 578 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT, 579 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, 580 ".amdhsa_shared_vgpr_count"); 581 } 582 if (IVersion.Major >= 12) { 583 PrintField(KD.compute_pgm_rsrc1, 584 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT, 585 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 586 ".amdhsa_round_robin_scheduling"); 587 } 588 PrintField( 589 KD.compute_pgm_rsrc2, 590 amdhsa:: 591 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT, 592 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 593 ".amdhsa_exception_fp_ieee_invalid_op"); 594 PrintField( 595 KD.compute_pgm_rsrc2, 596 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT, 597 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 598 ".amdhsa_exception_fp_denorm_src"); 599 PrintField( 600 KD.compute_pgm_rsrc2, 601 amdhsa:: 602 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT, 603 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 604 ".amdhsa_exception_fp_ieee_div_zero"); 605 PrintField( 606 KD.compute_pgm_rsrc2, 607 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT, 608 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 609 ".amdhsa_exception_fp_ieee_overflow"); 610 PrintField( 611 KD.compute_pgm_rsrc2, 612 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT, 613 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 614 ".amdhsa_exception_fp_ieee_underflow"); 615 PrintField( 616 KD.compute_pgm_rsrc2, 617 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT, 618 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 619 ".amdhsa_exception_fp_ieee_inexact"); 620 PrintField( 621 KD.compute_pgm_rsrc2, 622 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT, 623 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 624 ".amdhsa_exception_int_div_zero"); 625 626 OS << "\t.end_amdhsa_kernel\n"; 627 } 628 629 //===----------------------------------------------------------------------===// 630 // AMDGPUTargetELFStreamer 631 //===----------------------------------------------------------------------===// 632 633 AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S, 634 const MCSubtargetInfo &STI) 635 : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {} 636 637 MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { 638 return static_cast<MCELFStreamer &>(Streamer); 639 } 640 641 // A hook for emitting stuff at the end. 642 // We use it for emitting the accumulated PAL metadata as a .note record. 643 // The PAL metadata is reset after it is emitted. 644 void AMDGPUTargetELFStreamer::finish() { 645 ELFObjectWriter &W = getStreamer().getWriter(); 646 W.setELFHeaderEFlags(getEFlags()); 647 W.setOverrideABIVersion( 648 getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion)); 649 650 std::string Blob; 651 const char *Vendor = getPALMetadata()->getVendor(); 652 unsigned Type = getPALMetadata()->getType(); 653 getPALMetadata()->toBlob(Type, Blob); 654 if (Blob.empty()) 655 return; 656 EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type, 657 [&](MCELFStreamer &OS) { OS.emitBytes(Blob); }); 658 659 // Reset the pal metadata so its data will not affect a compilation that 660 // reuses this object. 661 getPALMetadata()->reset(); 662 } 663 664 void AMDGPUTargetELFStreamer::EmitNote( 665 StringRef Name, const MCExpr *DescSZ, unsigned NoteType, 666 function_ref<void(MCELFStreamer &)> EmitDesc) { 667 auto &S = getStreamer(); 668 auto &Context = S.getContext(); 669 670 auto NameSZ = Name.size() + 1; 671 672 unsigned NoteFlags = 0; 673 // TODO Apparently, this is currently needed for OpenCL as mentioned in 674 // https://reviews.llvm.org/D74995 675 if (isHsaAbi(STI)) 676 NoteFlags = ELF::SHF_ALLOC; 677 678 S.pushSection(); 679 S.switchSection( 680 Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags)); 681 S.emitInt32(NameSZ); // namesz 682 S.emitValue(DescSZ, 4); // descz 683 S.emitInt32(NoteType); // type 684 S.emitBytes(Name); // name 685 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0 686 EmitDesc(S); // desc 687 S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0 688 S.popSection(); 689 } 690 691 unsigned AMDGPUTargetELFStreamer::getEFlags() { 692 switch (STI.getTargetTriple().getArch()) { 693 default: 694 llvm_unreachable("Unsupported Arch"); 695 case Triple::r600: 696 return getEFlagsR600(); 697 case Triple::amdgcn: 698 return getEFlagsAMDGCN(); 699 } 700 } 701 702 unsigned AMDGPUTargetELFStreamer::getEFlagsR600() { 703 assert(STI.getTargetTriple().getArch() == Triple::r600); 704 705 return getElfMach(STI.getCPU()); 706 } 707 708 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() { 709 assert(STI.getTargetTriple().getArch() == Triple::amdgcn); 710 711 switch (STI.getTargetTriple().getOS()) { 712 default: 713 // TODO: Why are some tests have "mingw" listed as OS? 714 // llvm_unreachable("Unsupported OS"); 715 case Triple::UnknownOS: 716 return getEFlagsUnknownOS(); 717 case Triple::AMDHSA: 718 return getEFlagsAMDHSA(); 719 case Triple::AMDPAL: 720 return getEFlagsAMDPAL(); 721 case Triple::Mesa3D: 722 return getEFlagsMesa3D(); 723 } 724 } 725 726 unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() { 727 // TODO: Why are some tests have "mingw" listed as OS? 728 // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS); 729 730 return getEFlagsV3(); 731 } 732 733 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() { 734 assert(isHsaAbi(STI)); 735 736 if (CodeObjectVersion >= 6) 737 return getEFlagsV6(); 738 return getEFlagsV4(); 739 } 740 741 unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() { 742 assert(STI.getTargetTriple().getOS() == Triple::AMDPAL); 743 744 return getEFlagsV3(); 745 } 746 747 unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() { 748 assert(STI.getTargetTriple().getOS() == Triple::Mesa3D); 749 750 return getEFlagsV3(); 751 } 752 753 unsigned AMDGPUTargetELFStreamer::getEFlagsV3() { 754 unsigned EFlagsV3 = 0; 755 756 // mach. 757 EFlagsV3 |= getElfMach(STI.getCPU()); 758 759 // xnack. 760 if (getTargetID()->isXnackOnOrAny()) 761 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3; 762 // sramecc. 763 if (getTargetID()->isSramEccOnOrAny()) 764 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3; 765 766 return EFlagsV3; 767 } 768 769 unsigned AMDGPUTargetELFStreamer::getEFlagsV4() { 770 unsigned EFlagsV4 = 0; 771 772 // mach. 773 EFlagsV4 |= getElfMach(STI.getCPU()); 774 775 // xnack. 776 switch (getTargetID()->getXnackSetting()) { 777 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported: 778 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4; 779 break; 780 case AMDGPU::IsaInfo::TargetIDSetting::Any: 781 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4; 782 break; 783 case AMDGPU::IsaInfo::TargetIDSetting::Off: 784 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4; 785 break; 786 case AMDGPU::IsaInfo::TargetIDSetting::On: 787 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4; 788 break; 789 } 790 // sramecc. 791 switch (getTargetID()->getSramEccSetting()) { 792 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported: 793 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4; 794 break; 795 case AMDGPU::IsaInfo::TargetIDSetting::Any: 796 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4; 797 break; 798 case AMDGPU::IsaInfo::TargetIDSetting::Off: 799 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4; 800 break; 801 case AMDGPU::IsaInfo::TargetIDSetting::On: 802 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4; 803 break; 804 } 805 806 return EFlagsV4; 807 } 808 809 unsigned AMDGPUTargetELFStreamer::getEFlagsV6() { 810 unsigned Flags = getEFlagsV4(); 811 812 unsigned Version = ForceGenericVersion; 813 if (!Version) { 814 switch (parseArchAMDGCN(STI.getCPU())) { 815 case AMDGPU::GK_GFX9_GENERIC: 816 Version = GenericVersion::GFX9; 817 break; 818 case AMDGPU::GK_GFX9_4_GENERIC: 819 Version = GenericVersion::GFX9_4; 820 break; 821 case AMDGPU::GK_GFX10_1_GENERIC: 822 Version = GenericVersion::GFX10_1; 823 break; 824 case AMDGPU::GK_GFX10_3_GENERIC: 825 Version = GenericVersion::GFX10_3; 826 break; 827 case AMDGPU::GK_GFX11_GENERIC: 828 Version = GenericVersion::GFX11; 829 break; 830 case AMDGPU::GK_GFX12_GENERIC: 831 Version = GenericVersion::GFX12; 832 break; 833 default: 834 break; 835 } 836 } 837 838 // Versions start at 1. 839 if (Version) { 840 if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX) 841 report_fatal_error("Cannot encode generic code object version " + 842 Twine(Version) + 843 " - no ELF flag can represent this version!"); 844 Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET); 845 } 846 847 return Flags; 848 } 849 850 void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {} 851 852 void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) { 853 MCStreamer &OS = getStreamer(); 854 OS.pushSection(); 855 Header.EmitKernelCodeT(OS, getContext()); 856 OS.popSection(); 857 } 858 859 void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, 860 unsigned Type) { 861 MCSymbolELF *Symbol = cast<MCSymbolELF>( 862 getStreamer().getContext().getOrCreateSymbol(SymbolName)); 863 Symbol->setType(Type); 864 } 865 866 void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size, 867 Align Alignment) { 868 MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol); 869 SymbolELF->setType(ELF::STT_OBJECT); 870 871 if (!SymbolELF->isBindingSet()) 872 SymbolELF->setBinding(ELF::STB_GLOBAL); 873 874 if (SymbolELF->declareCommon(Size, Alignment, true)) { 875 report_fatal_error("Symbol: " + Symbol->getName() + 876 " redeclared as different type"); 877 } 878 879 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS); 880 SymbolELF->setSize(MCConstantExpr::create(Size, getContext())); 881 } 882 883 bool AMDGPUTargetELFStreamer::EmitISAVersion() { 884 // Create two labels to mark the beginning and end of the desc field 885 // and a MCExpr to calculate the size of the desc field. 886 auto &Context = getContext(); 887 auto *DescBegin = Context.createTempSymbol(); 888 auto *DescEnd = Context.createTempSymbol(); 889 auto *DescSZ = MCBinaryExpr::createSub( 890 MCSymbolRefExpr::create(DescEnd, Context), 891 MCSymbolRefExpr::create(DescBegin, Context), Context); 892 893 EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME, 894 [&](MCELFStreamer &OS) { 895 OS.emitLabel(DescBegin); 896 OS.emitBytes(getTargetID()->toString()); 897 OS.emitLabel(DescEnd); 898 }); 899 return true; 900 } 901 902 bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc, 903 bool Strict) { 904 HSAMD::V3::MetadataVerifier Verifier(Strict); 905 if (!Verifier.verify(HSAMetadataDoc.getRoot())) 906 return false; 907 908 std::string HSAMetadataString; 909 HSAMetadataDoc.writeToBlob(HSAMetadataString); 910 911 // Create two labels to mark the beginning and end of the desc field 912 // and a MCExpr to calculate the size of the desc field. 913 auto &Context = getContext(); 914 auto *DescBegin = Context.createTempSymbol(); 915 auto *DescEnd = Context.createTempSymbol(); 916 auto *DescSZ = MCBinaryExpr::createSub( 917 MCSymbolRefExpr::create(DescEnd, Context), 918 MCSymbolRefExpr::create(DescBegin, Context), Context); 919 920 EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA, 921 [&](MCELFStreamer &OS) { 922 OS.emitLabel(DescBegin); 923 OS.emitBytes(HSAMetadataString); 924 OS.emitLabel(DescEnd); 925 }); 926 return true; 927 } 928 929 bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) { 930 const uint32_t Encoded_s_code_end = 0xbf9f0000; 931 const uint32_t Encoded_s_nop = 0xbf800000; 932 uint32_t Encoded_pad = Encoded_s_code_end; 933 934 // Instruction cache line size in bytes. 935 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6; 936 const unsigned CacheLineSize = 1u << Log2CacheLineSize; 937 938 // Extra padding amount in bytes to support prefetch mode 3. 939 unsigned FillSize = 3 * CacheLineSize; 940 941 if (AMDGPU::isGFX90A(STI)) { 942 Encoded_pad = Encoded_s_nop; 943 FillSize = 16 * CacheLineSize; 944 } 945 946 MCStreamer &OS = getStreamer(); 947 OS.pushSection(); 948 OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4); 949 for (unsigned I = 0; I < FillSize; I += 4) 950 OS.emitInt32(Encoded_pad); 951 OS.popSection(); 952 return true; 953 } 954 955 void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( 956 const MCSubtargetInfo &STI, StringRef KernelName, 957 const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, 958 const MCExpr *NextSGPR, const MCExpr *ReserveVCC, 959 const MCExpr *ReserveFlatScr) { 960 auto &Streamer = getStreamer(); 961 auto &Context = Streamer.getContext(); 962 963 MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>( 964 Context.getOrCreateSymbol(Twine(KernelName))); 965 MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>( 966 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd"))); 967 968 // Copy kernel descriptor symbol's binding, other and visibility from the 969 // kernel code symbol. 970 KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding()); 971 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther()); 972 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility()); 973 // Kernel descriptor symbol's type and size are fixed. 974 KernelDescriptorSymbol->setType(ELF::STT_OBJECT); 975 KernelDescriptorSymbol->setSize( 976 MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context)); 977 978 // The visibility of the kernel code symbol must be protected or less to allow 979 // static relocations from the kernel descriptor to be used. 980 if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT) 981 KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED); 982 983 Streamer.emitLabel(KernelDescriptorSymbol); 984 Streamer.emitValue( 985 KernelDescriptor.group_segment_fixed_size, 986 sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size)); 987 Streamer.emitValue( 988 KernelDescriptor.private_segment_fixed_size, 989 sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size)); 990 Streamer.emitValue(KernelDescriptor.kernarg_size, 991 sizeof(amdhsa::kernel_descriptor_t::kernarg_size)); 992 993 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i) 994 Streamer.emitInt8(0u); 995 996 // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The 997 // expression being created is: 998 // (start of kernel code) - (start of kernel descriptor) 999 // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. 1000 Streamer.emitValue( 1001 MCBinaryExpr::createSub( 1002 MCSymbolRefExpr::create(KernelCodeSymbol, 1003 MCSymbolRefExpr::VK_AMDGPU_REL64, Context), 1004 MCSymbolRefExpr::create(KernelDescriptorSymbol, 1005 MCSymbolRefExpr::VK_None, Context), 1006 Context), 1007 sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset)); 1008 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i) 1009 Streamer.emitInt8(0u); 1010 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3, 1011 sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3)); 1012 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1, 1013 sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1)); 1014 Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2, 1015 sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2)); 1016 Streamer.emitValue( 1017 KernelDescriptor.kernel_code_properties, 1018 sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties)); 1019 Streamer.emitValue(KernelDescriptor.kernarg_preload, 1020 sizeof(amdhsa::kernel_descriptor_t::kernarg_preload)); 1021 for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i) 1022 Streamer.emitInt8(0u); 1023 } 1024