1 //===- AMDKernelCodeTUtils.cpp --------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file - utility functions to parse/print AMDGPUMCKernelCodeT structure 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDKernelCodeTUtils.h" 14 #include "AMDKernelCodeT.h" 15 #include "SIDefines.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "llvm/ADT/IndexedMap.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCExpr.h" 21 #include "llvm/MC/MCParser/MCAsmLexer.h" 22 #include "llvm/MC/MCParser/MCAsmParser.h" 23 #include "llvm/MC/MCStreamer.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 using namespace llvm; 28 using namespace llvm::AMDGPU; 29 30 // Generates the following for AMDGPUMCKernelCodeT struct members: 31 // - HasMemberXXXXX class 32 // A check to see if AMDGPUMCKernelCodeT has a specific member so it can 33 // determine which of the original amd_kernel_code_t members are duplicated 34 // (if the names don't match, the table driven strategy won't work). 35 // - IsMCExprXXXXX class 36 // Check whether a AMDGPUMCKernelcodeT struct member is MCExpr-ified or not. 37 // - GetMemberXXXXX class 38 // A retrieval helper for said member (of type const MCExpr *&). Will return 39 // a `Phony` const MCExpr * initialized to nullptr to preserve reference 40 // returns. 
// Implementation notes for GEN_HAS_MEMBER(member):
//  * HasMember##member builds a probe type (AmbiguousDerived) that inherits
//    from both AMDGPUMCKernelCodeT and a helper struct that is known to
//    declare `member`. If AMDGPUMCKernelCodeT declares `member` as well, the
//    name `U::member` is ambiguous inside the probe type, the pointer overload
//    of Test is discarded by SFINAE and the variadic overload
//    (std::true_type) is selected. Otherwise the pointer overload
//    (std::false_type) is the better match for `nullptr`.
//  * IsMCExpr##member::RESULT is true only when the member exists and its
//    declared type is exactly `const MCExpr *`.
//  * GetMember##member::Get(C) returns a reference to `C.member` when
//    IsMCExpr##member::RESULT is true; otherwise it returns a reference to the
//    per-member static `Phony` pointer (defined as nullptr at the end of the
//    macro). `Phony` itself is not const, so a write through the returned
//    reference would be visible to later callers.
//  * Do not add `//` comments inside the macro body: every line ends in a
//    continuation backslash, so a line comment would swallow the rest of the
//    macro.
#define GEN_HAS_MEMBER(member)                                                 \
  class HasMember##member {                                                    \
  private:                                                                     \
    struct KnownWithMember {                                                   \
      int member;                                                              \
    };                                                                         \
    class AmbiguousDerived : public AMDGPUMCKernelCodeT,                       \
                             public KnownWithMember {};                        \
    template <typename U>                                                      \
    static constexpr std::false_type Test(decltype(U::member) *);              \
    template <typename U> static constexpr std::true_type Test(...);           \
                                                                               \
  public:                                                                      \
    static constexpr bool RESULT =                                             \
        std::is_same_v<decltype(Test<AmbiguousDerived>(nullptr)),              \
                       std::true_type>;                                        \
  };                                                                           \
  class IsMCExpr##member {                                                     \
    template <typename U,                                                      \
              typename std::enable_if_t<                                       \
                  HasMember##member::RESULT &&                                 \
                      std::is_same_v<decltype(U::member), const MCExpr *>,     \
                  U> * = nullptr>                                              \
    static constexpr std::true_type HasMCExprType(decltype(U::member) *);      \
    template <typename U> static constexpr std::false_type HasMCExprType(...); \
                                                                               \
  public:                                                                      \
    static constexpr bool RESULT =                                             \
        std::is_same_v<decltype(HasMCExprType<AMDGPUMCKernelCodeT>(nullptr)),  \
                       std::true_type>;                                        \
  };                                                                           \
  class GetMember##member {                                                    \
  public:                                                                      \
    static const MCExpr *Phony;                                                \
    template <typename U, typename std::enable_if_t<IsMCExpr##member::RESULT,  \
                                                    U> * = nullptr>            \
    static const MCExpr *&Get(U &C) {                                          \
      assert(IsMCExpr##member::RESULT &&                                       \
             "Trying to retrieve member that does not exist.");                \
      return C.member;                                                         \
    }                                                                          \
    template <typename U, typename std::enable_if_t<!IsMCExpr##member::RESULT, \
                                                    U> * = nullptr>            \
    static const MCExpr *&Get(U &C) {                                          \
      return Phony;                                                            \
    }                                                                          \
  };                                                                           \
  const MCExpr *GetMember##member::Phony = nullptr;

// Cannot generate class declarations using the table-driven approach (see the
// table in AMDKernelCodeTInfo.h). Luckily, if any are missing here or are
// eventually added to the table, an error should occur when trying to retrieve
// the table in getMCExprIndexTable.
// One GEN_HAS_MEMBER(...) invocation per field name. Each one expands to the
// HasMember<name>, IsMCExpr<name> and GetMember<name> helper classes that the
// RECORD-driven tables later in this file reference through token pasting
// (IsMCExpr##name and GetMember##name), so the spelling of every name here
// must match the corresponding RECORD entry exactly.

// Code-object version and target-machine identification fields.
GEN_HAS_MEMBER(amd_code_version_major)
GEN_HAS_MEMBER(amd_code_version_minor)
GEN_HAS_MEMBER(amd_machine_kind)
GEN_HAS_MEMBER(amd_machine_version_major)
GEN_HAS_MEMBER(amd_machine_version_minor)
GEN_HAS_MEMBER(amd_machine_version_stepping)

// Kernel code entry / prefetch fields.
GEN_HAS_MEMBER(kernel_code_entry_byte_offset)
GEN_HAS_MEMBER(kernel_code_prefetch_byte_size)

// NOTE(review): this group contains the names that validate() reports against
// compute_pgm_resource1_registers (enable_dx10_clamp, enable_ieee_mode,
// enable_wgp_mode, enable_mem_ordered, enable_fwd_progress); presumably the
// whole group lives in that register -- confirm against AMDKernelCodeTInfo.h.
GEN_HAS_MEMBER(granulated_workitem_vgpr_count)
GEN_HAS_MEMBER(granulated_wavefront_sgpr_count)
GEN_HAS_MEMBER(priority)
GEN_HAS_MEMBER(float_mode)
GEN_HAS_MEMBER(priv)
GEN_HAS_MEMBER(enable_dx10_clamp)
GEN_HAS_MEMBER(debug_mode)
GEN_HAS_MEMBER(enable_ieee_mode)
GEN_HAS_MEMBER(enable_wgp_mode)
GEN_HAS_MEMBER(enable_mem_ordered)
GEN_HAS_MEMBER(enable_fwd_progress)

// NOTE(review): presumably the compute_pgm_resource2_registers fields --
// confirm against AMDKernelCodeTInfo.h.
GEN_HAS_MEMBER(enable_sgpr_private_segment_wave_byte_offset)
GEN_HAS_MEMBER(user_sgpr_count)
GEN_HAS_MEMBER(enable_trap_handler)
GEN_HAS_MEMBER(enable_sgpr_workgroup_id_x)
GEN_HAS_MEMBER(enable_sgpr_workgroup_id_y)
GEN_HAS_MEMBER(enable_sgpr_workgroup_id_z)
GEN_HAS_MEMBER(enable_sgpr_workgroup_info)
GEN_HAS_MEMBER(enable_vgpr_workitem_id)
GEN_HAS_MEMBER(enable_exception_msb)
GEN_HAS_MEMBER(granulated_lds_size)
GEN_HAS_MEMBER(enable_exception)

// NOTE(review): presumably the individual code_properties bits
// (is_dynamic_callstack is merged into code_properties by the binary emitter
// in this file) -- confirm the rest against AMDKernelCodeTInfo.h.
GEN_HAS_MEMBER(enable_sgpr_private_segment_buffer)
GEN_HAS_MEMBER(enable_sgpr_dispatch_ptr)
GEN_HAS_MEMBER(enable_sgpr_queue_ptr)
GEN_HAS_MEMBER(enable_sgpr_kernarg_segment_ptr)
GEN_HAS_MEMBER(enable_sgpr_dispatch_id)
GEN_HAS_MEMBER(enable_sgpr_flat_scratch_init)
GEN_HAS_MEMBER(enable_sgpr_private_segment_size)
GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_x)
GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_y)
GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_z)
GEN_HAS_MEMBER(enable_wavefront_size32)
GEN_HAS_MEMBER(enable_ordered_append_gds)
GEN_HAS_MEMBER(private_element_size)
GEN_HAS_MEMBER(is_ptr64)
GEN_HAS_MEMBER(is_dynamic_callstack)
GEN_HAS_MEMBER(is_debug_enabled) 144 GEN_HAS_MEMBER(is_xnack_enabled) 145 146 GEN_HAS_MEMBER(workitem_private_segment_byte_size) 147 GEN_HAS_MEMBER(workgroup_group_segment_byte_size) 148 GEN_HAS_MEMBER(gds_segment_byte_size) 149 GEN_HAS_MEMBER(kernarg_segment_byte_size) 150 GEN_HAS_MEMBER(workgroup_fbarrier_count) 151 GEN_HAS_MEMBER(wavefront_sgpr_count) 152 GEN_HAS_MEMBER(workitem_vgpr_count) 153 GEN_HAS_MEMBER(reserved_vgpr_first) 154 GEN_HAS_MEMBER(reserved_vgpr_count) 155 GEN_HAS_MEMBER(reserved_sgpr_first) 156 GEN_HAS_MEMBER(reserved_sgpr_count) 157 GEN_HAS_MEMBER(debug_wavefront_private_segment_offset_sgpr) 158 GEN_HAS_MEMBER(debug_private_segment_buffer_sgpr) 159 GEN_HAS_MEMBER(kernarg_segment_alignment) 160 GEN_HAS_MEMBER(group_segment_alignment) 161 GEN_HAS_MEMBER(private_segment_alignment) 162 GEN_HAS_MEMBER(wavefront_size) 163 GEN_HAS_MEMBER(call_convention) 164 GEN_HAS_MEMBER(runtime_loader_kernel_symbol) 165 166 static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldNames() { 167 static constexpr StringLiteral const Table[] = { 168 "", // not found placeholder 169 #define RECORD(name, altName, print, parse) #name 170 #include "Utils/AMDKernelCodeTInfo.h" 171 #undef RECORD 172 }; 173 return ArrayRef(Table); 174 } 175 176 static ArrayRef<StringLiteral> get_amd_kernel_code_t_FldAltNames() { 177 static constexpr StringLiteral const Table[] = { 178 "", // not found placeholder 179 #define RECORD(name, altName, print, parse) #altName 180 #include "Utils/AMDKernelCodeTInfo.h" 181 #undef RECORD 182 }; 183 return ArrayRef(Table); 184 } 185 186 static ArrayRef<bool> hasMCExprVersionTable() { 187 static bool const Table[] = { 188 #define RECORD(name, altName, print, parse) (IsMCExpr##name::RESULT) 189 #include "Utils/AMDKernelCodeTInfo.h" 190 #undef RECORD 191 }; 192 return ArrayRef(Table); 193 } 194 195 using RetrieveFx = const MCExpr *&(*)(AMDGPUMCKernelCodeT &); 196 197 static ArrayRef<RetrieveFx> getMCExprIndexTable() { 198 static const RetrieveFx Table[] = 
{ 199 #define RECORD(name, altName, print, parse) GetMember##name::Get 200 #include "Utils/AMDKernelCodeTInfo.h" 201 #undef RECORD 202 }; 203 return ArrayRef(Table); 204 } 205 206 static StringMap<int> createIndexMap(ArrayRef<StringLiteral> names, 207 ArrayRef<StringLiteral> altNames) { 208 StringMap<int> map; 209 assert(names.size() == altNames.size()); 210 for (unsigned i = 0; i < names.size(); ++i) { 211 map.insert(std::pair(names[i], i)); 212 map.insert(std::pair(altNames[i], i)); 213 } 214 return map; 215 } 216 217 static int get_amd_kernel_code_t_FieldIndex(StringRef name) { 218 static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames(), 219 get_amd_kernel_code_t_FldAltNames()); 220 return map.lookup(name) - 1; // returns -1 if not found 221 } 222 223 static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) { 224 unsigned Shift = 0; 225 unsigned Mask = 0; 226 227 Mask = ~Value; 228 for (; !(Mask & 1); Shift++, Mask >>= 1) { 229 } 230 231 return std::make_pair(Shift, Mask); 232 } 233 234 static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask, 235 uint32_t Shift, MCContext &Ctx) { 236 if (Mask) { 237 const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx); 238 Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx); 239 } 240 if (Shift) { 241 const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx); 242 Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx); 243 } 244 return Val; 245 } 246 247 static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask, 248 uint32_t Shift, MCContext &Ctx) { 249 if (Shift) { 250 const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx); 251 Val = MCBinaryExpr::createLShr(Val, ShiftExpr, Ctx); 252 } 253 if (Mask) { 254 const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx); 255 Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx); 256 } 257 return Val; 258 } 259 260 class PrintField { 261 public: 262 template <typename T, T AMDGPUMCKernelCodeT::*ptr, 263 typename 
std::enable_if_t<!std::is_integral_v<T>, T> * = nullptr> 264 static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C, 265 raw_ostream &OS, MCContext &Ctx) { 266 OS << Name << " = "; 267 const MCExpr *Value = C.*ptr; 268 int64_t Val; 269 if (Value->evaluateAsAbsolute(Val)) 270 OS << Val; 271 else 272 Value->print(OS, Ctx.getAsmInfo()); 273 } 274 275 template <typename T, T AMDGPUMCKernelCodeT::*ptr, 276 typename std::enable_if_t<std::is_integral_v<T>, T> * = nullptr> 277 static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C, 278 raw_ostream &OS, MCContext &) { 279 OS << Name << " = " << (int)(C.*ptr); 280 } 281 }; 282 283 template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1> 284 static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C, 285 raw_ostream &OS, MCContext &) { 286 const auto Mask = (static_cast<T>(1) << width) - 1; 287 OS << Name << " = " << (int)((C.*ptr >> shift) & Mask); 288 } 289 290 using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &, 291 MCContext &); 292 293 static ArrayRef<PrintFx> getPrinterTable() { 294 static const PrintFx Table[] = { 295 #define COMPPGM1(name, aname, AccMacro) \ 296 COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0) 297 #define COMPPGM2(name, aname, AccMacro) \ 298 COMPPGM(name, aname, C_00B84C_##AccMacro, S_00B84C_##AccMacro, 32) 299 #define PRINTFIELD(sname, aname, name) PrintField::printField<FLD_T(name)> 300 #define PRINTCOMP(Complement, PGMType) \ 301 [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS, \ 302 MCContext &Ctx) { \ 303 OS << Name << " = "; \ 304 auto [Shift, Mask] = getShiftMask(Complement); \ 305 const MCExpr *Value; \ 306 if (PGMType == 0) { \ 307 Value = \ 308 MaskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \ 309 } else { \ 310 Value = \ 311 MaskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \ 312 } \ 313 int64_t Val; \ 314 if 
(Value->evaluateAsAbsolute(Val)) \ 315 OS << Val; \ 316 else \ 317 Value->print(OS, Ctx.getAsmInfo()); \ 318 } 319 #define RECORD(name, altName, print, parse) print 320 #include "Utils/AMDKernelCodeTInfo.h" 321 #undef RECORD 322 }; 323 return ArrayRef(Table); 324 } 325 326 static bool expectAbsExpression(MCAsmParser &MCParser, int64_t &Value, 327 raw_ostream &Err) { 328 329 if (MCParser.getLexer().isNot(AsmToken::Equal)) { 330 Err << "expected '='"; 331 return false; 332 } 333 MCParser.getLexer().Lex(); 334 335 if (MCParser.parseAbsoluteExpression(Value)) { 336 Err << "integer absolute expression expected"; 337 return false; 338 } 339 return true; 340 } 341 342 template <typename T, T AMDGPUMCKernelCodeT::*ptr> 343 static bool parseField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, 344 raw_ostream &Err) { 345 int64_t Value = 0; 346 if (!expectAbsExpression(MCParser, Value, Err)) 347 return false; 348 C.*ptr = (T)Value; 349 return true; 350 } 351 352 template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1> 353 static bool parseBitField(AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, 354 raw_ostream &Err) { 355 int64_t Value = 0; 356 if (!expectAbsExpression(MCParser, Value, Err)) 357 return false; 358 const uint64_t Mask = ((UINT64_C(1) << width) - 1) << shift; 359 C.*ptr &= (T)~Mask; 360 C.*ptr |= (T)((Value << shift) & Mask); 361 return true; 362 } 363 364 static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, 365 raw_ostream &Err) { 366 if (MCParser.getLexer().isNot(AsmToken::Equal)) { 367 Err << "expected '='"; 368 return false; 369 } 370 MCParser.getLexer().Lex(); 371 372 if (MCParser.parseExpression(Value)) { 373 Err << "Could not parse expression"; 374 return false; 375 } 376 return true; 377 } 378 379 using ParseFx = bool (*)(AMDGPUMCKernelCodeT &, MCAsmParser &, raw_ostream &); 380 381 static ArrayRef<ParseFx> getParserTable() { 382 static const ParseFx Table[] = { 383 #define COMPPGM1(name, aname, AccMacro) \ 384 
COMPPGM(name, aname, G_00B848_##AccMacro, C_00B848_##AccMacro, 0) 385 #define COMPPGM2(name, aname, AccMacro) \ 386 COMPPGM(name, aname, G_00B84C_##AccMacro, C_00B84C_##AccMacro, 32) 387 #define PARSECOMP(Complement, PGMType) \ 388 [](AMDGPUMCKernelCodeT &C, MCAsmParser &MCParser, \ 389 raw_ostream &Err) -> bool { \ 390 MCContext &Ctx = MCParser.getContext(); \ 391 const MCExpr *Value; \ 392 if (!parseExpr(MCParser, Value, Err)) \ 393 return false; \ 394 auto [Shift, Mask] = getShiftMask(Complement); \ 395 Value = MaskShiftSet(Value, Mask, Shift, Ctx); \ 396 const MCExpr *Compl = MCConstantExpr::create(Complement, Ctx); \ 397 if (PGMType == 0) { \ 398 C.compute_pgm_resource1_registers = MCBinaryExpr::createAnd( \ 399 C.compute_pgm_resource1_registers, Compl, Ctx); \ 400 C.compute_pgm_resource1_registers = MCBinaryExpr::createOr( \ 401 C.compute_pgm_resource1_registers, Value, Ctx); \ 402 } else { \ 403 C.compute_pgm_resource2_registers = MCBinaryExpr::createAnd( \ 404 C.compute_pgm_resource2_registers, Compl, Ctx); \ 405 C.compute_pgm_resource2_registers = MCBinaryExpr::createOr( \ 406 C.compute_pgm_resource2_registers, Value, Ctx); \ 407 } \ 408 return true; \ 409 } 410 #define RECORD(name, altName, print, parse) parse 411 #include "Utils/AMDKernelCodeTInfo.h" 412 #undef RECORD 413 }; 414 return ArrayRef(Table); 415 } 416 417 static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex, 418 raw_ostream &OS, MCContext &Ctx) { 419 auto Printer = getPrinterTable()[FldIndex]; 420 if (Printer) 421 Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx); 422 } 423 424 void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI, 425 MCContext &Ctx, bool InitMCExpr) { 426 AMDGPUMCKernelCodeT(); 427 428 AMDGPU::initDefaultAMDKernelCodeT(*this, STI); 429 430 if (InitMCExpr) { 431 const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx); 432 compute_pgm_resource1_registers = 433 MCConstantExpr::create(Lo_32(compute_pgm_resource_registers), 
Ctx); 434 compute_pgm_resource2_registers = 435 MCConstantExpr::create(Hi_32(compute_pgm_resource_registers), Ctx); 436 is_dynamic_callstack = ZeroExpr; 437 wavefront_sgpr_count = ZeroExpr; 438 workitem_vgpr_count = ZeroExpr; 439 workitem_private_segment_byte_size = ZeroExpr; 440 } 441 } 442 443 void AMDGPUMCKernelCodeT::validate(const MCSubtargetInfo *STI, MCContext &Ctx) { 444 int64_t Value; 445 if (!compute_pgm_resource1_registers->evaluateAsAbsolute(Value)) 446 return; 447 448 if (G_00B848_DX10_CLAMP(Value) && AMDGPU::isGFX12Plus(*STI)) { 449 Ctx.reportError({}, "enable_dx10_clamp=1 is not allowed on GFX12+"); 450 return; 451 } 452 453 if (G_00B848_IEEE_MODE(Value) && AMDGPU::isGFX12Plus(*STI)) { 454 Ctx.reportError({}, "enable_ieee_mode=1 is not allowed on GFX12+"); 455 return; 456 } 457 458 if (G_00B848_WGP_MODE(Value) && !AMDGPU::isGFX10Plus(*STI)) { 459 Ctx.reportError({}, "enable_wgp_mode=1 is only allowed on GFX10+"); 460 return; 461 } 462 463 if (G_00B848_MEM_ORDERED(Value) && !AMDGPU::isGFX10Plus(*STI)) { 464 Ctx.reportError({}, "enable_mem_ordered=1 is only allowed on GFX10+"); 465 return; 466 } 467 468 if (G_00B848_FWD_PROGRESS(Value) && !AMDGPU::isGFX10Plus(*STI)) { 469 Ctx.reportError({}, "enable_fwd_progress=1 is only allowed on GFX10+"); 470 return; 471 } 472 } 473 474 const MCExpr *&AMDGPUMCKernelCodeT::getMCExprForIndex(int Index) { 475 static const auto IndexTable = getMCExprIndexTable(); 476 return IndexTable[Index](*this); 477 } 478 479 bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, 480 raw_ostream &Err) { 481 const int Idx = get_amd_kernel_code_t_FieldIndex(ID); 482 if (Idx < 0) { 483 Err << "unexpected amd_kernel_code_t field name " << ID; 484 return false; 485 } 486 487 if (hasMCExprVersionTable()[Idx]) { 488 const MCExpr *Value; 489 if (!parseExpr(MCParser, Value, Err)) 490 return false; 491 getMCExprForIndex(Idx) = Value; 492 return true; 493 } 494 auto Parser = getParserTable()[Idx]; 495 return Parser ? 
Parser(*this, MCParser, Err) : false; 496 } 497 498 void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) { 499 const int Size = hasMCExprVersionTable().size(); 500 for (int i = 0; i < Size; ++i) { 501 OS << "\t\t"; 502 if (hasMCExprVersionTable()[i]) { 503 OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = "; 504 int64_t Val; 505 const MCExpr *Value = getMCExprForIndex(i); 506 if (Value->evaluateAsAbsolute(Val)) 507 OS << Val; 508 else 509 Value->print(OS, Ctx.getAsmInfo()); 510 } else { 511 printAmdKernelCodeField(*this, i, OS, Ctx); 512 } 513 OS << '\n'; 514 } 515 } 516 517 void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) { 518 OS.emitIntValue(amd_kernel_code_version_major, /*Size=*/4); 519 OS.emitIntValue(amd_kernel_code_version_minor, /*Size=*/4); 520 OS.emitIntValue(amd_machine_kind, /*Size=*/2); 521 OS.emitIntValue(amd_machine_version_major, /*Size=*/2); 522 OS.emitIntValue(amd_machine_version_minor, /*Size=*/2); 523 OS.emitIntValue(amd_machine_version_stepping, /*Size=*/2); 524 OS.emitIntValue(kernel_code_entry_byte_offset, /*Size=*/8); 525 OS.emitIntValue(kernel_code_prefetch_byte_offset, /*Size=*/8); 526 OS.emitIntValue(kernel_code_prefetch_byte_size, /*Size=*/8); 527 OS.emitIntValue(reserved0, /*Size=*/8); 528 529 if (compute_pgm_resource1_registers != nullptr) 530 OS.emitValue(compute_pgm_resource1_registers, /*Size=*/4); 531 else 532 OS.emitIntValue(Lo_32(compute_pgm_resource_registers), 533 /*Size=*/4); 534 535 if (compute_pgm_resource2_registers != nullptr) 536 OS.emitValue(compute_pgm_resource2_registers, /*Size=*/4); 537 else 538 OS.emitIntValue(Hi_32(compute_pgm_resource_registers), 539 /*Size=*/4); 540 541 if (is_dynamic_callstack != nullptr) { 542 const MCExpr *CodeProps = MCConstantExpr::create(code_properties, Ctx); 543 CodeProps = MCBinaryExpr::createOr( 544 CodeProps, 545 MaskShiftSet(is_dynamic_callstack, 546 (1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1, 547 
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx), 548 Ctx); 549 OS.emitValue(CodeProps, /*Size=*/4); 550 } else 551 OS.emitIntValue(code_properties, /*Size=*/4); 552 553 if (workitem_private_segment_byte_size != nullptr) 554 OS.emitValue(workitem_private_segment_byte_size, /*Size=*/4); 555 else 556 OS.emitIntValue(0, /*Size=*/4); 557 558 OS.emitIntValue(workgroup_group_segment_byte_size, /*Size=*/4); 559 OS.emitIntValue(gds_segment_byte_size, /*Size=*/4); 560 OS.emitIntValue(kernarg_segment_byte_size, /*Size=*/8); 561 OS.emitIntValue(workgroup_fbarrier_count, /*Size=*/4); 562 563 if (wavefront_sgpr_count != nullptr) 564 OS.emitValue(wavefront_sgpr_count, /*Size=*/2); 565 else 566 OS.emitIntValue(0, /*Size=*/2); 567 568 if (workitem_vgpr_count != nullptr) 569 OS.emitValue(workitem_vgpr_count, /*Size=*/2); 570 else 571 OS.emitIntValue(0, /*Size=*/2); 572 573 OS.emitIntValue(reserved_vgpr_first, /*Size=*/2); 574 OS.emitIntValue(reserved_vgpr_count, /*Size=*/2); 575 OS.emitIntValue(reserved_sgpr_first, /*Size=*/2); 576 OS.emitIntValue(reserved_sgpr_count, /*Size=*/2); 577 OS.emitIntValue(debug_wavefront_private_segment_offset_sgpr, 578 /*Size=*/2); 579 OS.emitIntValue(debug_private_segment_buffer_sgpr, /*Size=*/2); 580 OS.emitIntValue(kernarg_segment_alignment, /*Size=*/1); 581 OS.emitIntValue(group_segment_alignment, /*Size=*/1); 582 OS.emitIntValue(private_segment_alignment, /*Size=*/1); 583 OS.emitIntValue(wavefront_size, /*Size=*/1); 584 585 OS.emitIntValue(call_convention, /*Size=*/4); 586 OS.emitBytes(StringRef((const char *)reserved3, /*Size=*/12)); 587 OS.emitIntValue(runtime_loader_kernel_symbol, /*Size=*/8); 588 OS.emitBytes(StringRef((const char *)control_directives, /*Size=*/16 * 8)); 589 } 590