1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// AMDGPU HSA Metadata Streamer. 11 /// 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUHSAMetadataStreamer.h" 16 #include "AMDGPU.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "SIProgramInfo.h" 21 #include "llvm/IR/Module.h" 22 #include "llvm/MC/MCContext.h" 23 #include "llvm/MC/MCExpr.h" 24 using namespace llvm; 25 26 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, 27 const DataLayout &DL) { 28 Type *Ty = Arg.getType(); 29 MaybeAlign ArgAlign; 30 if (Arg.hasByRefAttr()) { 31 Ty = Arg.getParamByRefType(); 32 ArgAlign = Arg.getParamAlign(); 33 } 34 35 if (!ArgAlign) 36 ArgAlign = DL.getABITypeAlign(Ty); 37 38 return std::pair(Ty, *ArgAlign); 39 } 40 41 namespace llvm { 42 43 static cl::opt<bool> DumpHSAMetadata( 44 "amdgpu-dump-hsa-metadata", 45 cl::desc("Dump AMDGPU HSA Metadata")); 46 static cl::opt<bool> VerifyHSAMetadata( 47 "amdgpu-verify-hsa-metadata", 48 cl::desc("Verify AMDGPU HSA Metadata")); 49 50 namespace AMDGPU::HSAMD { 51 52 //===----------------------------------------------------------------------===// 53 // HSAMetadataStreamerV4 54 //===----------------------------------------------------------------------===// 55 56 void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const { 57 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; 58 } 59 60 void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const { 61 errs() << "AMDGPU HSA Metadata Parser Test: "; 62 63 msgpack::Document FromHSAMetadataString; 64 65 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) { 66 errs() << "FAIL\n"; 67 return; 68 } 69 70 std::string ToHSAMetadataString; 71 raw_string_ostream StrOS(ToHSAMetadataString); 72 FromHSAMetadataString.toYAML(StrOS); 73 74 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n'; 75 if (HSAMetadataString != ToHSAMetadataString) { 76 errs() << "Original input: " << HSAMetadataString << '\n' 77 << "Produced output: " << StrOS.str() << '\n'; 78 } 79 } 80 81 std::optional<StringRef> 82 MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const { 83 return StringSwitch<std::optional<StringRef>>(AccQual) 84 .Case("read_only", StringRef("read_only")) 85 .Case("write_only", StringRef("write_only")) 86 .Case("read_write", StringRef("read_write")) 87 .Default(std::nullopt); 88 } 89 90 std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier( 91 unsigned AddressSpace) const { 92 switch (AddressSpace) { 93 case AMDGPUAS::PRIVATE_ADDRESS: 94 return StringRef("private"); 95 case AMDGPUAS::GLOBAL_ADDRESS: 96 return StringRef("global"); 97 case AMDGPUAS::CONSTANT_ADDRESS: 98 return StringRef("constant"); 99 case AMDGPUAS::LOCAL_ADDRESS: 100 return StringRef("local"); 101 case AMDGPUAS::FLAT_ADDRESS: 102 return StringRef("generic"); 103 case AMDGPUAS::REGION_ADDRESS: 104 return StringRef("region"); 105 default: 106 return std::nullopt; 107 } 108 } 109 110 StringRef 111 MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual, 112 StringRef BaseTypeName) const { 113 if (TypeQual.contains("pipe")) 114 return "pipe"; 115 116 return StringSwitch<StringRef>(BaseTypeName) 117 .Case("image1d_t", "image") 118 .Case("image1d_array_t", "image") 119 .Case("image1d_buffer_t", "image") 120 .Case("image2d_t", "image") 121 .Case("image2d_array_t", "image") 122 .Case("image2d_array_depth_t", "image") 123 .Case("image2d_array_msaa_t", "image") 124 .Case("image2d_array_msaa_depth_t", "image") 125 .Case("image2d_depth_t", "image") 126 .Case("image2d_msaa_t", "image") 127 .Case("image2d_msaa_depth_t", "image") 128 .Case("image3d_t", "image") 129 .Case("sampler_t", "sampler") 130 .Case("queue_t", "queue") 131 .Default(isa<PointerType>(Ty) 132 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS 133 ? "dynamic_shared_pointer" 134 : "global_buffer") 135 : "by_value"); 136 } 137 138 std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty, 139 bool Signed) const { 140 switch (Ty->getTypeID()) { 141 case Type::IntegerTyID: { 142 if (!Signed) 143 return (Twine('u') + getTypeName(Ty, true)).str(); 144 145 auto BitWidth = Ty->getIntegerBitWidth(); 146 switch (BitWidth) { 147 case 8: 148 return "char"; 149 case 16: 150 return "short"; 151 case 32: 152 return "int"; 153 case 64: 154 return "long"; 155 default: 156 return (Twine('i') + Twine(BitWidth)).str(); 157 } 158 } 159 case Type::HalfTyID: 160 return "half"; 161 case Type::FloatTyID: 162 return "float"; 163 case Type::DoubleTyID: 164 return "double"; 165 case Type::FixedVectorTyID: { 166 auto *VecTy = cast<FixedVectorType>(Ty); 167 auto *ElTy = VecTy->getElementType(); 168 auto NumElements = VecTy->getNumElements(); 169 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str(); 170 } 171 default: 172 return "unknown"; 173 } 174 } 175 176 msgpack::ArrayDocNode 177 MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const { 178 auto Dims = HSAMetadataDoc->getArrayNode(); 179 if (Node->getNumOperands() != 3) 180 return Dims; 181 182 for (auto &Op : Node->operands()) 183 Dims.push_back(Dims.getDocument()->getNode( 184 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue()))); 185 return Dims; 186 } 187 188 void MetadataStreamerMsgPackV4::emitVersion() { 189 auto Version = HSAMetadataDoc->getArrayNode(); 190 Version.push_back(Version.getDocument()->getNode(VersionMajorV4)); 191 Version.push_back(Version.getDocument()->getNode(VersionMinorV4)); 192 getRootMetadata("amdhsa.version") = Version; 193 } 194 195 void MetadataStreamerMsgPackV4::emitTargetID( 196 const IsaInfo::AMDGPUTargetID &TargetID) { 197 getRootMetadata("amdhsa.target") = 198 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); 199 } 200 201 void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) { 202 auto *Node = Mod.getNamedMetadata("llvm.printf.fmts"); 203 if (!Node) 204 return; 205 206 auto Printf = HSAMetadataDoc->getArrayNode(); 207 for (auto *Op : Node->operands()) 208 if (Op->getNumOperands()) 209 Printf.push_back(Printf.getDocument()->getNode( 210 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true)); 211 getRootMetadata("amdhsa.printf") = Printf; 212 } 213 214 void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func, 215 msgpack::MapDocNode Kern) { 216 // TODO: What about other languages? 217 auto *Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); 218 if (!Node || !Node->getNumOperands()) 219 return; 220 auto *Op0 = Node->getOperand(0); 221 if (Op0->getNumOperands() <= 1) 222 return; 223 224 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C"); 225 auto LanguageVersion = Kern.getDocument()->getArrayNode(); 226 LanguageVersion.push_back(Kern.getDocument()->getNode( 227 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue())); 228 LanguageVersion.push_back(Kern.getDocument()->getNode( 229 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue())); 230 Kern[".language_version"] = LanguageVersion; 231 } 232 233 void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func, 234 msgpack::MapDocNode Kern) { 235 236 if (auto *Node = Func.getMetadata("reqd_work_group_size")) 237 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node); 238 if (auto *Node = Func.getMetadata("work_group_size_hint")) 239 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node); 240 if (auto *Node = Func.getMetadata("vec_type_hint")) { 241 Kern[".vec_type_hint"] = Kern.getDocument()->getNode( 242 getTypeName( 243 cast<ValueAsMetadata>(Node->getOperand(0))->getType(), 244 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()), 245 /*Copy=*/true); 246 } 247 if (Func.hasFnAttribute("runtime-handle")) { 248 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode( 249 Func.getFnAttribute("runtime-handle").getValueAsString().str(), 250 /*Copy=*/true); 251 } 252 if (Func.hasFnAttribute("device-init")) 253 Kern[".kind"] = Kern.getDocument()->getNode("init"); 254 else if (Func.hasFnAttribute("device-fini")) 255 Kern[".kind"] = Kern.getDocument()->getNode("fini"); 256 } 257 258 void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF, 259 msgpack::MapDocNode Kern) { 260 auto &Func = MF.getFunction(); 261 unsigned Offset = 0; 262 auto Args = HSAMetadataDoc->getArrayNode(); 263 for (auto &Arg : Func.args()) { 264 if (Arg.hasAttribute("amdgpu-hidden-argument")) 265 continue; 266 267 emitKernelArg(Arg, Offset, Args); 268 } 269 270 emitHiddenKernelArgs(MF, Offset, Args); 271 272 Kern[".args"] = Args; 273 } 274 275 void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg, 276 unsigned &Offset, 277 msgpack::ArrayDocNode Args) { 278 const auto *Func = Arg.getParent(); 279 auto ArgNo = Arg.getArgNo(); 280 const MDNode *Node; 281 282 StringRef Name; 283 Node = Func->getMetadata("kernel_arg_name"); 284 if (Node && ArgNo < Node->getNumOperands()) 285 Name = cast<MDString>(Node->getOperand(ArgNo))->getString(); 286 else if (Arg.hasName()) 287 Name = Arg.getName(); 288 289 StringRef TypeName; 290 Node = Func->getMetadata("kernel_arg_type"); 291 if (Node && ArgNo < Node->getNumOperands()) 292 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 293 294 StringRef BaseTypeName; 295 Node = Func->getMetadata("kernel_arg_base_type"); 296 if (Node && ArgNo < Node->getNumOperands()) 297 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString(); 298 299 StringRef ActAccQual; 300 // Do we really need NoAlias check here? 301 if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) { 302 if (Arg.onlyReadsMemory()) 303 ActAccQual = "read_only"; 304 else if (Arg.hasAttribute(Attribute::WriteOnly)) 305 ActAccQual = "write_only"; 306 } 307 308 StringRef AccQual; 309 Node = Func->getMetadata("kernel_arg_access_qual"); 310 if (Node && ArgNo < Node->getNumOperands()) 311 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 312 313 StringRef TypeQual; 314 Node = Func->getMetadata("kernel_arg_type_qual"); 315 if (Node && ArgNo < Node->getNumOperands()) 316 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString(); 317 318 const DataLayout &DL = Func->getDataLayout(); 319 320 MaybeAlign PointeeAlign; 321 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); 322 323 // FIXME: Need to distinguish in memory alignment from pointer alignment. 324 if (auto *PtrTy = dyn_cast<PointerType>(Ty)) { 325 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) 326 PointeeAlign = Arg.getParamAlign().valueOrOne(); 327 } 328 329 // There's no distinction between byval aggregates and raw aggregates. 330 Type *ArgTy; 331 Align ArgAlign; 332 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL); 333 334 emitKernelArg(DL, ArgTy, ArgAlign, 335 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args, 336 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual, 337 AccQual, TypeQual); 338 } 339 340 void MetadataStreamerMsgPackV4::emitKernelArg( 341 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, 342 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, 343 StringRef Name, StringRef TypeName, StringRef BaseTypeName, 344 StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) { 345 auto Arg = Args.getDocument()->getMapNode(); 346 347 if (!Name.empty()) 348 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true); 349 if (!TypeName.empty()) 350 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true); 351 auto Size = DL.getTypeAllocSize(Ty); 352 Arg[".size"] = Arg.getDocument()->getNode(Size); 353 Offset = alignTo(Offset, Alignment); 354 Arg[".offset"] = Arg.getDocument()->getNode(Offset); 355 Offset += Size; 356 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true); 357 if (PointeeAlign) 358 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value()); 359 360 if (auto *PtrTy = dyn_cast<PointerType>(Ty)) 361 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace())) 362 // Limiting address space to emit only for a certain ValueKind. 363 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer") 364 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, 365 /*Copy=*/true); 366 367 if (auto AQ = getAccessQualifier(AccQual)) 368 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true); 369 370 if (auto AAQ = getAccessQualifier(ActAccQual)) 371 Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true); 372 373 SmallVector<StringRef, 1> SplitTypeQuals; 374 TypeQual.split(SplitTypeQuals, " ", -1, false); 375 for (StringRef Key : SplitTypeQuals) { 376 if (Key == "const") 377 Arg[".is_const"] = Arg.getDocument()->getNode(true); 378 else if (Key == "restrict") 379 Arg[".is_restrict"] = Arg.getDocument()->getNode(true); 380 else if (Key == "volatile") 381 Arg[".is_volatile"] = Arg.getDocument()->getNode(true); 382 else if (Key == "pipe") 383 Arg[".is_pipe"] = Arg.getDocument()->getNode(true); 384 } 385 386 Args.push_back(Arg); 387 } 388 389 void MetadataStreamerMsgPackV4::emitHiddenKernelArgs( 390 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 391 auto &Func = MF.getFunction(); 392 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 393 394 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); 395 if (!HiddenArgNumBytes) 396 return; 397 398 const Module *M = Func.getParent(); 399 auto &DL = M->getDataLayout(); 400 auto *Int64Ty = Type::getInt64Ty(Func.getContext()); 401 402 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 403 404 if (HiddenArgNumBytes >= 8) 405 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, 406 Args); 407 if (HiddenArgNumBytes >= 16) 408 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, 409 Args); 410 if (HiddenArgNumBytes >= 24) 411 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, 412 Args); 413 414 auto *Int8PtrTy = 415 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 416 417 if (HiddenArgNumBytes >= 32) { 418 // We forbid the use of features requiring hostcall when compiling OpenCL 419 // before code object V5, which makes the mutual exclusion between the 420 // "printf buffer" and "hostcall buffer" here sound. 421 if (M->getNamedMetadata("llvm.printf.fmts")) 422 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 423 Args); 424 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) 425 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 426 Args); 427 else 428 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 429 } 430 431 // Emit "default queue" and "completion action" arguments if enqueue kernel is 432 // used, otherwise emit dummy "none" arguments. 433 if (HiddenArgNumBytes >= 40) { 434 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 435 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 436 Args); 437 } else { 438 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 439 } 440 } 441 442 if (HiddenArgNumBytes >= 48) { 443 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { 444 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 445 Args); 446 } else { 447 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 448 } 449 } 450 451 // Emit the pointer argument for multi-grid object. 452 if (HiddenArgNumBytes >= 56) { 453 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 454 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 455 Args); 456 } else { 457 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); 458 } 459 } 460 } 461 462 msgpack::MapDocNode 463 MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, 464 const SIProgramInfo &ProgramInfo, 465 unsigned CodeObjectVersion) const { 466 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); 467 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 468 const Function &F = MF.getFunction(); 469 470 auto Kern = HSAMetadataDoc->getMapNode(); 471 472 Align MaxKernArgAlign; 473 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode( 474 STM.getKernArgSegmentSize(F, MaxKernArgAlign)); 475 Kern[".group_segment_fixed_size"] = 476 Kern.getDocument()->getNode(ProgramInfo.LDSSize); 477 DelayedExprs->assignDocNode(Kern[".private_segment_fixed_size"], 478 msgpack::Type::UInt, ProgramInfo.ScratchSize); 479 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) { 480 DelayedExprs->assignDocNode(Kern[".uses_dynamic_stack"], 481 msgpack::Type::Boolean, 482 ProgramInfo.DynamicCallStack); 483 } 484 485 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP()) 486 Kern[".workgroup_processor_mode"] = 487 Kern.getDocument()->getNode(ProgramInfo.WgpMode); 488 489 // FIXME: The metadata treats the minimum as 16? 490 Kern[".kernarg_segment_align"] = 491 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); 492 Kern[".wavefront_size"] = 493 Kern.getDocument()->getNode(STM.getWavefrontSize()); 494 DelayedExprs->assignDocNode(Kern[".sgpr_count"], msgpack::Type::UInt, 495 ProgramInfo.NumSGPR); 496 DelayedExprs->assignDocNode(Kern[".vgpr_count"], msgpack::Type::UInt, 497 ProgramInfo.NumVGPR); 498 499 // Only add AGPR count to metadata for supported devices 500 if (STM.hasMAIInsts()) { 501 DelayedExprs->assignDocNode(Kern[".agpr_count"], msgpack::Type::UInt, 502 ProgramInfo.NumAccVGPR); 503 } 504 505 Kern[".max_flat_workgroup_size"] = 506 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize()); 507 508 uint32_t NumWGY = MFI.getMaxNumWorkGroupsY(); 509 uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ(); 510 uint32_t NumWGX = MFI.getMaxNumWorkGroupsX(); 511 512 // TODO: Should consider 0 invalid and reject in IR verifier. 513 if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0) 514 Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX); 515 516 if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0) 517 Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY); 518 519 if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0) 520 Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ); 521 522 Kern[".sgpr_spill_count"] = 523 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs()); 524 Kern[".vgpr_spill_count"] = 525 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs()); 526 527 return Kern; 528 } 529 530 bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { 531 DelayedExprs->resolveDelayedExpressions(); 532 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true); 533 } 534 535 void MetadataStreamerMsgPackV4::begin(const Module &Mod, 536 const IsaInfo::AMDGPUTargetID &TargetID) { 537 emitVersion(); 538 emitTargetID(TargetID); 539 emitPrintf(Mod); 540 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode(); 541 DelayedExprs->clear(); 542 } 543 544 void MetadataStreamerMsgPackV4::end() { 545 DelayedExprs->resolveDelayedExpressions(); 546 std::string HSAMetadataString; 547 raw_string_ostream StrOS(HSAMetadataString); 548 HSAMetadataDoc->toYAML(StrOS); 549 550 if (DumpHSAMetadata) 551 dump(StrOS.str()); 552 if (VerifyHSAMetadata) 553 verify(StrOS.str()); 554 } 555 556 void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, 557 const SIProgramInfo &ProgramInfo) { 558 auto &Func = MF.getFunction(); 559 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL && 560 Func.getCallingConv() != CallingConv::SPIR_KERNEL) 561 return; 562 563 auto CodeObjectVersion = 564 AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent()); 565 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); 566 567 auto Kernels = 568 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true); 569 570 { 571 Kern[".name"] = Kern.getDocument()->getNode(Func.getName()); 572 Kern[".symbol"] = Kern.getDocument()->getNode( 573 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true); 574 emitKernelLanguage(Func, Kern); 575 emitKernelAttrs(Func, Kern); 576 emitKernelArgs(MF, Kern); 577 } 578 579 Kernels.push_back(Kern); 580 } 581 582 //===----------------------------------------------------------------------===// 583 // HSAMetadataStreamerV5 584 //===----------------------------------------------------------------------===// 585 586 void MetadataStreamerMsgPackV5::emitVersion() { 587 auto Version = HSAMetadataDoc->getArrayNode(); 588 Version.push_back(Version.getDocument()->getNode(VersionMajorV5)); 589 Version.push_back(Version.getDocument()->getNode(VersionMinorV5)); 590 getRootMetadata("amdhsa.version") = Version; 591 } 592 593 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( 594 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { 595 auto &Func = MF.getFunction(); 596 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 597 598 // No implicit kernel argument is used. 599 if (ST.getImplicitArgNumBytes(Func) == 0) 600 return; 601 602 const Module *M = Func.getParent(); 603 auto &DL = M->getDataLayout(); 604 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 605 606 auto *Int64Ty = Type::getInt64Ty(Func.getContext()); 607 auto *Int32Ty = Type::getInt32Ty(Func.getContext()); 608 auto *Int16Ty = Type::getInt16Ty(Func.getContext()); 609 610 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); 611 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args); 612 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args); 613 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args); 614 615 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args); 616 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args); 617 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args); 618 619 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args); 620 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args); 621 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args); 622 623 // Reserved for hidden_tool_correlation_id. 624 Offset += 8; 625 626 Offset += 8; // Reserved. 627 628 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args); 629 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args); 630 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args); 631 632 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args); 633 634 Offset += 6; // Reserved. 635 auto *Int8PtrTy = 636 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); 637 638 if (M->getNamedMetadata("llvm.printf.fmts")) { 639 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, 640 Args); 641 } else { 642 Offset += 8; // Skipped. 643 } 644 645 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { 646 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, 647 Args); 648 } else { 649 Offset += 8; // Skipped. 650 } 651 652 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) { 653 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset, 654 Args); 655 } else { 656 Offset += 8; // Skipped. 657 } 658 659 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr")) 660 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args); 661 else 662 Offset += 8; // Skipped. 663 664 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) { 665 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset, 666 Args); 667 } else { 668 Offset += 8; // Skipped. 669 } 670 671 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) { 672 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset, 673 Args); 674 } else { 675 Offset += 8; // Skipped. 676 } 677 678 // Emit argument for hidden dynamic lds size 679 if (MFI.isDynamicLDSUsed()) { 680 emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset, 681 Args); 682 } else { 683 Offset += 4; // skipped 684 } 685 686 Offset += 68; // Reserved. 687 688 // hidden_private_base and hidden_shared_base are only when the subtarget has 689 // ApertureRegs. 690 if (!ST.hasApertureRegs()) { 691 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args); 692 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args); 693 } else { 694 Offset += 8; // Skipped. 695 } 696 697 if (MFI.getUserSGPRInfo().hasQueuePtr()) 698 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args); 699 } 700 701 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, 702 msgpack::MapDocNode Kern) { 703 MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern); 704 705 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool()) 706 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1); 707 } 708 709 //===----------------------------------------------------------------------===// 710 // HSAMetadataStreamerV6 711 //===----------------------------------------------------------------------===// 712 713 void MetadataStreamerMsgPackV6::emitVersion() { 714 auto Version = HSAMetadataDoc->getArrayNode(); 715 Version.push_back(Version.getDocument()->getNode(VersionMajorV6)); 716 Version.push_back(Version.getDocument()->getNode(VersionMinorV6)); 717 getRootMetadata("amdhsa.version") = Version; 718 } 719 720 } // end namespace AMDGPU::HSAMD 721 } // end namespace llvm 722