xref: /llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (revision 0b40f979298a2e7d4c3da7c067fc9747d0f93653)
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 using namespace llvm;
25 
26 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
27                                                      const DataLayout &DL) {
28   Type *Ty = Arg.getType();
29   MaybeAlign ArgAlign;
30   if (Arg.hasByRefAttr()) {
31     Ty = Arg.getParamByRefType();
32     ArgAlign = Arg.getParamAlign();
33   }
34 
35   if (!ArgAlign)
36     ArgAlign = DL.getABITypeAlign(Ty);
37 
38   return std::pair(Ty, *ArgAlign);
39 }
40 
41 namespace llvm {
42 
43 static cl::opt<bool> DumpHSAMetadata(
44     "amdgpu-dump-hsa-metadata",
45     cl::desc("Dump AMDGPU HSA Metadata"));
46 static cl::opt<bool> VerifyHSAMetadata(
47     "amdgpu-verify-hsa-metadata",
48     cl::desc("Verify AMDGPU HSA Metadata"));
49 
50 namespace AMDGPU::HSAMD {
51 
52 //===----------------------------------------------------------------------===//
53 // HSAMetadataStreamerV4
54 //===----------------------------------------------------------------------===//
55 
56 void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
57   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
58 }
59 
60 void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
61   errs() << "AMDGPU HSA Metadata Parser Test: ";
62 
63   msgpack::Document FromHSAMetadataString;
64 
65   if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
66     errs() << "FAIL\n";
67     return;
68   }
69 
70   std::string ToHSAMetadataString;
71   raw_string_ostream StrOS(ToHSAMetadataString);
72   FromHSAMetadataString.toYAML(StrOS);
73 
74   errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
75   if (HSAMetadataString != ToHSAMetadataString) {
76     errs() << "Original input: " << HSAMetadataString << '\n'
77            << "Produced output: " << StrOS.str() << '\n';
78   }
79 }
80 
81 std::optional<StringRef>
82 MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
83   return StringSwitch<std::optional<StringRef>>(AccQual)
84       .Case("read_only", StringRef("read_only"))
85       .Case("write_only", StringRef("write_only"))
86       .Case("read_write", StringRef("read_write"))
87       .Default(std::nullopt);
88 }
89 
90 std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
91     unsigned AddressSpace) const {
92   switch (AddressSpace) {
93   case AMDGPUAS::PRIVATE_ADDRESS:
94     return StringRef("private");
95   case AMDGPUAS::GLOBAL_ADDRESS:
96     return StringRef("global");
97   case AMDGPUAS::CONSTANT_ADDRESS:
98     return StringRef("constant");
99   case AMDGPUAS::LOCAL_ADDRESS:
100     return StringRef("local");
101   case AMDGPUAS::FLAT_ADDRESS:
102     return StringRef("generic");
103   case AMDGPUAS::REGION_ADDRESS:
104     return StringRef("region");
105   default:
106     return std::nullopt;
107   }
108 }
109 
110 StringRef
111 MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
112                                         StringRef BaseTypeName) const {
113   if (TypeQual.contains("pipe"))
114     return "pipe";
115 
116   return StringSwitch<StringRef>(BaseTypeName)
117       .Case("image1d_t", "image")
118       .Case("image1d_array_t", "image")
119       .Case("image1d_buffer_t", "image")
120       .Case("image2d_t", "image")
121       .Case("image2d_array_t", "image")
122       .Case("image2d_array_depth_t", "image")
123       .Case("image2d_array_msaa_t", "image")
124       .Case("image2d_array_msaa_depth_t", "image")
125       .Case("image2d_depth_t", "image")
126       .Case("image2d_msaa_t", "image")
127       .Case("image2d_msaa_depth_t", "image")
128       .Case("image3d_t", "image")
129       .Case("sampler_t", "sampler")
130       .Case("queue_t", "queue")
131       .Default(isa<PointerType>(Ty)
132                    ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
133                           ? "dynamic_shared_pointer"
134                           : "global_buffer")
135                    : "by_value");
136 }
137 
138 std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
139                                                    bool Signed) const {
140   switch (Ty->getTypeID()) {
141   case Type::IntegerTyID: {
142     if (!Signed)
143       return (Twine('u') + getTypeName(Ty, true)).str();
144 
145     auto BitWidth = Ty->getIntegerBitWidth();
146     switch (BitWidth) {
147     case 8:
148       return "char";
149     case 16:
150       return "short";
151     case 32:
152       return "int";
153     case 64:
154       return "long";
155     default:
156       return (Twine('i') + Twine(BitWidth)).str();
157     }
158   }
159   case Type::HalfTyID:
160     return "half";
161   case Type::FloatTyID:
162     return "float";
163   case Type::DoubleTyID:
164     return "double";
165   case Type::FixedVectorTyID: {
166     auto *VecTy = cast<FixedVectorType>(Ty);
167     auto *ElTy = VecTy->getElementType();
168     auto NumElements = VecTy->getNumElements();
169     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
170   }
171   default:
172     return "unknown";
173   }
174 }
175 
176 msgpack::ArrayDocNode
177 MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
178   auto Dims = HSAMetadataDoc->getArrayNode();
179   if (Node->getNumOperands() != 3)
180     return Dims;
181 
182   for (auto &Op : Node->operands())
183     Dims.push_back(Dims.getDocument()->getNode(
184         uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
185   return Dims;
186 }
187 
188 void MetadataStreamerMsgPackV4::emitVersion() {
189   auto Version = HSAMetadataDoc->getArrayNode();
190   Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
191   Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
192   getRootMetadata("amdhsa.version") = Version;
193 }
194 
195 void MetadataStreamerMsgPackV4::emitTargetID(
196     const IsaInfo::AMDGPUTargetID &TargetID) {
197   getRootMetadata("amdhsa.target") =
198       HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
199 }
200 
201 void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
202   auto *Node = Mod.getNamedMetadata("llvm.printf.fmts");
203   if (!Node)
204     return;
205 
206   auto Printf = HSAMetadataDoc->getArrayNode();
207   for (auto *Op : Node->operands())
208     if (Op->getNumOperands())
209       Printf.push_back(Printf.getDocument()->getNode(
210           cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
211   getRootMetadata("amdhsa.printf") = Printf;
212 }
213 
214 void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
215                                                    msgpack::MapDocNode Kern) {
216   // TODO: What about other languages?
217   auto *Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
218   if (!Node || !Node->getNumOperands())
219     return;
220   auto *Op0 = Node->getOperand(0);
221   if (Op0->getNumOperands() <= 1)
222     return;
223 
224   Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
225   auto LanguageVersion = Kern.getDocument()->getArrayNode();
226   LanguageVersion.push_back(Kern.getDocument()->getNode(
227       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
228   LanguageVersion.push_back(Kern.getDocument()->getNode(
229       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
230   Kern[".language_version"] = LanguageVersion;
231 }
232 
233 void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,
234                                                 msgpack::MapDocNode Kern) {
235 
236   if (auto *Node = Func.getMetadata("reqd_work_group_size"))
237     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
238   if (auto *Node = Func.getMetadata("work_group_size_hint"))
239     Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
240   if (auto *Node = Func.getMetadata("vec_type_hint")) {
241     Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
242         getTypeName(
243             cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
244             mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
245         /*Copy=*/true);
246   }
247   if (Func.hasFnAttribute("runtime-handle")) {
248     Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
249         Func.getFnAttribute("runtime-handle").getValueAsString().str(),
250         /*Copy=*/true);
251   }
252   if (Func.hasFnAttribute("device-init"))
253     Kern[".kind"] = Kern.getDocument()->getNode("init");
254   else if (Func.hasFnAttribute("device-fini"))
255     Kern[".kind"] = Kern.getDocument()->getNode("fini");
256 }
257 
258 void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
259                                                msgpack::MapDocNode Kern) {
260   auto &Func = MF.getFunction();
261   unsigned Offset = 0;
262   auto Args = HSAMetadataDoc->getArrayNode();
263   for (auto &Arg : Func.args()) {
264     if (Arg.hasAttribute("amdgpu-hidden-argument"))
265       continue;
266 
267     emitKernelArg(Arg, Offset, Args);
268   }
269 
270   emitHiddenKernelArgs(MF, Offset, Args);
271 
272   Kern[".args"] = Args;
273 }
274 
275 void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
276                                               unsigned &Offset,
277                                               msgpack::ArrayDocNode Args) {
278   const auto *Func = Arg.getParent();
279   auto ArgNo = Arg.getArgNo();
280   const MDNode *Node;
281 
282   StringRef Name;
283   Node = Func->getMetadata("kernel_arg_name");
284   if (Node && ArgNo < Node->getNumOperands())
285     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
286   else if (Arg.hasName())
287     Name = Arg.getName();
288 
289   StringRef TypeName;
290   Node = Func->getMetadata("kernel_arg_type");
291   if (Node && ArgNo < Node->getNumOperands())
292     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
293 
294   StringRef BaseTypeName;
295   Node = Func->getMetadata("kernel_arg_base_type");
296   if (Node && ArgNo < Node->getNumOperands())
297     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
298 
299   StringRef ActAccQual;
300   // Do we really need NoAlias check here?
301   if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
302     if (Arg.onlyReadsMemory())
303       ActAccQual = "read_only";
304     else if (Arg.hasAttribute(Attribute::WriteOnly))
305       ActAccQual = "write_only";
306   }
307 
308   StringRef AccQual;
309   Node = Func->getMetadata("kernel_arg_access_qual");
310   if (Node && ArgNo < Node->getNumOperands())
311     AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
312 
313   StringRef TypeQual;
314   Node = Func->getMetadata("kernel_arg_type_qual");
315   if (Node && ArgNo < Node->getNumOperands())
316     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
317 
318   const DataLayout &DL = Func->getDataLayout();
319 
320   MaybeAlign PointeeAlign;
321   Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
322 
323   // FIXME: Need to distinguish in memory alignment from pointer alignment.
324   if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
325     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
326       PointeeAlign = Arg.getParamAlign().valueOrOne();
327   }
328 
329   // There's no distinction between byval aggregates and raw aggregates.
330   Type *ArgTy;
331   Align ArgAlign;
332   std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
333 
334   emitKernelArg(DL, ArgTy, ArgAlign,
335                 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
336                 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
337                 AccQual, TypeQual);
338 }
339 
340 void MetadataStreamerMsgPackV4::emitKernelArg(
341     const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
342     unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
343     StringRef Name, StringRef TypeName, StringRef BaseTypeName,
344     StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
345   auto Arg = Args.getDocument()->getMapNode();
346 
347   if (!Name.empty())
348     Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
349   if (!TypeName.empty())
350     Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
351   auto Size = DL.getTypeAllocSize(Ty);
352   Arg[".size"] = Arg.getDocument()->getNode(Size);
353   Offset = alignTo(Offset, Alignment);
354   Arg[".offset"] = Arg.getDocument()->getNode(Offset);
355   Offset += Size;
356   Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
357   if (PointeeAlign)
358     Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
359 
360   if (auto *PtrTy = dyn_cast<PointerType>(Ty))
361     if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
362       // Limiting address space to emit only for a certain ValueKind.
363       if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
364         Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
365                                                            /*Copy=*/true);
366 
367   if (auto AQ = getAccessQualifier(AccQual))
368     Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
369 
370   if (auto AAQ = getAccessQualifier(ActAccQual))
371     Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true);
372 
373   SmallVector<StringRef, 1> SplitTypeQuals;
374   TypeQual.split(SplitTypeQuals, " ", -1, false);
375   for (StringRef Key : SplitTypeQuals) {
376     if (Key == "const")
377       Arg[".is_const"] = Arg.getDocument()->getNode(true);
378     else if (Key == "restrict")
379       Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
380     else if (Key == "volatile")
381       Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
382     else if (Key == "pipe")
383       Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
384   }
385 
386   Args.push_back(Arg);
387 }
388 
389 void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
390     const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
391   auto &Func = MF.getFunction();
392   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
393 
394   unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
395   if (!HiddenArgNumBytes)
396     return;
397 
398   const Module *M = Func.getParent();
399   auto &DL = M->getDataLayout();
400   auto *Int64Ty = Type::getInt64Ty(Func.getContext());
401 
402   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
403 
404   if (HiddenArgNumBytes >= 8)
405     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
406                   Args);
407   if (HiddenArgNumBytes >= 16)
408     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
409                   Args);
410   if (HiddenArgNumBytes >= 24)
411     emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
412                   Args);
413 
414   auto *Int8PtrTy =
415       PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
416 
417   if (HiddenArgNumBytes >= 32) {
418     // We forbid the use of features requiring hostcall when compiling OpenCL
419     // before code object V5, which makes the mutual exclusion between the
420     // "printf buffer" and "hostcall buffer" here sound.
421     if (M->getNamedMetadata("llvm.printf.fmts"))
422       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
423                     Args);
424     else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
425       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
426                     Args);
427     else
428       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
429   }
430 
431   // Emit "default queue" and "completion action" arguments if enqueue kernel is
432   // used, otherwise emit dummy "none" arguments.
433   if (HiddenArgNumBytes >= 40) {
434     if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
435       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
436                     Args);
437     } else {
438       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
439     }
440   }
441 
442   if (HiddenArgNumBytes >= 48) {
443     if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
444       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
445                     Args);
446     } else {
447       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
448     }
449   }
450 
451   // Emit the pointer argument for multi-grid object.
452   if (HiddenArgNumBytes >= 56) {
453     if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
454       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
455                     Args);
456     } else {
457       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
458     }
459   }
460 }
461 
462 msgpack::MapDocNode
463 MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
464                                              const SIProgramInfo &ProgramInfo,
465                                              unsigned CodeObjectVersion) const {
466   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
467   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
468   const Function &F = MF.getFunction();
469 
470   auto Kern = HSAMetadataDoc->getMapNode();
471 
472   Align MaxKernArgAlign;
473   Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
474       STM.getKernArgSegmentSize(F, MaxKernArgAlign));
475   Kern[".group_segment_fixed_size"] =
476       Kern.getDocument()->getNode(ProgramInfo.LDSSize);
477   DelayedExprs->assignDocNode(Kern[".private_segment_fixed_size"],
478                               msgpack::Type::UInt, ProgramInfo.ScratchSize);
479   if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
480     DelayedExprs->assignDocNode(Kern[".uses_dynamic_stack"],
481                                 msgpack::Type::Boolean,
482                                 ProgramInfo.DynamicCallStack);
483   }
484 
485   if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
486     Kern[".workgroup_processor_mode"] =
487         Kern.getDocument()->getNode(ProgramInfo.WgpMode);
488 
489   // FIXME: The metadata treats the minimum as 16?
490   Kern[".kernarg_segment_align"] =
491       Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
492   Kern[".wavefront_size"] =
493       Kern.getDocument()->getNode(STM.getWavefrontSize());
494   DelayedExprs->assignDocNode(Kern[".sgpr_count"], msgpack::Type::UInt,
495                               ProgramInfo.NumSGPR);
496   DelayedExprs->assignDocNode(Kern[".vgpr_count"], msgpack::Type::UInt,
497                               ProgramInfo.NumVGPR);
498 
499   // Only add AGPR count to metadata for supported devices
500   if (STM.hasMAIInsts()) {
501     DelayedExprs->assignDocNode(Kern[".agpr_count"], msgpack::Type::UInt,
502                                 ProgramInfo.NumAccVGPR);
503   }
504 
505   Kern[".max_flat_workgroup_size"] =
506       Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
507 
508   uint32_t NumWGY = MFI.getMaxNumWorkGroupsY();
509   uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ();
510   uint32_t NumWGX = MFI.getMaxNumWorkGroupsX();
511 
512   // TODO: Should consider 0 invalid and reject in IR verifier.
513   if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0)
514     Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);
515 
516   if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0)
517     Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);
518 
519   if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0)
520     Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);
521 
522   Kern[".sgpr_spill_count"] =
523       Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
524   Kern[".vgpr_spill_count"] =
525       Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
526 
527   return Kern;
528 }
529 
530 bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
531   DelayedExprs->resolveDelayedExpressions();
532   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
533 }
534 
535 void MetadataStreamerMsgPackV4::begin(const Module &Mod,
536                                       const IsaInfo::AMDGPUTargetID &TargetID) {
537   emitVersion();
538   emitTargetID(TargetID);
539   emitPrintf(Mod);
540   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
541   DelayedExprs->clear();
542 }
543 
544 void MetadataStreamerMsgPackV4::end() {
545   DelayedExprs->resolveDelayedExpressions();
546   std::string HSAMetadataString;
547   raw_string_ostream StrOS(HSAMetadataString);
548   HSAMetadataDoc->toYAML(StrOS);
549 
550   if (DumpHSAMetadata)
551     dump(StrOS.str());
552   if (VerifyHSAMetadata)
553     verify(StrOS.str());
554 }
555 
556 void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
557                                            const SIProgramInfo &ProgramInfo) {
558   auto &Func = MF.getFunction();
559   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
560       Func.getCallingConv() != CallingConv::SPIR_KERNEL)
561     return;
562 
563   auto CodeObjectVersion =
564       AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent());
565   auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
566 
567   auto Kernels =
568       getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
569 
570   {
571     Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
572     Kern[".symbol"] = Kern.getDocument()->getNode(
573         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
574     emitKernelLanguage(Func, Kern);
575     emitKernelAttrs(Func, Kern);
576     emitKernelArgs(MF, Kern);
577   }
578 
579   Kernels.push_back(Kern);
580 }
581 
582 //===----------------------------------------------------------------------===//
583 // HSAMetadataStreamerV5
584 //===----------------------------------------------------------------------===//
585 
586 void MetadataStreamerMsgPackV5::emitVersion() {
587   auto Version = HSAMetadataDoc->getArrayNode();
588   Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
589   Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
590   getRootMetadata("amdhsa.version") = Version;
591 }
592 
593 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
594     const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
595   auto &Func = MF.getFunction();
596   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
597 
598   // No implicit kernel argument is used.
599   if (ST.getImplicitArgNumBytes(Func) == 0)
600     return;
601 
602   const Module *M = Func.getParent();
603   auto &DL = M->getDataLayout();
604   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
605 
606   auto *Int64Ty = Type::getInt64Ty(Func.getContext());
607   auto *Int32Ty = Type::getInt32Ty(Func.getContext());
608   auto *Int16Ty = Type::getInt16Ty(Func.getContext());
609 
610   Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
611   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
612   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
613   emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
614 
615   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
616   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
617   emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
618 
619   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
620   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
621   emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
622 
623   // Reserved for hidden_tool_correlation_id.
624   Offset += 8;
625 
626   Offset += 8; // Reserved.
627 
628   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
629   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
630   emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
631 
632   emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
633 
634   Offset += 6; // Reserved.
635   auto *Int8PtrTy =
636       PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
637 
638   if (M->getNamedMetadata("llvm.printf.fmts")) {
639     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
640                   Args);
641   } else {
642     Offset += 8; // Skipped.
643   }
644 
645   if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
646     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
647                   Args);
648   } else {
649     Offset += 8; // Skipped.
650   }
651 
652   if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
653     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
654                 Args);
655   } else {
656     Offset += 8; // Skipped.
657   }
658 
659   if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
660     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
661   else
662     Offset += 8; // Skipped.
663 
664   if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
665     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
666                   Args);
667   } else {
668     Offset += 8; // Skipped.
669   }
670 
671   if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
672     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
673                   Args);
674   } else {
675     Offset += 8; // Skipped.
676   }
677 
678   // Emit argument for hidden dynamic lds size
679   if (MFI.isDynamicLDSUsed()) {
680     emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,
681                   Args);
682   } else {
683     Offset += 4; // skipped
684   }
685 
686   Offset += 68; // Reserved.
687 
688   // hidden_private_base and hidden_shared_base are only when the subtarget has
689   // ApertureRegs.
690   if (!ST.hasApertureRegs()) {
691     emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
692     emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
693   } else {
694     Offset += 8; // Skipped.
695   }
696 
697   if (MFI.getUserSGPRInfo().hasQueuePtr())
698     emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
699 }
700 
701 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
702                                                 msgpack::MapDocNode Kern) {
703   MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);
704 
705   if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
706     Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
707 }
708 
709 //===----------------------------------------------------------------------===//
710 // HSAMetadataStreamerV6
711 //===----------------------------------------------------------------------===//
712 
713 void MetadataStreamerMsgPackV6::emitVersion() {
714   auto Version = HSAMetadataDoc->getArrayNode();
715   Version.push_back(Version.getDocument()->getNode(VersionMajorV6));
716   Version.push_back(Version.getDocument()->getNode(VersionMinorV6));
717   getRootMetadata("amdhsa.version") = Version;
718 }
719 
720 } // end namespace AMDGPU::HSAMD
721 } // end namespace llvm
722