xref: /llvm-project/llvm/lib/Frontend/Offloading/Utility.cpp (revision 13dcc95dcd4999ff99f2de89d881f1aed5b21709)
1 //===- Utility.cpp ------ Collection of generic offloading utilities ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Frontend/Offloading/Utility.h"
10 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
11 #include "llvm/BinaryFormat/ELF.h"
12 #include "llvm/BinaryFormat/MsgPackDocument.h"
13 #include "llvm/IR/Constants.h"
14 #include "llvm/IR/GlobalValue.h"
15 #include "llvm/IR/GlobalVariable.h"
16 #include "llvm/IR/Value.h"
17 #include "llvm/Object/ELFObjectFile.h"
18 #include "llvm/Support/MemoryBufferRef.h"
19 #include "llvm/Transforms/Utils/ModuleUtils.h"
20 
21 using namespace llvm;
22 using namespace llvm::offloading;
23 
24 StructType *offloading::getEntryTy(Module &M) {
25   LLVMContext &C = M.getContext();
26   StructType *EntryTy =
27       StructType::getTypeByName(C, "struct.__tgt_offload_entry");
28   if (!EntryTy)
29     EntryTy = StructType::create(
30         "struct.__tgt_offload_entry", Type::getInt64Ty(C), Type::getInt16Ty(C),
31         Type::getInt16Ty(C), Type::getInt32Ty(C), PointerType::getUnqual(C),
32         PointerType::getUnqual(C), Type::getInt64Ty(C), Type::getInt64Ty(C),
33         PointerType::getUnqual(C));
34   return EntryTy;
35 }
36 
37 std::pair<Constant *, GlobalVariable *>
38 offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind,
39                                           Constant *Addr, StringRef Name,
40                                           uint64_t Size, uint32_t Flags,
41                                           uint64_t Data, Constant *AuxAddr) {
42   llvm::Triple Triple(M.getTargetTriple());
43   Type *PtrTy = PointerType::getUnqual(M.getContext());
44   Type *Int64Ty = Type::getInt64Ty(M.getContext());
45   Type *Int32Ty = Type::getInt32Ty(M.getContext());
46   Type *Int16Ty = Type::getInt16Ty(M.getContext());
47 
48   Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name);
49 
50   StringRef Prefix =
51       Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name";
52 
53   // Create the constant string used to look up the symbol in the device.
54   auto *Str =
55       new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
56                          GlobalValue::InternalLinkage, AddrName, Prefix);
57   StringRef SectionName = ".llvm.rodata.offloading";
58   Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
59   Str->setSection(SectionName);
60   Str->setAlignment(Align(1));
61 
62   // Make a metadata node for these constants so it can be queried from IR.
63   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols");
64   Metadata *MDVals[] = {ConstantAsMetadata::get(Str)};
65   MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals));
66 
67   // Construct the offloading entry.
68   Constant *EntryData[] = {
69       ConstantExpr::getNullValue(Int64Ty),
70       ConstantInt::get(Int16Ty, 1),
71       ConstantInt::get(Int16Ty, Kind),
72       ConstantInt::get(Int32Ty, Flags),
73       ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, PtrTy),
74       ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, PtrTy),
75       ConstantInt::get(Int64Ty, Size),
76       ConstantInt::get(Int64Ty, Data),
77       AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(AuxAddr, PtrTy)
78               : ConstantExpr::getNullValue(PtrTy)};
79   Constant *EntryInitializer = ConstantStruct::get(getEntryTy(M), EntryData);
80   return {EntryInitializer, Str};
81 }
82 
83 void offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind,
84                                      Constant *Addr, StringRef Name,
85                                      uint64_t Size, uint32_t Flags,
86                                      uint64_t Data, StringRef SectionName,
87                                      Constant *AuxAddr) {
88   llvm::Triple Triple(M.getTargetTriple());
89 
90   auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer(
91       M, Kind, Addr, Name, Size, Flags, Data, AuxAddr);
92 
93   StringRef Prefix =
94       Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry.";
95   auto *Entry = new GlobalVariable(
96       M, getEntryTy(M),
97       /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer,
98       Prefix + Name, nullptr, GlobalValue::NotThreadLocal,
99       M.getDataLayout().getDefaultGlobalsAddressSpace());
100 
101   // The entry has to be created in the section the linker expects it to be.
102   if (Triple.isOSBinFormatCOFF())
103     Entry->setSection((SectionName + "$OE").str());
104   else
105     Entry->setSection(SectionName);
106   Entry->setAlignment(Align(1));
107 }
108 
109 std::pair<GlobalVariable *, GlobalVariable *>
110 offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
111   llvm::Triple Triple(M.getTargetTriple());
112 
113   auto *ZeroInitilaizer =
114       ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
115   auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr;
116   auto *EntryType = ArrayType::get(getEntryTy(M), 0);
117   auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage
118                                             : GlobalValue::ExternalLinkage;
119 
120   auto *EntriesB =
121       new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
122                          "__start_" + SectionName);
123   EntriesB->setVisibility(GlobalValue::HiddenVisibility);
124   auto *EntriesE =
125       new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit,
126                          "__stop_" + SectionName);
127   EntriesE->setVisibility(GlobalValue::HiddenVisibility);
128 
129   if (Triple.isOSBinFormatELF()) {
130     // We assume that external begin/end symbols that we have created above will
131     // be defined by the linker. This is done whenever a section name with a
132     // valid C-identifier is present. We define a dummy variable here to force
133     // the linker to always provide these symbols.
134     auto *DummyEntry = new GlobalVariable(
135         M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage,
136         ZeroInitilaizer, "__dummy." + SectionName);
137     DummyEntry->setSection(SectionName);
138     appendToCompilerUsed(M, DummyEntry);
139   } else {
140     // The COFF linker will merge sections containing a '$' together into a
141     // single section. The order of entries in this section will be sorted
142     // alphabetically by the characters following the '$' in the name. Set the
143     // sections here to ensure that the beginning and end symbols are sorted.
144     EntriesB->setSection((SectionName + "$OA").str());
145     EntriesE->setSection((SectionName + "$OZ").str());
146   }
147 
148   return std::make_pair(EntriesB, EntriesE);
149 }
150 
151 bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
152                                                         uint32_t ImageFlags,
153                                                         StringRef EnvTargetID) {
154   using namespace llvm::ELF;
155   StringRef EnvArch = EnvTargetID.split(":").first;
156 
157   // Trivial check if the base processors match.
158   if (EnvArch != ImageArch)
159     return false;
160 
161   // Check if the image is requesting xnack on or off.
162   switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
163   case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
164     // The image is 'xnack-' so the environment must be 'xnack-'.
165     if (!EnvTargetID.contains("xnack-"))
166       return false;
167     break;
168   case EF_AMDGPU_FEATURE_XNACK_ON_V4:
169     // The image is 'xnack+' so the environment must be 'xnack+'.
170     if (!EnvTargetID.contains("xnack+"))
171       return false;
172     break;
173   case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
174   case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
175   default:
176     break;
177   }
178 
179   // Check if the image is requesting sramecc on or off.
180   switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
181   case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
182     // The image is 'sramecc-' so the environment must be 'sramecc-'.
183     if (!EnvTargetID.contains("sramecc-"))
184       return false;
185     break;
186   case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
187     // The image is 'sramecc+' so the environment must be 'sramecc+'.
188     if (!EnvTargetID.contains("sramecc+"))
189       return false;
190     break;
191   case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
192   case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
193     break;
194   }
195 
196   return true;
197 }
198 
199 namespace {
200 /// Reads the AMDGPU specific per-kernel-metadata from an image.
201 class KernelInfoReader {
202 public:
203   KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
204       : KernelInfoMap(KIM) {}
205 
206   /// Process ELF note to read AMDGPU metadata from respective information
207   /// fields.
208   Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
209     if (Note.getName() != "AMDGPU")
210       return Error::success(); // We are not interested in other things
211 
212     assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
213            "Parse AMDGPU MetaData");
214     auto Desc = Note.getDesc(Align);
215     StringRef MsgPackString =
216         StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
217     msgpack::Document MsgPackDoc;
218     if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
219       return Error::success();
220 
221     AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
222     if (!Verifier.verify(MsgPackDoc.getRoot()))
223       return Error::success();
224 
225     auto RootMap = MsgPackDoc.getRoot().getMap(true);
226 
227     if (auto Err = iterateAMDKernels(RootMap))
228       return Err;
229 
230     return Error::success();
231   }
232 
233 private:
234   /// Extracts the relevant information via simple string look-up in the msgpack
235   /// document elements.
236   Error
237   extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
238                     std::string &KernelName,
239                     offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
240     if (!V.first.isString())
241       return Error::success();
242 
243     const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
244       return DK.getString() == SK;
245     };
246 
247     const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
248                                            uint32_t *Vals) {
249       assert(DN.isArray() && "MsgPack DocNode is an array node");
250       auto DNA = DN.getArray();
251       assert(DNA.size() == 3 && "ArrayNode has at most three elements");
252 
253       int I = 0;
254       for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
255            ++DNABegin) {
256         Vals[I++] = DNABegin->getUInt();
257       }
258     };
259 
260     if (IsKey(V.first, ".name")) {
261       KernelName = V.second.toString();
262     } else if (IsKey(V.first, ".sgpr_count")) {
263       KernelData.SGPRCount = V.second.getUInt();
264     } else if (IsKey(V.first, ".sgpr_spill_count")) {
265       KernelData.SGPRSpillCount = V.second.getUInt();
266     } else if (IsKey(V.first, ".vgpr_count")) {
267       KernelData.VGPRCount = V.second.getUInt();
268     } else if (IsKey(V.first, ".vgpr_spill_count")) {
269       KernelData.VGPRSpillCount = V.second.getUInt();
270     } else if (IsKey(V.first, ".agpr_count")) {
271       KernelData.AGPRCount = V.second.getUInt();
272     } else if (IsKey(V.first, ".private_segment_fixed_size")) {
273       KernelData.PrivateSegmentSize = V.second.getUInt();
274     } else if (IsKey(V.first, ".group_segment_fixed_size")) {
275       KernelData.GroupSegmentList = V.second.getUInt();
276     } else if (IsKey(V.first, ".reqd_workgroup_size")) {
277       GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
278     } else if (IsKey(V.first, ".workgroup_size_hint")) {
279       GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
280     } else if (IsKey(V.first, ".wavefront_size")) {
281       KernelData.WavefrontSize = V.second.getUInt();
282     } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
283       KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
284     }
285 
286     return Error::success();
287   }
288 
289   /// Get the "amdhsa.kernels" element from the msgpack Document
290   Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
291     auto Res = MDN.find("amdhsa.kernels");
292     if (Res == MDN.end())
293       return createStringError(inconvertibleErrorCode(),
294                                "Could not find amdhsa.kernels key");
295 
296     auto Pair = *Res;
297     assert(Pair.second.isArray() &&
298            "AMDGPU kernel entries are arrays of entries");
299 
300     return Pair.second.getArray();
301   }
302 
303   /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
304   /// MapDocNode that either maps a string to a single value (most of them) or
305   /// to another array of things. Currently, we only handle the case that maps
306   /// to scalar value.
307   Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
308     offloading::amdgpu::AMDGPUKernelMetaData KernelData;
309     std::string KernelName;
310     auto Entry = (*It).getMap();
311     for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
312       if (auto Err = extractKernelData(*MI, KernelName, KernelData))
313         return Err;
314 
315     KernelInfoMap.insert({KernelName, KernelData});
316     return Error::success();
317   }
318 
319   /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
320   Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
321     auto KernelsOrErr = getAMDKernelsArray(MDN);
322     if (auto Err = KernelsOrErr.takeError())
323       return Err;
324 
325     auto KernelsArr = *KernelsOrErr;
326     for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
327       if (!It->isMap())
328         continue; // we expect <key,value> pairs
329 
330       // Obtain the value for the different entries. Each array entry is a
331       // MapDocNode
332       if (auto Err = generateKernelInfo(It))
333         return Err;
334     }
335     return Error::success();
336   }
337 
338   // Kernel names are the keys
339   StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
340 };
341 } // namespace
342 
343 Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
344     MemoryBufferRef MemBuffer,
345     StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
346     uint16_t &ELFABIVersion) {
347   Error Err = Error::success(); // Used later as out-parameter
348 
349   auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
350   if (auto Err = ELFOrError.takeError())
351     return Err;
352 
353   const object::ELF64LEFile ELFObj = ELFOrError.get();
354   Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
355   if (!Sections)
356     return Sections.takeError();
357   KernelInfoReader Reader(KernelInfoMap);
358 
359   // Read the code object version from ELF image header
360   auto Header = ELFObj.getHeader();
361   ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
362   for (const auto &S : *Sections) {
363     if (S.sh_type != ELF::SHT_NOTE)
364       continue;
365 
366     for (const auto N : ELFObj.notes(S, Err)) {
367       if (Err)
368         return Err;
369       // Fills the KernelInfoTabel entries in the reader
370       if ((Err = Reader.processNote(N, S.sh_addralign)))
371         return Err;
372     }
373   }
374   return Error::success();
375 }
376