1 //===- Utility.cpp ------ Collection of generic offloading utilities ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Frontend/Offloading/Utility.h" 10 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" 11 #include "llvm/BinaryFormat/ELF.h" 12 #include "llvm/BinaryFormat/MsgPackDocument.h" 13 #include "llvm/IR/Constants.h" 14 #include "llvm/IR/GlobalValue.h" 15 #include "llvm/IR/GlobalVariable.h" 16 #include "llvm/IR/Value.h" 17 #include "llvm/Object/ELFObjectFile.h" 18 #include "llvm/Support/MemoryBufferRef.h" 19 #include "llvm/Transforms/Utils/ModuleUtils.h" 20 21 using namespace llvm; 22 using namespace llvm::offloading; 23 24 StructType *offloading::getEntryTy(Module &M) { 25 LLVMContext &C = M.getContext(); 26 StructType *EntryTy = 27 StructType::getTypeByName(C, "struct.__tgt_offload_entry"); 28 if (!EntryTy) 29 EntryTy = StructType::create( 30 "struct.__tgt_offload_entry", Type::getInt64Ty(C), Type::getInt16Ty(C), 31 Type::getInt16Ty(C), Type::getInt32Ty(C), PointerType::getUnqual(C), 32 PointerType::getUnqual(C), Type::getInt64Ty(C), Type::getInt64Ty(C), 33 PointerType::getUnqual(C)); 34 return EntryTy; 35 } 36 37 std::pair<Constant *, GlobalVariable *> 38 offloading::getOffloadingEntryInitializer(Module &M, object::OffloadKind Kind, 39 Constant *Addr, StringRef Name, 40 uint64_t Size, uint32_t Flags, 41 uint64_t Data, Constant *AuxAddr) { 42 llvm::Triple Triple(M.getTargetTriple()); 43 Type *PtrTy = PointerType::getUnqual(M.getContext()); 44 Type *Int64Ty = Type::getInt64Ty(M.getContext()); 45 Type *Int32Ty = Type::getInt32Ty(M.getContext()); 46 Type *Int16Ty = Type::getInt16Ty(M.getContext()); 47 48 Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name); 49 50 StringRef Prefix = 51 Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name"; 52 53 // Create the constant string used to look up the symbol in the device. 54 auto *Str = 55 new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true, 56 GlobalValue::InternalLinkage, AddrName, Prefix); 57 StringRef SectionName = ".llvm.rodata.offloading"; 58 Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 59 Str->setSection(SectionName); 60 Str->setAlignment(Align(1)); 61 62 // Make a metadata node for these constants so it can be queried from IR. 63 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols"); 64 Metadata *MDVals[] = {ConstantAsMetadata::get(Str)}; 65 MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals)); 66 67 // Construct the offloading entry. 68 Constant *EntryData[] = { 69 ConstantExpr::getNullValue(Int64Ty), 70 ConstantInt::get(Int16Ty, 1), 71 ConstantInt::get(Int16Ty, Kind), 72 ConstantInt::get(Int32Ty, Flags), 73 ConstantExpr::getPointerBitCastOrAddrSpaceCast(Addr, PtrTy), 74 ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, PtrTy), 75 ConstantInt::get(Int64Ty, Size), 76 ConstantInt::get(Int64Ty, Data), 77 AuxAddr ? ConstantExpr::getPointerBitCastOrAddrSpaceCast(AuxAddr, PtrTy) 78 : ConstantExpr::getNullValue(PtrTy)}; 79 Constant *EntryInitializer = ConstantStruct::get(getEntryTy(M), EntryData); 80 return {EntryInitializer, Str}; 81 } 82 83 void offloading::emitOffloadingEntry(Module &M, object::OffloadKind Kind, 84 Constant *Addr, StringRef Name, 85 uint64_t Size, uint32_t Flags, 86 uint64_t Data, StringRef SectionName, 87 Constant *AuxAddr) { 88 llvm::Triple Triple(M.getTargetTriple()); 89 90 auto [EntryInitializer, NameGV] = getOffloadingEntryInitializer( 91 M, Kind, Addr, Name, Size, Flags, Data, AuxAddr); 92 93 StringRef Prefix = 94 Triple.isNVPTX() ? "$offloading$entry$" : ".offloading.entry."; 95 auto *Entry = new GlobalVariable( 96 M, getEntryTy(M), 97 /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer, 98 Prefix + Name, nullptr, GlobalValue::NotThreadLocal, 99 M.getDataLayout().getDefaultGlobalsAddressSpace()); 100 101 // The entry has to be created in the section the linker expects it to be. 102 if (Triple.isOSBinFormatCOFF()) 103 Entry->setSection((SectionName + "$OE").str()); 104 else 105 Entry->setSection(SectionName); 106 Entry->setAlignment(Align(1)); 107 } 108 109 std::pair<GlobalVariable *, GlobalVariable *> 110 offloading::getOffloadEntryArray(Module &M, StringRef SectionName) { 111 llvm::Triple Triple(M.getTargetTriple()); 112 113 auto *ZeroInitilaizer = 114 ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); 115 auto *EntryInit = Triple.isOSBinFormatCOFF() ? ZeroInitilaizer : nullptr; 116 auto *EntryType = ArrayType::get(getEntryTy(M), 0); 117 auto Linkage = Triple.isOSBinFormatCOFF() ? GlobalValue::WeakODRLinkage 118 : GlobalValue::ExternalLinkage; 119 120 auto *EntriesB = 121 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit, 122 "__start_" + SectionName); 123 EntriesB->setVisibility(GlobalValue::HiddenVisibility); 124 auto *EntriesE = 125 new GlobalVariable(M, EntryType, /*isConstant=*/true, Linkage, EntryInit, 126 "__stop_" + SectionName); 127 EntriesE->setVisibility(GlobalValue::HiddenVisibility); 128 129 if (Triple.isOSBinFormatELF()) { 130 // We assume that external begin/end symbols that we have created above will 131 // be defined by the linker. This is done whenever a section name with a 132 // valid C-identifier is present. We define a dummy variable here to force 133 // the linker to always provide these symbols. 134 auto *DummyEntry = new GlobalVariable( 135 M, ZeroInitilaizer->getType(), true, GlobalVariable::InternalLinkage, 136 ZeroInitilaizer, "__dummy." + SectionName); 137 DummyEntry->setSection(SectionName); 138 appendToCompilerUsed(M, DummyEntry); 139 } else { 140 // The COFF linker will merge sections containing a '$' together into a 141 // single section. The order of entries in this section will be sorted 142 // alphabetically by the characters following the '$' in the name. Set the 143 // sections here to ensure that the beginning and end symbols are sorted. 144 EntriesB->setSection((SectionName + "$OA").str()); 145 EntriesE->setSection((SectionName + "$OZ").str()); 146 } 147 148 return std::make_pair(EntriesB, EntriesE); 149 } 150 151 bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch, 152 uint32_t ImageFlags, 153 StringRef EnvTargetID) { 154 using namespace llvm::ELF; 155 StringRef EnvArch = EnvTargetID.split(":").first; 156 157 // Trivial check if the base processors match. 158 if (EnvArch != ImageArch) 159 return false; 160 161 // Check if the image is requesting xnack on or off. 162 switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) { 163 case EF_AMDGPU_FEATURE_XNACK_OFF_V4: 164 // The image is 'xnack-' so the environment must be 'xnack-'. 165 if (!EnvTargetID.contains("xnack-")) 166 return false; 167 break; 168 case EF_AMDGPU_FEATURE_XNACK_ON_V4: 169 // The image is 'xnack+' so the environment must be 'xnack+'. 170 if (!EnvTargetID.contains("xnack+")) 171 return false; 172 break; 173 case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4: 174 case EF_AMDGPU_FEATURE_XNACK_ANY_V4: 175 default: 176 break; 177 } 178 179 // Check if the image is requesting sramecc on or off. 180 switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) { 181 case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4: 182 // The image is 'sramecc-' so the environment must be 'sramecc-'. 183 if (!EnvTargetID.contains("sramecc-")) 184 return false; 185 break; 186 case EF_AMDGPU_FEATURE_SRAMECC_ON_V4: 187 // The image is 'sramecc+' so the environment must be 'sramecc+'. 188 if (!EnvTargetID.contains("sramecc+")) 189 return false; 190 break; 191 case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4: 192 case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4: 193 break; 194 } 195 196 return true; 197 } 198 199 namespace { 200 /// Reads the AMDGPU specific per-kernel-metadata from an image. 201 class KernelInfoReader { 202 public: 203 KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM) 204 : KernelInfoMap(KIM) {} 205 206 /// Process ELF note to read AMDGPU metadata from respective information 207 /// fields. 208 Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) { 209 if (Note.getName() != "AMDGPU") 210 return Error::success(); // We are not interested in other things 211 212 assert(Note.getType() == ELF::NT_AMDGPU_METADATA && 213 "Parse AMDGPU MetaData"); 214 auto Desc = Note.getDesc(Align); 215 StringRef MsgPackString = 216 StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size()); 217 msgpack::Document MsgPackDoc; 218 if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false)) 219 return Error::success(); 220 221 AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); 222 if (!Verifier.verify(MsgPackDoc.getRoot())) 223 return Error::success(); 224 225 auto RootMap = MsgPackDoc.getRoot().getMap(true); 226 227 if (auto Err = iterateAMDKernels(RootMap)) 228 return Err; 229 230 return Error::success(); 231 } 232 233 private: 234 /// Extracts the relevant information via simple string look-up in the msgpack 235 /// document elements. 236 Error 237 extractKernelData(msgpack::MapDocNode::MapTy::value_type V, 238 std::string &KernelName, 239 offloading::amdgpu::AMDGPUKernelMetaData &KernelData) { 240 if (!V.first.isString()) 241 return Error::success(); 242 243 const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) { 244 return DK.getString() == SK; 245 }; 246 247 const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN, 248 uint32_t *Vals) { 249 assert(DN.isArray() && "MsgPack DocNode is an array node"); 250 auto DNA = DN.getArray(); 251 assert(DNA.size() == 3 && "ArrayNode has at most three elements"); 252 253 int I = 0; 254 for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd; 255 ++DNABegin) { 256 Vals[I++] = DNABegin->getUInt(); 257 } 258 }; 259 260 if (IsKey(V.first, ".name")) { 261 KernelName = V.second.toString(); 262 } else if (IsKey(V.first, ".sgpr_count")) { 263 KernelData.SGPRCount = V.second.getUInt(); 264 } else if (IsKey(V.first, ".sgpr_spill_count")) { 265 KernelData.SGPRSpillCount = V.second.getUInt(); 266 } else if (IsKey(V.first, ".vgpr_count")) { 267 KernelData.VGPRCount = V.second.getUInt(); 268 } else if (IsKey(V.first, ".vgpr_spill_count")) { 269 KernelData.VGPRSpillCount = V.second.getUInt(); 270 } else if (IsKey(V.first, ".agpr_count")) { 271 KernelData.AGPRCount = V.second.getUInt(); 272 } else if (IsKey(V.first, ".private_segment_fixed_size")) { 273 KernelData.PrivateSegmentSize = V.second.getUInt(); 274 } else if (IsKey(V.first, ".group_segment_fixed_size")) { 275 KernelData.GroupSegmentList = V.second.getUInt(); 276 } else if (IsKey(V.first, ".reqd_workgroup_size")) { 277 GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize); 278 } else if (IsKey(V.first, ".workgroup_size_hint")) { 279 GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint); 280 } else if (IsKey(V.first, ".wavefront_size")) { 281 KernelData.WavefrontSize = V.second.getUInt(); 282 } else if (IsKey(V.first, ".max_flat_workgroup_size")) { 283 KernelData.MaxFlatWorkgroupSize = V.second.getUInt(); 284 } 285 286 return Error::success(); 287 } 288 289 /// Get the "amdhsa.kernels" element from the msgpack Document 290 Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) { 291 auto Res = MDN.find("amdhsa.kernels"); 292 if (Res == MDN.end()) 293 return createStringError(inconvertibleErrorCode(), 294 "Could not find amdhsa.kernels key"); 295 296 auto Pair = *Res; 297 assert(Pair.second.isArray() && 298 "AMDGPU kernel entries are arrays of entries"); 299 300 return Pair.second.getArray(); 301 } 302 303 /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a 304 /// MapDocNode that either maps a string to a single value (most of them) or 305 /// to another array of things. Currently, we only handle the case that maps 306 /// to scalar value. 307 Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) { 308 offloading::amdgpu::AMDGPUKernelMetaData KernelData; 309 std::string KernelName; 310 auto Entry = (*It).getMap(); 311 for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI) 312 if (auto Err = extractKernelData(*MI, KernelName, KernelData)) 313 return Err; 314 315 KernelInfoMap.insert({KernelName, KernelData}); 316 return Error::success(); 317 } 318 319 /// Go over the list of AMD kernels in the "amdhsa.kernels" entry 320 Error iterateAMDKernels(msgpack::MapDocNode &MDN) { 321 auto KernelsOrErr = getAMDKernelsArray(MDN); 322 if (auto Err = KernelsOrErr.takeError()) 323 return Err; 324 325 auto KernelsArr = *KernelsOrErr; 326 for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) { 327 if (!It->isMap()) 328 continue; // we expect <key,value> pairs 329 330 // Obtain the value for the different entries. Each array entry is a 331 // MapDocNode 332 if (auto Err = generateKernelInfo(It)) 333 return Err; 334 } 335 return Error::success(); 336 } 337 338 // Kernel names are the keys 339 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap; 340 }; 341 } // namespace 342 343 Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( 344 MemoryBufferRef MemBuffer, 345 StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap, 346 uint16_t &ELFABIVersion) { 347 Error Err = Error::success(); // Used later as out-parameter 348 349 auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer()); 350 if (auto Err = ELFOrError.takeError()) 351 return Err; 352 353 const object::ELF64LEFile ELFObj = ELFOrError.get(); 354 Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections(); 355 if (!Sections) 356 return Sections.takeError(); 357 KernelInfoReader Reader(KernelInfoMap); 358 359 // Read the code object version from ELF image header 360 auto Header = ELFObj.getHeader(); 361 ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]); 362 for (const auto &S : *Sections) { 363 if (S.sh_type != ELF::SHT_NOTE) 364 continue; 365 366 for (const auto N : ELFObj.notes(S, Err)) { 367 if (Err) 368 return Err; 369 // Fills the KernelInfoTabel entries in the reader 370 if ((Err = Reader.processNote(N, S.sh_addralign))) 371 return Err; 372 } 373 } 374 return Error::success(); 375 } 376