//===- ELFObjHandler.cpp --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/InterfaceStub/ELFObjHandler.h"
#include "llvm/InterfaceStub/ELFStub.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"

using llvm::MemoryBufferRef;
using llvm::object::ELFObjectFile;

using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;

namespace llvm {
namespace elfabi {

/// Simple struct to hold the .dynamic entries this file cares about.
/// It is filled in by populateDynamic() and then consumed by buildStub() and
/// getNumSyms().
struct DynamicEntries {
  /// Address taken from the DT_STRTAB entry (dynamic string table).
  uint64_t StrTabAddr = 0;
  /// Value taken from the DT_STRSZ entry (size of the dynamic string table).
  uint64_t StrSize = 0;
  /// Value of the DT_SONAME entry, if one was present. It is validated and
  /// used as an offset into the dynamic string table.
  Optional<uint64_t> SONameOffset;
  /// One value per DT_NEEDED entry. Despite the name, these are string table
  /// offsets, not the names themselves.
  std::vector<uint64_t> NeededLibNames;
  // Symbol table:
  /// Address taken from the DT_SYMTAB entry (dynamic symbol table).
  uint64_t DynSymAddr = 0;
  // Hash tables:
  /// Address taken from the DT_HASH entry, if one was present.
  Optional<uint64_t> ElfHash;
  /// Address taken from the DT_GNU_HASH entry, if one was present.
  Optional<uint64_t> GnuHash;
};

/// This function behaves similarly to StringRef::substr(), but attempts to
/// terminate the returned StringRef at the first null terminator. If no null
/// terminator is found, an error is returned.
///
/// @param Str Source string to create a substring from.
/// @param Offset The start index of the desired substring.
47 static Expected<StringRef> terminatedSubstr(StringRef Str, size_t Offset) { 48 size_t StrEnd = Str.find('\0', Offset); 49 if (StrEnd == StringLiteral::npos) { 50 return createError( 51 "String overran bounds of string table (no null terminator)"); 52 } 53 54 size_t StrLen = StrEnd - Offset; 55 return Str.substr(Offset, StrLen); 56 } 57 58 /// This function takes an error, and appends a string of text to the end of 59 /// that error. Since "appending" to an Error isn't supported behavior of an 60 /// Error, this function technically creates a new error with the combined 61 /// message and consumes the old error. 62 /// 63 /// @param Err Source error. 64 /// @param After Text to append at the end of Err's error message. 65 Error appendToError(Error Err, StringRef After) { 66 std::string Message; 67 raw_string_ostream Stream(Message); 68 Stream << Err; 69 Stream << " " << After; 70 consumeError(std::move(Err)); 71 return createError(Stream.str().c_str()); 72 } 73 74 /// This function populates a DynamicEntries struct using an ELFT::DynRange. 75 /// After populating the struct, the members are validated with 76 /// some basic sanity checks. 77 /// 78 /// @param Dyn Target DynamicEntries struct to populate. 79 /// @param DynTable Source dynamic table. 80 template <class ELFT> 81 static Error populateDynamic(DynamicEntries &Dyn, 82 typename ELFT::DynRange DynTable) { 83 if (DynTable.empty()) 84 return createError("No .dynamic section found"); 85 86 // Search .dynamic for relevant entries. 
87 bool FoundDynStr = false; 88 bool FoundDynStrSz = false; 89 bool FoundDynSym = false; 90 for (auto &Entry : DynTable) { 91 switch (Entry.d_tag) { 92 case DT_SONAME: 93 Dyn.SONameOffset = Entry.d_un.d_val; 94 break; 95 case DT_STRTAB: 96 Dyn.StrTabAddr = Entry.d_un.d_ptr; 97 FoundDynStr = true; 98 break; 99 case DT_STRSZ: 100 Dyn.StrSize = Entry.d_un.d_val; 101 FoundDynStrSz = true; 102 break; 103 case DT_NEEDED: 104 Dyn.NeededLibNames.push_back(Entry.d_un.d_val); 105 break; 106 case DT_SYMTAB: 107 Dyn.DynSymAddr = Entry.d_un.d_ptr; 108 FoundDynSym = true; 109 break; 110 case DT_HASH: 111 Dyn.ElfHash = Entry.d_un.d_ptr; 112 break; 113 case DT_GNU_HASH: 114 Dyn.GnuHash = Entry.d_un.d_ptr; 115 } 116 } 117 118 if (!FoundDynStr) { 119 return createError( 120 "Couldn't locate dynamic string table (no DT_STRTAB entry)"); 121 } 122 if (!FoundDynStrSz) { 123 return createError( 124 "Couldn't determine dynamic string table size (no DT_STRSZ entry)"); 125 } 126 if (!FoundDynSym) { 127 return createError( 128 "Couldn't locate dynamic symbol table (no DT_SYMTAB entry)"); 129 } 130 if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) { 131 return createStringError(object_error::parse_failed, 132 "DT_SONAME string offset (0x%016" PRIx64 133 ") outside of dynamic string table", 134 *Dyn.SONameOffset); 135 } 136 for (uint64_t Offset : Dyn.NeededLibNames) { 137 if (Offset >= Dyn.StrSize) { 138 return createStringError(object_error::parse_failed, 139 "DT_NEEDED string offset (0x%016" PRIx64 140 ") outside of dynamic string table", 141 Offset); 142 } 143 } 144 145 return Error::success(); 146 } 147 148 /// This function finds the number of dynamic symbols using a GNU hash table. 149 /// 150 /// @param Table The GNU hash table for .dynsym. 
151 template <class ELFT> 152 static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) { 153 using Elf_Word = typename ELFT::Word; 154 if (Table.nbuckets == 0) 155 return Table.symndx + 1; 156 uint64_t LastSymIdx = 0; 157 uint64_t BucketVal = 0; 158 // Find the index of the first symbol in the last chain. 159 for (Elf_Word Val : Table.buckets()) { 160 BucketVal = std::max(BucketVal, (uint64_t)Val); 161 } 162 LastSymIdx += BucketVal; 163 const Elf_Word *It = 164 reinterpret_cast<const Elf_Word *>(Table.values(BucketVal).end()); 165 // Locate the end of the chain to find the last symbol index. 166 while ((*It & 1) == 0) { 167 LastSymIdx++; 168 It++; 169 } 170 return LastSymIdx + 1; 171 } 172 173 /// This function determines the number of dynamic symbols. 174 /// Without access to section headers, the number of symbols must be determined 175 /// by parsing dynamic hash tables. 176 /// 177 /// @param Dyn Entries with the locations of hash tables. 178 /// @param ElfFile The ElfFile that the section contents reside in. 179 template <class ELFT> 180 static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn, 181 const ELFFile<ELFT> &ElfFile) { 182 using Elf_Hash = typename ELFT::Hash; 183 using Elf_GnuHash = typename ELFT::GnuHash; 184 // Search GNU hash table to try to find the upper bound of dynsym. 185 if (Dyn.GnuHash.hasValue()) { 186 Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.GnuHash); 187 if (!TablePtr) 188 return TablePtr.takeError(); 189 const Elf_GnuHash *Table = 190 reinterpret_cast<const Elf_GnuHash *>(TablePtr.get()); 191 return getDynSymtabSize<ELFT>(*Table); 192 } 193 // Search SYSV hash table to try to find the upper bound of dynsym. 
194 if (Dyn.ElfHash.hasValue()) { 195 Expected<const uint8_t *> TablePtr = ElfFile.toMappedAddr(*Dyn.ElfHash); 196 if (!TablePtr) 197 return TablePtr.takeError(); 198 const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get()); 199 return Table->nchain; 200 } 201 return 0; 202 } 203 204 /// This function extracts symbol type from a symbol's st_info member and 205 /// maps it to an ELFSymbolType enum. 206 /// Currently, STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS are supported. 207 /// Other symbol types are mapped to ELFSymbolType::Unknown. 208 /// 209 /// @param Info Binary symbol st_info to extract symbol type from. 210 static ELFSymbolType convertInfoToType(uint8_t Info) { 211 Info = Info & 0xf; 212 switch (Info) { 213 case ELF::STT_NOTYPE: 214 return ELFSymbolType::NoType; 215 case ELF::STT_OBJECT: 216 return ELFSymbolType::Object; 217 case ELF::STT_FUNC: 218 return ELFSymbolType::Func; 219 case ELF::STT_TLS: 220 return ELFSymbolType::TLS; 221 default: 222 return ELFSymbolType::Unknown; 223 } 224 } 225 226 /// This function creates an ELFSymbol and populates all members using 227 /// information from a binary ELFT::Sym. 228 /// 229 /// @param SymName The desired name of the ELFSymbol. 230 /// @param RawSym ELFT::Sym to extract symbol information from. 231 template <class ELFT> 232 static ELFSymbol createELFSym(StringRef SymName, 233 const typename ELFT::Sym &RawSym) { 234 ELFSymbol TargetSym{std::string(SymName)}; 235 uint8_t Binding = RawSym.getBinding(); 236 if (Binding == STB_WEAK) 237 TargetSym.Weak = true; 238 else 239 TargetSym.Weak = false; 240 241 TargetSym.Undefined = RawSym.isUndefined(); 242 TargetSym.Type = convertInfoToType(RawSym.st_info); 243 244 if (TargetSym.Type == ELFSymbolType::Func) { 245 TargetSym.Size = 0; 246 } else { 247 TargetSym.Size = RawSym.st_size; 248 } 249 return TargetSym; 250 } 251 252 /// This function populates an ELFStub with symbols using information read 253 /// from an ELF binary. 
254 /// 255 /// @param TargetStub ELFStub to add symbols to. 256 /// @param DynSym Range of dynamic symbols to add to TargetStub. 257 /// @param DynStr StringRef to the dynamic string table. 258 template <class ELFT> 259 static Error populateSymbols(ELFStub &TargetStub, 260 const typename ELFT::SymRange DynSym, 261 StringRef DynStr) { 262 // Skips the first symbol since it's the NULL symbol. 263 for (auto RawSym : DynSym.drop_front(1)) { 264 // If a symbol does not have global or weak binding, ignore it. 265 uint8_t Binding = RawSym.getBinding(); 266 if (!(Binding == STB_GLOBAL || Binding == STB_WEAK)) 267 continue; 268 // If a symbol doesn't have default or protected visibility, ignore it. 269 uint8_t Visibility = RawSym.getVisibility(); 270 if (!(Visibility == STV_DEFAULT || Visibility == STV_PROTECTED)) 271 continue; 272 // Create an ELFSymbol and populate it with information from the symbol 273 // table entry. 274 Expected<StringRef> SymName = terminatedSubstr(DynStr, RawSym.st_name); 275 if (!SymName) 276 return SymName.takeError(); 277 ELFSymbol Sym = createELFSym<ELFT>(*SymName, RawSym); 278 TargetStub.Symbols.insert(std::move(Sym)); 279 // TODO: Populate symbol warning. 280 } 281 return Error::success(); 282 } 283 284 /// Returns a new ELFStub with all members populated from an ELFObjectFile. 285 /// @param ElfObj Source ELFObjectFile. 286 template <class ELFT> 287 static Expected<std::unique_ptr<ELFStub>> 288 buildStub(const ELFObjectFile<ELFT> &ElfObj) { 289 using Elf_Dyn_Range = typename ELFT::DynRange; 290 using Elf_Phdr_Range = typename ELFT::PhdrRange; 291 using Elf_Sym_Range = typename ELFT::SymRange; 292 using Elf_Sym = typename ELFT::Sym; 293 std::unique_ptr<ELFStub> DestStub = std::make_unique<ELFStub>(); 294 const ELFFile<ELFT> *ElfFile = ElfObj.getELFFile(); 295 // Fetch .dynamic table. 296 Expected<Elf_Dyn_Range> DynTable = ElfFile->dynamicEntries(); 297 if (!DynTable) { 298 return DynTable.takeError(); 299 } 300 301 // Fetch program headers. 
302 Expected<Elf_Phdr_Range> PHdrs = ElfFile->program_headers(); 303 if (!PHdrs) { 304 return PHdrs.takeError(); 305 } 306 307 // Collect relevant .dynamic entries. 308 DynamicEntries DynEnt; 309 if (Error Err = populateDynamic<ELFT>(DynEnt, *DynTable)) 310 return std::move(Err); 311 312 // Get pointer to in-memory location of .dynstr section. 313 Expected<const uint8_t *> DynStrPtr = 314 ElfFile->toMappedAddr(DynEnt.StrTabAddr); 315 if (!DynStrPtr) 316 return appendToError(DynStrPtr.takeError(), 317 "when locating .dynstr section contents"); 318 319 StringRef DynStr(reinterpret_cast<const char *>(DynStrPtr.get()), 320 DynEnt.StrSize); 321 322 // Populate Arch from ELF header. 323 DestStub->Arch = ElfFile->getHeader().e_machine; 324 325 // Populate SoName from .dynamic entries and dynamic string table. 326 if (DynEnt.SONameOffset.hasValue()) { 327 Expected<StringRef> NameOrErr = 328 terminatedSubstr(DynStr, *DynEnt.SONameOffset); 329 if (!NameOrErr) { 330 return appendToError(NameOrErr.takeError(), "when reading DT_SONAME"); 331 } 332 DestStub->SoName = std::string(*NameOrErr); 333 } 334 335 // Populate NeededLibs from .dynamic entries and dynamic string table. 336 for (uint64_t NeededStrOffset : DynEnt.NeededLibNames) { 337 Expected<StringRef> LibNameOrErr = 338 terminatedSubstr(DynStr, NeededStrOffset); 339 if (!LibNameOrErr) { 340 return appendToError(LibNameOrErr.takeError(), "when reading DT_NEEDED"); 341 } 342 DestStub->NeededLibs.push_back(std::string(*LibNameOrErr)); 343 } 344 345 // Populate Symbols from .dynsym table and dynamic string table. 346 Expected<uint64_t> SymCount = getNumSyms(DynEnt, *ElfFile); 347 if (!SymCount) 348 return SymCount.takeError(); 349 if (*SymCount > 0) { 350 // Get pointer to in-memory location of .dynsym section. 
351 Expected<const uint8_t *> DynSymPtr = 352 ElfFile->toMappedAddr(DynEnt.DynSymAddr); 353 if (!DynSymPtr) 354 return appendToError(DynSymPtr.takeError(), 355 "when locating .dynsym section contents"); 356 Elf_Sym_Range DynSyms = ArrayRef<Elf_Sym>( 357 reinterpret_cast<const Elf_Sym *>(*DynSymPtr), *SymCount); 358 Error SymReadError = populateSymbols<ELFT>(*DestStub, DynSyms, DynStr); 359 if (SymReadError) 360 return appendToError(std::move(SymReadError), 361 "when reading dynamic symbols"); 362 } 363 364 return std::move(DestStub); 365 } 366 367 Expected<std::unique_ptr<ELFStub>> readELFFile(MemoryBufferRef Buf) { 368 Expected<std::unique_ptr<Binary>> BinOrErr = createBinary(Buf); 369 if (!BinOrErr) { 370 return BinOrErr.takeError(); 371 } 372 373 Binary *Bin = BinOrErr->get(); 374 if (auto Obj = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) { 375 return buildStub(*Obj); 376 } else if (auto Obj = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) { 377 return buildStub(*Obj); 378 } else if (auto Obj = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) { 379 return buildStub(*Obj); 380 } else if (auto Obj = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) { 381 return buildStub(*Obj); 382 } 383 384 return createStringError(errc::not_supported, "Unsupported binary format"); 385 } 386 387 } // end namespace elfabi 388 } // end namespace llvm 389