1 //===- MachOReader.cpp ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOReader.h" 10 #include "MachOObject.h" 11 #include "llvm/BinaryFormat/MachO.h" 12 #include "llvm/Object/MachO.h" 13 #include "llvm/Support/SystemZ/zOSSupport.h" 14 #include <memory> 15 16 using namespace llvm; 17 using namespace llvm::objcopy; 18 using namespace llvm::objcopy::macho; 19 20 void MachOReader::readHeader(Object &O) const { 21 O.Header.Magic = MachOObj.getHeader().magic; 22 O.Header.CPUType = MachOObj.getHeader().cputype; 23 O.Header.CPUSubType = MachOObj.getHeader().cpusubtype; 24 O.Header.FileType = MachOObj.getHeader().filetype; 25 O.Header.NCmds = MachOObj.getHeader().ncmds; 26 O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds; 27 O.Header.Flags = MachOObj.getHeader().flags; 28 } 29 30 template <typename SectionType> 31 static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) { 32 StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))); 33 StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname))); 34 Section S(SegName, SectName); 35 S.Index = Index; 36 S.Addr = Sec.addr; 37 S.Size = Sec.size; 38 S.OriginalOffset = Sec.offset; 39 S.Align = Sec.align; 40 S.RelOff = Sec.reloff; 41 S.NReloc = Sec.nreloc; 42 S.Flags = Sec.flags; 43 S.Reserved1 = Sec.reserved1; 44 S.Reserved2 = Sec.reserved2; 45 S.Reserved3 = 0; 46 return S; 47 } 48 49 Section constructSection(const MachO::section &Sec, uint32_t Index) { 50 return constructSectionCommon(Sec, Index); 51 } 52 53 Section constructSection(const MachO::section_64 &Sec, uint32_t Index) { 54 Section S = constructSectionCommon(Sec, Index); 55 S.Reserved3 = Sec.reserved3; 56 return S; 57 } 58 59 template <typename SectionType, typename SegmentType> 60 Expected<std::vector<std::unique_ptr<Section>>> static extractSections( 61 const object::MachOObjectFile::LoadCommandInfo &LoadCmd, 62 const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) { 63 std::vector<std::unique_ptr<Section>> Sections; 64 for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 65 sizeof(SegmentType)), 66 End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr + 67 LoadCmd.C.cmdsize); 68 Curr < End; ++Curr) { 69 SectionType Sec; 70 memcpy((void *)&Sec, reinterpret_cast<const char *>(Curr), 71 sizeof(SectionType)); 72 73 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 74 MachO::swapStruct(Sec); 75 76 Sections.push_back( 77 std::make_unique<Section>(constructSection(Sec, NextSectionIndex))); 78 79 Section &S = *Sections.back(); 80 81 Expected<object::SectionRef> SecRef = 82 MachOObj.getSection(NextSectionIndex++); 83 if (!SecRef) 84 return SecRef.takeError(); 85 86 Expected<ArrayRef<uint8_t>> Data = 87 MachOObj.getSectionContents(SecRef->getRawDataRefImpl()); 88 if (!Data) 89 return Data.takeError(); 90 91 S.Content = 92 StringRef(reinterpret_cast<const char *>(Data->data()), Data->size()); 93 94 const uint32_t CPUType = MachOObj.getHeader().cputype; 95 S.Relocations.reserve(S.NReloc); 96 for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()), 97 RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl()); 98 RI != RE; ++RI) { 99 RelocationInfo R; 100 R.Symbol = nullptr; // We'll fill this field later. 101 R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl()); 102 R.Scattered = MachOObj.isRelocationScattered(R.Info); 103 unsigned Type = MachOObj.getAnyRelocationType(R.Info); 104 // TODO Support CPU_TYPE_ARM. 105 R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 && 106 Type == MachO::ARM64_RELOC_ADDEND); 107 R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info); 108 S.Relocations.push_back(R); 109 } 110 111 assert(S.NReloc == S.Relocations.size() && 112 "Incorrect number of relocations"); 113 } 114 return std::move(Sections); 115 } 116 117 Error MachOReader::readLoadCommands(Object &O) const { 118 // For MachO sections indices start from 1. 119 uint32_t NextSectionIndex = 1; 120 static constexpr char TextSegmentName[] = "__TEXT"; 121 for (auto LoadCmd : MachOObj.load_commands()) { 122 LoadCommand LC; 123 switch (LoadCmd.C.cmd) { 124 case MachO::LC_CODE_SIGNATURE: 125 O.CodeSignatureCommandIndex = O.LoadCommands.size(); 126 break; 127 case MachO::LC_SEGMENT: 128 // LoadCmd.Ptr might not be aligned temporarily as 129 // MachO::segment_command requires, but the segname char pointer do not 130 // have alignment restrictions. 131 if (StringRef(reinterpret_cast<const char *>( 132 LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) == 133 TextSegmentName) 134 O.TextSegmentCommandIndex = O.LoadCommands.size(); 135 136 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 137 extractSections<MachO::section, MachO::segment_command>( 138 LoadCmd, MachOObj, NextSectionIndex)) 139 LC.Sections = std::move(*Sections); 140 else 141 return Sections.takeError(); 142 break; 143 case MachO::LC_SEGMENT_64: 144 // LoadCmd.Ptr might not be aligned temporarily as 145 // MachO::segment_command_64 requires, but the segname char pointer do 146 // not have alignment restrictions. 147 if (StringRef(reinterpret_cast<const char *>( 148 LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) == 149 TextSegmentName) 150 O.TextSegmentCommandIndex = O.LoadCommands.size(); 151 152 if (Expected<std::vector<std::unique_ptr<Section>>> Sections = 153 extractSections<MachO::section_64, MachO::segment_command_64>( 154 LoadCmd, MachOObj, NextSectionIndex)) 155 LC.Sections = std::move(*Sections); 156 else 157 return Sections.takeError(); 158 break; 159 case MachO::LC_SYMTAB: 160 O.SymTabCommandIndex = O.LoadCommands.size(); 161 break; 162 case MachO::LC_DYSYMTAB: 163 O.DySymTabCommandIndex = O.LoadCommands.size(); 164 break; 165 case MachO::LC_DYLD_INFO: 166 case MachO::LC_DYLD_INFO_ONLY: 167 O.DyLdInfoCommandIndex = O.LoadCommands.size(); 168 break; 169 case MachO::LC_DATA_IN_CODE: 170 O.DataInCodeCommandIndex = O.LoadCommands.size(); 171 break; 172 case MachO::LC_LINKER_OPTIMIZATION_HINT: 173 O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size(); 174 break; 175 case MachO::LC_FUNCTION_STARTS: 176 O.FunctionStartsCommandIndex = O.LoadCommands.size(); 177 break; 178 case MachO::LC_DYLIB_CODE_SIGN_DRS: 179 O.DylibCodeSignDRsIndex = O.LoadCommands.size(); 180 break; 181 case MachO::LC_DYLD_EXPORTS_TRIE: 182 O.ExportsTrieCommandIndex = O.LoadCommands.size(); 183 break; 184 case MachO::LC_DYLD_CHAINED_FIXUPS: 185 O.ChainedFixupsCommandIndex = O.LoadCommands.size(); 186 break; 187 case MachO::LC_ENCRYPTION_INFO: 188 case MachO::LC_ENCRYPTION_INFO_64: 189 O.EncryptionInfoCommandIndex = O.LoadCommands.size(); 190 break; 191 } 192 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 193 case MachO::LCName: \ 194 memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \ 195 sizeof(MachO::LCStruct)); \ 196 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \ 197 MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \ 198 if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \ 199 LC.Payload = ArrayRef<uint8_t>( \ 200 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \ 201 sizeof(MachO::LCStruct), \ 202 LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \ 203 break; 204 205 switch (LoadCmd.C.cmd) { 206 default: 207 memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr, 208 sizeof(MachO::load_command)); 209 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) 210 MachO::swapStruct(LC.MachOLoadCommand.load_command_data); 211 if (LoadCmd.C.cmdsize > sizeof(MachO::load_command)) 212 LC.Payload = ArrayRef<uint8_t>( 213 reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + 214 sizeof(MachO::load_command), 215 LoadCmd.C.cmdsize - sizeof(MachO::load_command)); 216 break; 217 #include "llvm/BinaryFormat/MachO.def" 218 } 219 O.LoadCommands.push_back(std::move(LC)); 220 } 221 return Error::success(); 222 } 223 224 template <typename nlist_t> 225 SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) { 226 assert(nlist.n_strx < StrTable.size() && 227 "n_strx exceeds the size of the string table"); 228 SymbolEntry SE; 229 SE.Name = StringRef(StrTable.data() + nlist.n_strx).str(); 230 SE.n_type = nlist.n_type; 231 SE.n_sect = nlist.n_sect; 232 SE.n_desc = nlist.n_desc; 233 SE.n_value = nlist.n_value; 234 return SE; 235 } 236 237 void MachOReader::readSymbolTable(Object &O) const { 238 StringRef StrTable = MachOObj.getStringTableData(); 239 for (auto Symbol : MachOObj.symbols()) { 240 SymbolEntry SE = 241 (MachOObj.is64Bit() 242 ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry( 243 Symbol.getRawDataRefImpl())) 244 : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry( 245 Symbol.getRawDataRefImpl()))); 246 247 O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE)); 248 } 249 } 250 251 void MachOReader::setSymbolInRelocationInfo(Object &O) const { 252 std::vector<const Section *> Sections; 253 for (auto &LC : O.LoadCommands) 254 for (std::unique_ptr<Section> &Sec : LC.Sections) 255 Sections.push_back(Sec.get()); 256 257 for (LoadCommand &LC : O.LoadCommands) 258 for (std::unique_ptr<Section> &Sec : LC.Sections) 259 for (auto &Reloc : Sec->Relocations) 260 if (!Reloc.Scattered && !Reloc.IsAddend) { 261 const uint32_t SymbolNum = 262 Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian()); 263 if (Reloc.Extern) { 264 Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum); 265 } else { 266 // FIXME: Refactor error handling in MachOReader and report an error 267 // if we encounter an invalid relocation. 268 assert(SymbolNum >= 1 && SymbolNum <= Sections.size() && 269 "Invalid section index."); 270 Reloc.Sec = Sections[SymbolNum - 1]; 271 } 272 } 273 } 274 275 void MachOReader::readRebaseInfo(Object &O) const { 276 O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes(); 277 } 278 279 void MachOReader::readBindInfo(Object &O) const { 280 O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes(); 281 } 282 283 void MachOReader::readWeakBindInfo(Object &O) const { 284 O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes(); 285 } 286 287 void MachOReader::readLazyBindInfo(Object &O) const { 288 O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes(); 289 } 290 291 void MachOReader::readExportInfo(Object &O) const { 292 // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE 293 ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie(); 294 if (Trie.empty()) 295 Trie = MachOObj.getDyldExportsTrie(); 296 O.Exports.Trie = Trie; 297 } 298 299 void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex, 300 LinkData &LD) const { 301 if (!LCIndex) 302 return; 303 const MachO::linkedit_data_command &LC = 304 O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; 305 LD.Data = 306 arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize)); 307 } 308 309 void MachOReader::readDataInCodeData(Object &O) const { 310 return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode); 311 } 312 313 void MachOReader::readLinkerOptimizationHint(Object &O) const { 314 return readLinkData(O, O.LinkerOptimizationHintCommandIndex, 315 O.LinkerOptimizationHint); 316 } 317 318 void MachOReader::readFunctionStartsData(Object &O) const { 319 return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts); 320 } 321 322 void MachOReader::readDylibCodeSignDRs(Object &O) const { 323 return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs); 324 } 325 326 void MachOReader::readExportsTrie(Object &O) const { 327 return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie); 328 } 329 330 void MachOReader::readChainedFixups(Object &O) const { 331 return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups); 332 } 333 334 void MachOReader::readIndirectSymbolTable(Object &O) const { 335 MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand(); 336 constexpr uint32_t AbsOrLocalMask = 337 MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS; 338 for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) { 339 uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i); 340 if ((Index & AbsOrLocalMask) != 0) 341 O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt); 342 else 343 O.IndirectSymTable.Symbols.emplace_back( 344 Index, O.SymTable.getSymbolByIndex(Index)); 345 } 346 } 347 348 void MachOReader::readSwiftVersion(Object &O) const { 349 struct ObjCImageInfo { 350 uint32_t Version; 351 uint32_t Flags; 352 } ImageInfo; 353 354 for (const LoadCommand &LC : O.LoadCommands) 355 for (const std::unique_ptr<Section> &Sec : LC.Sections) 356 if (Sec->Sectname == "__objc_imageinfo" && 357 (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" || 358 Sec->Segname == "__DATA_DIRTY") && 359 Sec->Content.size() >= sizeof(ObjCImageInfo)) { 360 memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo)); 361 if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) { 362 sys::swapByteOrder(ImageInfo.Version); 363 sys::swapByteOrder(ImageInfo.Flags); 364 } 365 O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff; 366 return; 367 } 368 } 369 370 Expected<std::unique_ptr<Object>> MachOReader::create() const { 371 auto Obj = std::make_unique<Object>(); 372 readHeader(*Obj); 373 if (Error E = readLoadCommands(*Obj)) 374 return std::move(E); 375 readSymbolTable(*Obj); 376 setSymbolInRelocationInfo(*Obj); 377 readRebaseInfo(*Obj); 378 readBindInfo(*Obj); 379 readWeakBindInfo(*Obj); 380 readLazyBindInfo(*Obj); 381 readExportInfo(*Obj); 382 readDataInCodeData(*Obj); 383 readLinkerOptimizationHint(*Obj); 384 readFunctionStartsData(*Obj); 385 readDylibCodeSignDRs(*Obj); 386 readExportsTrie(*Obj); 387 readChainedFixups(*Obj); 388 readIndirectSymbolTable(*Obj); 389 readSwiftVersion(*Obj); 390 return std::move(Obj); 391 } 392