1 //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MachOLayoutBuilder.h" 10 #include "llvm/Support/Alignment.h" 11 #include "llvm/Support/Errc.h" 12 #include "llvm/Support/ErrorHandling.h" 13 #include "llvm/Support/SystemZ/zOSSupport.h" 14 15 using namespace llvm; 16 using namespace llvm::objcopy::macho; 17 18 StringTableBuilder::Kind 19 MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) { 20 if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) 21 return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO; 22 return Is64Bit ? StringTableBuilder::MachO64Linked 23 : StringTableBuilder::MachOLinked; 24 } 25 26 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const { 27 uint32_t Size = 0; 28 for (const LoadCommand &LC : O.LoadCommands) { 29 const MachO::macho_load_command &MLC = LC.MachOLoadCommand; 30 auto cmd = MLC.load_command_data.cmd; 31 switch (cmd) { 32 case MachO::LC_SEGMENT: 33 Size += sizeof(MachO::segment_command) + 34 sizeof(MachO::section) * LC.Sections.size(); 35 continue; 36 case MachO::LC_SEGMENT_64: 37 Size += sizeof(MachO::segment_command_64) + 38 sizeof(MachO::section_64) * LC.Sections.size(); 39 continue; 40 } 41 42 switch (cmd) { 43 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 44 case MachO::LCName: \ 45 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \ 46 break; 47 #include "llvm/BinaryFormat/MachO.def" 48 #undef HANDLE_LOAD_COMMAND 49 } 50 } 51 52 return Size; 53 } 54 55 void MachOLayoutBuilder::constructStringTable() { 56 for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols) 57 StrTableBuilder.add(Sym->Name); 58 StrTableBuilder.finalize(); 59 } 60 61 void MachOLayoutBuilder::updateSymbolIndexes() { 62 uint32_t Index = 0; 63 for (auto &Symbol : O.SymTable.Symbols) 64 Symbol->Index = Index++; 65 } 66 67 // Updates the index and the number of local/external/undefined symbols. 68 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) { 69 assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB); 70 // Make sure that nlist entries in the symbol table are sorted by the those 71 // types. The order is: local < defined external < undefined external. 72 assert(llvm::is_sorted(O.SymTable.Symbols, 73 [](const std::unique_ptr<SymbolEntry> &A, 74 const std::unique_ptr<SymbolEntry> &B) { 75 bool AL = A->isLocalSymbol(), 76 BL = B->isLocalSymbol(); 77 if (AL != BL) 78 return AL; 79 return !AL && !A->isUndefinedSymbol() && 80 B->isUndefinedSymbol(); 81 }) && 82 "Symbols are not sorted by their types."); 83 84 uint32_t NumLocalSymbols = 0; 85 auto Iter = O.SymTable.Symbols.begin(); 86 auto End = O.SymTable.Symbols.end(); 87 for (; Iter != End; ++Iter) { 88 if ((*Iter)->isExternalSymbol()) 89 break; 90 91 ++NumLocalSymbols; 92 } 93 94 uint32_t NumExtDefSymbols = 0; 95 for (; Iter != End; ++Iter) { 96 if ((*Iter)->isUndefinedSymbol()) 97 break; 98 99 ++NumExtDefSymbols; 100 } 101 102 MLC.dysymtab_command_data.ilocalsym = 0; 103 MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols; 104 MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols; 105 MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols; 106 MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols; 107 MLC.dysymtab_command_data.nundefsym = 108 O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols); 109 } 110 111 // Recomputes and updates offset and size fields in load commands and sections 112 // since they could be modified. 113 uint64_t MachOLayoutBuilder::layoutSegments() { 114 auto HeaderSize = 115 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 116 const bool IsObjectFile = 117 O.Header.FileType == MachO::HeaderFileType::MH_OBJECT; 118 uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0; 119 if (O.EncryptionInfoCommandIndex) { 120 // If we are emitting an encryptable binary, our load commands must have a 121 // separate (non-encrypted) page to themselves. 122 Offset = alignToPowerOf2(HeaderSize + O.Header.SizeOfCmds, PageSize); 123 } 124 for (LoadCommand &LC : O.LoadCommands) { 125 auto &MLC = LC.MachOLoadCommand; 126 StringRef Segname; 127 uint64_t SegmentVmAddr; 128 uint64_t SegmentVmSize; 129 switch (MLC.load_command_data.cmd) { 130 case MachO::LC_SEGMENT: 131 SegmentVmAddr = MLC.segment_command_data.vmaddr; 132 SegmentVmSize = MLC.segment_command_data.vmsize; 133 Segname = StringRef(MLC.segment_command_data.segname, 134 strnlen(MLC.segment_command_data.segname, 135 sizeof(MLC.segment_command_data.segname))); 136 break; 137 case MachO::LC_SEGMENT_64: 138 SegmentVmAddr = MLC.segment_command_64_data.vmaddr; 139 SegmentVmSize = MLC.segment_command_64_data.vmsize; 140 Segname = StringRef(MLC.segment_command_64_data.segname, 141 strnlen(MLC.segment_command_64_data.segname, 142 sizeof(MLC.segment_command_64_data.segname))); 143 break; 144 default: 145 continue; 146 } 147 148 if (Segname == "__LINKEDIT") { 149 // We update the __LINKEDIT segment later (in layoutTail). 150 assert(LC.Sections.empty() && "__LINKEDIT segment has sections"); 151 LinkEditLoadCommand = &MLC; 152 continue; 153 } 154 155 // Update file offsets and sizes of sections. 156 uint64_t SegOffset = Offset; 157 uint64_t SegFileSize = 0; 158 uint64_t VMSize = 0; 159 for (std::unique_ptr<Section> &Sec : LC.Sections) { 160 assert(SegmentVmAddr <= Sec->Addr && 161 "Section's address cannot be smaller than Segment's one"); 162 uint32_t SectOffset = Sec->Addr - SegmentVmAddr; 163 if (IsObjectFile) { 164 if (!Sec->hasValidOffset()) { 165 Sec->Offset = 0; 166 } else { 167 uint64_t PaddingSize = 168 offsetToAlignment(SegFileSize, Align(1ull << Sec->Align)); 169 Sec->Offset = SegOffset + SegFileSize + PaddingSize; 170 Sec->Size = Sec->Content.size(); 171 SegFileSize += PaddingSize + Sec->Size; 172 } 173 } else { 174 if (!Sec->hasValidOffset()) { 175 Sec->Offset = 0; 176 } else { 177 Sec->Offset = SegOffset + SectOffset; 178 Sec->Size = Sec->Content.size(); 179 SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size); 180 } 181 } 182 VMSize = std::max(VMSize, SectOffset + Sec->Size); 183 } 184 185 if (IsObjectFile) { 186 Offset += SegFileSize; 187 } else { 188 Offset = alignTo(Offset + SegFileSize, PageSize); 189 SegFileSize = alignTo(SegFileSize, PageSize); 190 // Use the original vmsize if the segment is __PAGEZERO. 191 VMSize = 192 Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize); 193 } 194 195 switch (MLC.load_command_data.cmd) { 196 case MachO::LC_SEGMENT: 197 MLC.segment_command_data.cmdsize = 198 sizeof(MachO::segment_command) + 199 sizeof(MachO::section) * LC.Sections.size(); 200 MLC.segment_command_data.nsects = LC.Sections.size(); 201 MLC.segment_command_data.fileoff = SegOffset; 202 MLC.segment_command_data.vmsize = VMSize; 203 MLC.segment_command_data.filesize = SegFileSize; 204 break; 205 case MachO::LC_SEGMENT_64: 206 MLC.segment_command_64_data.cmdsize = 207 sizeof(MachO::segment_command_64) + 208 sizeof(MachO::section_64) * LC.Sections.size(); 209 MLC.segment_command_64_data.nsects = LC.Sections.size(); 210 MLC.segment_command_64_data.fileoff = SegOffset; 211 MLC.segment_command_64_data.vmsize = VMSize; 212 MLC.segment_command_64_data.filesize = SegFileSize; 213 break; 214 } 215 } 216 217 return Offset; 218 } 219 220 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) { 221 for (LoadCommand &LC : O.LoadCommands) 222 for (std::unique_ptr<Section> &Sec : LC.Sections) { 223 Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset; 224 Sec->NReloc = Sec->Relocations.size(); 225 Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc; 226 } 227 228 return Offset; 229 } 230 231 Error MachOLayoutBuilder::layoutTail(uint64_t Offset) { 232 // If we are building the layout of an executable or dynamic library 233 // which does not have any segments other than __LINKEDIT, 234 // the Offset can be equal to zero by this time. It happens because of the 235 // convention that in such cases the file offsets specified by LC_SEGMENT 236 // start with zero (unlike the case of a relocatable object file). 237 const uint64_t HeaderSize = 238 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 239 assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) || 240 Offset >= HeaderSize + O.Header.SizeOfCmds) && 241 "Incorrect tail offset"); 242 Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds); 243 244 // The exports trie can be in either LC_DYLD_INFO or in 245 // LC_DYLD_EXPORTS_TRIE, but not both. 246 size_t DyldInfoExportsTrieSize = 0; 247 size_t DyldExportsTrieSize = 0; 248 for (const auto &LC : O.LoadCommands) { 249 switch (LC.MachOLoadCommand.load_command_data.cmd) { 250 case MachO::LC_DYLD_INFO: 251 case MachO::LC_DYLD_INFO_ONLY: 252 DyldInfoExportsTrieSize = O.Exports.Trie.size(); 253 break; 254 case MachO::LC_DYLD_EXPORTS_TRIE: 255 DyldExportsTrieSize = O.Exports.Trie.size(); 256 break; 257 default: 258 break; 259 } 260 } 261 assert((DyldInfoExportsTrieSize == 0 || DyldExportsTrieSize == 0) && 262 "Export trie in both LCs"); 263 264 uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); 265 uint64_t StartOfLinkEdit = Offset; 266 267 // The order of LINKEDIT elements is as follows: 268 // rebase info, binding info, weak binding info, lazy binding info, export 269 // trie, chained fixups, dyld exports trie, function starts, data-in-code, 270 // symbol table, indirect symbol table, symbol table strings, 271 // dylib codesign drs, and code signature. 272 auto updateOffset = [&Offset](size_t Size) { 273 uint64_t PreviousOffset = Offset; 274 Offset += Size; 275 return PreviousOffset; 276 }; 277 278 uint64_t StartOfRebaseInfo = updateOffset(O.Rebases.Opcodes.size()); 279 uint64_t StartOfBindingInfo = updateOffset(O.Binds.Opcodes.size()); 280 uint64_t StartOfWeakBindingInfo = updateOffset(O.WeakBinds.Opcodes.size()); 281 uint64_t StartOfLazyBindingInfo = updateOffset(O.LazyBinds.Opcodes.size()); 282 uint64_t StartOfExportTrie = updateOffset(DyldInfoExportsTrieSize); 283 uint64_t StartOfChainedFixups = updateOffset(O.ChainedFixups.Data.size()); 284 uint64_t StartOfDyldExportsTrie = updateOffset(DyldExportsTrieSize); 285 uint64_t StartOfFunctionStarts = updateOffset(O.FunctionStarts.Data.size()); 286 uint64_t StartOfDataInCode = updateOffset(O.DataInCode.Data.size()); 287 uint64_t StartOfLinkerOptimizationHint = 288 updateOffset(O.LinkerOptimizationHint.Data.size()); 289 uint64_t StartOfSymbols = updateOffset(NListSize * O.SymTable.Symbols.size()); 290 uint64_t StartOfIndirectSymbols = 291 updateOffset(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size()); 292 uint64_t StartOfSymbolStrings = updateOffset(StrTableBuilder.getSize()); 293 uint64_t StartOfDylibCodeSignDRs = updateOffset(O.DylibCodeSignDRs.Data.size()); 294 295 uint64_t StartOfCodeSignature = Offset; 296 uint32_t CodeSignatureSize = 0; 297 if (O.CodeSignatureCommandIndex) { 298 StartOfCodeSignature = alignTo(StartOfCodeSignature, 16); 299 300 // Note: These calculations are to be kept in sync with the same 301 // calculations performed in LLD's CodeSignatureSection. 302 const uint32_t AllHeadersSize = 303 alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1, 304 CodeSignature.Align); 305 const uint32_t BlockCount = 306 (StartOfCodeSignature + CodeSignature.BlockSize - 1) / 307 CodeSignature.BlockSize; 308 const uint32_t Size = 309 alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize, 310 CodeSignature.Align); 311 312 CodeSignature.StartOffset = StartOfCodeSignature; 313 CodeSignature.AllHeadersSize = AllHeadersSize; 314 CodeSignature.BlockCount = BlockCount; 315 CodeSignature.OutputFileName = OutputFileName; 316 CodeSignature.Size = Size; 317 CodeSignatureSize = Size; 318 } 319 uint64_t LinkEditSize = 320 StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit; 321 322 // Now we have determined the layout of the contents of the __LINKEDIT 323 // segment. Update its load command. 324 if (LinkEditLoadCommand) { 325 MachO::macho_load_command *MLC = LinkEditLoadCommand; 326 switch (LinkEditLoadCommand->load_command_data.cmd) { 327 case MachO::LC_SEGMENT: 328 MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command); 329 MLC->segment_command_data.fileoff = StartOfLinkEdit; 330 MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize); 331 MLC->segment_command_data.filesize = LinkEditSize; 332 break; 333 case MachO::LC_SEGMENT_64: 334 MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64); 335 MLC->segment_command_64_data.fileoff = StartOfLinkEdit; 336 MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize); 337 MLC->segment_command_64_data.filesize = LinkEditSize; 338 break; 339 } 340 } 341 342 for (LoadCommand &LC : O.LoadCommands) { 343 auto &MLC = LC.MachOLoadCommand; 344 auto cmd = MLC.load_command_data.cmd; 345 switch (cmd) { 346 case MachO::LC_CODE_SIGNATURE: 347 MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature; 348 MLC.linkedit_data_command_data.datasize = CodeSignatureSize; 349 break; 350 case MachO::LC_DYLIB_CODE_SIGN_DRS: 351 MLC.linkedit_data_command_data.dataoff = StartOfDylibCodeSignDRs; 352 MLC.linkedit_data_command_data.datasize = O.DylibCodeSignDRs.Data.size(); 353 break; 354 case MachO::LC_SYMTAB: 355 MLC.symtab_command_data.symoff = StartOfSymbols; 356 MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size(); 357 MLC.symtab_command_data.stroff = StartOfSymbolStrings; 358 MLC.symtab_command_data.strsize = StrTableBuilder.getSize(); 359 break; 360 case MachO::LC_DYSYMTAB: { 361 if (MLC.dysymtab_command_data.ntoc != 0 || 362 MLC.dysymtab_command_data.nmodtab != 0 || 363 MLC.dysymtab_command_data.nextrefsyms != 0 || 364 MLC.dysymtab_command_data.nlocrel != 0 || 365 MLC.dysymtab_command_data.nextrel != 0) 366 return createStringError(llvm::errc::not_supported, 367 "shared library is not yet supported"); 368 MLC.dysymtab_command_data.indirectsymoff = 369 O.IndirectSymTable.Symbols.size() ? StartOfIndirectSymbols : 0; 370 MLC.dysymtab_command_data.nindirectsyms = 371 O.IndirectSymTable.Symbols.size(); 372 updateDySymTab(MLC); 373 break; 374 } 375 case MachO::LC_DATA_IN_CODE: 376 MLC.linkedit_data_command_data.dataoff = StartOfDataInCode; 377 MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size(); 378 break; 379 case MachO::LC_LINKER_OPTIMIZATION_HINT: 380 MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint; 381 MLC.linkedit_data_command_data.datasize = 382 O.LinkerOptimizationHint.Data.size(); 383 break; 384 case MachO::LC_FUNCTION_STARTS: 385 MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts; 386 MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size(); 387 break; 388 case MachO::LC_DYLD_CHAINED_FIXUPS: 389 MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups; 390 MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size(); 391 break; 392 case MachO::LC_DYLD_EXPORTS_TRIE: 393 MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie; 394 MLC.linkedit_data_command_data.datasize = DyldExportsTrieSize; 395 break; 396 case MachO::LC_DYLD_INFO: 397 case MachO::LC_DYLD_INFO_ONLY: 398 MLC.dyld_info_command_data.rebase_off = 399 O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo; 400 MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size(); 401 MLC.dyld_info_command_data.bind_off = 402 O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo; 403 MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size(); 404 MLC.dyld_info_command_data.weak_bind_off = 405 O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo; 406 MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size(); 407 MLC.dyld_info_command_data.lazy_bind_off = 408 O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo; 409 MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size(); 410 MLC.dyld_info_command_data.export_off = 411 O.Exports.Trie.empty() ? 0 : StartOfExportTrie; 412 MLC.dyld_info_command_data.export_size = DyldInfoExportsTrieSize; 413 break; 414 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in 415 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a 416 // relative virtual address. At the moment modification of the __TEXT 417 // segment of executables isn't supported anyway (e.g. data in code entries 418 // are not recalculated). Moreover, in general 419 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because 420 // without making additional assumptions (e.g. that the entire __TEXT 421 // segment should be encrypted) we do not know how to recalculate the 422 // boundaries of the encrypted part. For now just copy over these load 423 // commands until we encounter a real world usecase where 424 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted. 425 case MachO::LC_ENCRYPTION_INFO: 426 case MachO::LC_ENCRYPTION_INFO_64: 427 case MachO::LC_LOAD_DYLINKER: 428 case MachO::LC_MAIN: 429 case MachO::LC_RPATH: 430 case MachO::LC_SEGMENT: 431 case MachO::LC_SEGMENT_64: 432 case MachO::LC_VERSION_MIN_MACOSX: 433 case MachO::LC_VERSION_MIN_IPHONEOS: 434 case MachO::LC_VERSION_MIN_TVOS: 435 case MachO::LC_VERSION_MIN_WATCHOS: 436 case MachO::LC_BUILD_VERSION: 437 case MachO::LC_ID_DYLIB: 438 case MachO::LC_LOAD_DYLIB: 439 case MachO::LC_LOAD_WEAK_DYLIB: 440 case MachO::LC_UUID: 441 case MachO::LC_SOURCE_VERSION: 442 case MachO::LC_THREAD: 443 case MachO::LC_UNIXTHREAD: 444 case MachO::LC_SUB_FRAMEWORK: 445 case MachO::LC_SUB_UMBRELLA: 446 case MachO::LC_SUB_CLIENT: 447 case MachO::LC_SUB_LIBRARY: 448 case MachO::LC_LINKER_OPTION: 449 // Nothing to update. 450 break; 451 default: 452 // Abort if it's unsupported in order to prevent corrupting the object. 453 return createStringError(llvm::errc::not_supported, 454 "unsupported load command (cmd=0x%x)", cmd); 455 } 456 } 457 458 return Error::success(); 459 } 460 461 Error MachOLayoutBuilder::layout() { 462 O.Header.NCmds = O.LoadCommands.size(); 463 O.Header.SizeOfCmds = computeSizeOfCmds(); 464 constructStringTable(); 465 updateSymbolIndexes(); 466 uint64_t Offset = layoutSegments(); 467 Offset = layoutRelocations(Offset); 468 return layoutTail(Offset); 469 } 470