xref: /llvm-project/llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp (revision 1a830aa1fe1e88749b563fefe18382842e0cff90)
1 //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOLayoutBuilder.h"
10 #include "llvm/Support/Alignment.h"
11 #include "llvm/Support/Errc.h"
12 #include "llvm/Support/ErrorHandling.h"
13 #include "llvm/Support/SystemZ/zOSSupport.h"
14 
15 using namespace llvm;
16 using namespace llvm::objcopy::macho;
17 
18 StringTableBuilder::Kind
19 MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
20   if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT)
21     return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO;
22   return Is64Bit ? StringTableBuilder::MachO64Linked
23                  : StringTableBuilder::MachOLinked;
24 }
25 
26 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
27   uint32_t Size = 0;
28   for (const LoadCommand &LC : O.LoadCommands) {
29     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
30     auto cmd = MLC.load_command_data.cmd;
31     switch (cmd) {
32     case MachO::LC_SEGMENT:
33       Size += sizeof(MachO::segment_command) +
34               sizeof(MachO::section) * LC.Sections.size();
35       continue;
36     case MachO::LC_SEGMENT_64:
37       Size += sizeof(MachO::segment_command_64) +
38               sizeof(MachO::section_64) * LC.Sections.size();
39       continue;
40     }
41 
42     switch (cmd) {
43 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
44   case MachO::LCName:                                                          \
45     Size += sizeof(MachO::LCStruct) + LC.Payload.size();                       \
46     break;
47 #include "llvm/BinaryFormat/MachO.def"
48 #undef HANDLE_LOAD_COMMAND
49     }
50   }
51 
52   return Size;
53 }
54 
55 void MachOLayoutBuilder::constructStringTable() {
56   for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
57     StrTableBuilder.add(Sym->Name);
58   StrTableBuilder.finalize();
59 }
60 
61 void MachOLayoutBuilder::updateSymbolIndexes() {
62   uint32_t Index = 0;
63   for (auto &Symbol : O.SymTable.Symbols)
64     Symbol->Index = Index++;
65 }
66 
67 // Updates the index and the number of local/external/undefined symbols.
68 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
69   assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
70   // Make sure that nlist entries in the symbol table are sorted by the those
71   // types. The order is: local < defined external < undefined external.
72   assert(llvm::is_sorted(O.SymTable.Symbols,
73                          [](const std::unique_ptr<SymbolEntry> &A,
74                             const std::unique_ptr<SymbolEntry> &B) {
75                            bool AL = A->isLocalSymbol(),
76                                 BL = B->isLocalSymbol();
77                            if (AL != BL)
78                              return AL;
79                            return !AL && !A->isUndefinedSymbol() &&
80                                   B->isUndefinedSymbol();
81                          }) &&
82          "Symbols are not sorted by their types.");
83 
84   uint32_t NumLocalSymbols = 0;
85   auto Iter = O.SymTable.Symbols.begin();
86   auto End = O.SymTable.Symbols.end();
87   for (; Iter != End; ++Iter) {
88     if ((*Iter)->isExternalSymbol())
89       break;
90 
91     ++NumLocalSymbols;
92   }
93 
94   uint32_t NumExtDefSymbols = 0;
95   for (; Iter != End; ++Iter) {
96     if ((*Iter)->isUndefinedSymbol())
97       break;
98 
99     ++NumExtDefSymbols;
100   }
101 
102   MLC.dysymtab_command_data.ilocalsym = 0;
103   MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
104   MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
105   MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
106   MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
107   MLC.dysymtab_command_data.nundefsym =
108       O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
109 }
110 
111 // Recomputes and updates offset and size fields in load commands and sections
112 // since they could be modified.
113 uint64_t MachOLayoutBuilder::layoutSegments() {
114   auto HeaderSize =
115       Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
116   const bool IsObjectFile =
117       O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
118   uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
119   if (O.EncryptionInfoCommandIndex) {
120     // If we are emitting an encryptable binary, our load commands must have a
121     // separate (non-encrypted) page to themselves.
122     Offset = alignToPowerOf2(HeaderSize + O.Header.SizeOfCmds, PageSize);
123   }
124   for (LoadCommand &LC : O.LoadCommands) {
125     auto &MLC = LC.MachOLoadCommand;
126     StringRef Segname;
127     uint64_t SegmentVmAddr;
128     uint64_t SegmentVmSize;
129     switch (MLC.load_command_data.cmd) {
130     case MachO::LC_SEGMENT:
131       SegmentVmAddr = MLC.segment_command_data.vmaddr;
132       SegmentVmSize = MLC.segment_command_data.vmsize;
133       Segname = StringRef(MLC.segment_command_data.segname,
134                           strnlen(MLC.segment_command_data.segname,
135                                   sizeof(MLC.segment_command_data.segname)));
136       break;
137     case MachO::LC_SEGMENT_64:
138       SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
139       SegmentVmSize = MLC.segment_command_64_data.vmsize;
140       Segname = StringRef(MLC.segment_command_64_data.segname,
141                           strnlen(MLC.segment_command_64_data.segname,
142                                   sizeof(MLC.segment_command_64_data.segname)));
143       break;
144     default:
145       continue;
146     }
147 
148     if (Segname == "__LINKEDIT") {
149       // We update the __LINKEDIT segment later (in layoutTail).
150       assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
151       LinkEditLoadCommand = &MLC;
152       continue;
153     }
154 
155     // Update file offsets and sizes of sections.
156     uint64_t SegOffset = Offset;
157     uint64_t SegFileSize = 0;
158     uint64_t VMSize = 0;
159     for (std::unique_ptr<Section> &Sec : LC.Sections) {
160       assert(SegmentVmAddr <= Sec->Addr &&
161              "Section's address cannot be smaller than Segment's one");
162       uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
163       if (IsObjectFile) {
164         if (!Sec->hasValidOffset()) {
165           Sec->Offset = 0;
166         } else {
167           uint64_t PaddingSize =
168               offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
169           Sec->Offset = SegOffset + SegFileSize + PaddingSize;
170           Sec->Size = Sec->Content.size();
171           SegFileSize += PaddingSize + Sec->Size;
172         }
173       } else {
174         if (!Sec->hasValidOffset()) {
175           Sec->Offset = 0;
176         } else {
177           Sec->Offset = SegOffset + SectOffset;
178           Sec->Size = Sec->Content.size();
179           SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
180         }
181       }
182       VMSize = std::max(VMSize, SectOffset + Sec->Size);
183     }
184 
185     if (IsObjectFile) {
186       Offset += SegFileSize;
187     } else {
188       Offset = alignTo(Offset + SegFileSize, PageSize);
189       SegFileSize = alignTo(SegFileSize, PageSize);
190       // Use the original vmsize if the segment is __PAGEZERO.
191       VMSize =
192           Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
193     }
194 
195     switch (MLC.load_command_data.cmd) {
196     case MachO::LC_SEGMENT:
197       MLC.segment_command_data.cmdsize =
198           sizeof(MachO::segment_command) +
199           sizeof(MachO::section) * LC.Sections.size();
200       MLC.segment_command_data.nsects = LC.Sections.size();
201       MLC.segment_command_data.fileoff = SegOffset;
202       MLC.segment_command_data.vmsize = VMSize;
203       MLC.segment_command_data.filesize = SegFileSize;
204       break;
205     case MachO::LC_SEGMENT_64:
206       MLC.segment_command_64_data.cmdsize =
207           sizeof(MachO::segment_command_64) +
208           sizeof(MachO::section_64) * LC.Sections.size();
209       MLC.segment_command_64_data.nsects = LC.Sections.size();
210       MLC.segment_command_64_data.fileoff = SegOffset;
211       MLC.segment_command_64_data.vmsize = VMSize;
212       MLC.segment_command_64_data.filesize = SegFileSize;
213       break;
214     }
215   }
216 
217   return Offset;
218 }
219 
220 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
221   for (LoadCommand &LC : O.LoadCommands)
222     for (std::unique_ptr<Section> &Sec : LC.Sections) {
223       Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
224       Sec->NReloc = Sec->Relocations.size();
225       Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
226     }
227 
228   return Offset;
229 }
230 
231 Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
232   // If we are building the layout of an executable or dynamic library
233   // which does not have any segments other than __LINKEDIT,
234   // the Offset can be equal to zero by this time. It happens because of the
235   // convention that in such cases the file offsets specified by LC_SEGMENT
236   // start with zero (unlike the case of a relocatable object file).
237   const uint64_t HeaderSize =
238       Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
239   assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
240           Offset >= HeaderSize + O.Header.SizeOfCmds) &&
241          "Incorrect tail offset");
242   Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);
243 
244   // The exports trie can be in either LC_DYLD_INFO or in
245   // LC_DYLD_EXPORTS_TRIE, but not both.
246   size_t DyldInfoExportsTrieSize = 0;
247   size_t DyldExportsTrieSize = 0;
248   for (const auto &LC : O.LoadCommands) {
249     switch (LC.MachOLoadCommand.load_command_data.cmd) {
250     case MachO::LC_DYLD_INFO:
251     case MachO::LC_DYLD_INFO_ONLY:
252       DyldInfoExportsTrieSize = O.Exports.Trie.size();
253       break;
254     case MachO::LC_DYLD_EXPORTS_TRIE:
255       DyldExportsTrieSize = O.Exports.Trie.size();
256       break;
257     default:
258       break;
259     }
260   }
261   assert((DyldInfoExportsTrieSize == 0 || DyldExportsTrieSize == 0) &&
262          "Export trie in both LCs");
263 
264   uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
265   uint64_t StartOfLinkEdit = Offset;
266 
267   // The order of LINKEDIT elements is as follows:
268   // rebase info, binding info, weak binding info, lazy binding info, export
269   // trie, chained fixups, dyld exports trie, function starts, data-in-code,
270   // symbol table, indirect symbol table, symbol table strings,
271   // dylib codesign drs, and code signature.
272   auto updateOffset = [&Offset](size_t Size) {
273     uint64_t PreviousOffset = Offset;
274     Offset += Size;
275     return PreviousOffset;
276   };
277 
278   uint64_t StartOfRebaseInfo = updateOffset(O.Rebases.Opcodes.size());
279   uint64_t StartOfBindingInfo = updateOffset(O.Binds.Opcodes.size());
280   uint64_t StartOfWeakBindingInfo = updateOffset(O.WeakBinds.Opcodes.size());
281   uint64_t StartOfLazyBindingInfo = updateOffset(O.LazyBinds.Opcodes.size());
282   uint64_t StartOfExportTrie = updateOffset(DyldInfoExportsTrieSize);
283   uint64_t StartOfChainedFixups = updateOffset(O.ChainedFixups.Data.size());
284   uint64_t StartOfDyldExportsTrie = updateOffset(DyldExportsTrieSize);
285   uint64_t StartOfFunctionStarts = updateOffset(O.FunctionStarts.Data.size());
286   uint64_t StartOfDataInCode = updateOffset(O.DataInCode.Data.size());
287   uint64_t StartOfLinkerOptimizationHint =
288       updateOffset(O.LinkerOptimizationHint.Data.size());
289   uint64_t StartOfSymbols = updateOffset(NListSize * O.SymTable.Symbols.size());
290   uint64_t StartOfIndirectSymbols =
291       updateOffset(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
292   uint64_t StartOfSymbolStrings = updateOffset(StrTableBuilder.getSize());
293   uint64_t StartOfDylibCodeSignDRs = updateOffset(O.DylibCodeSignDRs.Data.size());
294 
295   uint64_t StartOfCodeSignature = Offset;
296   uint32_t CodeSignatureSize = 0;
297   if (O.CodeSignatureCommandIndex) {
298     StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
299 
300     // Note: These calculations are to be kept in sync with the same
301     // calculations performed in LLD's CodeSignatureSection.
302     const uint32_t AllHeadersSize =
303         alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1,
304                 CodeSignature.Align);
305     const uint32_t BlockCount =
306         (StartOfCodeSignature + CodeSignature.BlockSize - 1) /
307         CodeSignature.BlockSize;
308     const uint32_t Size =
309         alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize,
310                 CodeSignature.Align);
311 
312     CodeSignature.StartOffset = StartOfCodeSignature;
313     CodeSignature.AllHeadersSize = AllHeadersSize;
314     CodeSignature.BlockCount = BlockCount;
315     CodeSignature.OutputFileName = OutputFileName;
316     CodeSignature.Size = Size;
317     CodeSignatureSize = Size;
318   }
319   uint64_t LinkEditSize =
320       StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit;
321 
322   // Now we have determined the layout of the contents of the __LINKEDIT
323   // segment. Update its load command.
324   if (LinkEditLoadCommand) {
325     MachO::macho_load_command *MLC = LinkEditLoadCommand;
326     switch (LinkEditLoadCommand->load_command_data.cmd) {
327     case MachO::LC_SEGMENT:
328       MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
329       MLC->segment_command_data.fileoff = StartOfLinkEdit;
330       MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
331       MLC->segment_command_data.filesize = LinkEditSize;
332       break;
333     case MachO::LC_SEGMENT_64:
334       MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
335       MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
336       MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
337       MLC->segment_command_64_data.filesize = LinkEditSize;
338       break;
339     }
340   }
341 
342   for (LoadCommand &LC : O.LoadCommands) {
343     auto &MLC = LC.MachOLoadCommand;
344     auto cmd = MLC.load_command_data.cmd;
345     switch (cmd) {
346     case MachO::LC_CODE_SIGNATURE:
347       MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
348       MLC.linkedit_data_command_data.datasize = CodeSignatureSize;
349       break;
350     case MachO::LC_DYLIB_CODE_SIGN_DRS:
351       MLC.linkedit_data_command_data.dataoff = StartOfDylibCodeSignDRs;
352       MLC.linkedit_data_command_data.datasize = O.DylibCodeSignDRs.Data.size();
353       break;
354     case MachO::LC_SYMTAB:
355       MLC.symtab_command_data.symoff = StartOfSymbols;
356       MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
357       MLC.symtab_command_data.stroff = StartOfSymbolStrings;
358       MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
359       break;
360     case MachO::LC_DYSYMTAB: {
361       if (MLC.dysymtab_command_data.ntoc != 0 ||
362           MLC.dysymtab_command_data.nmodtab != 0 ||
363           MLC.dysymtab_command_data.nextrefsyms != 0 ||
364           MLC.dysymtab_command_data.nlocrel != 0 ||
365           MLC.dysymtab_command_data.nextrel != 0)
366         return createStringError(llvm::errc::not_supported,
367                                  "shared library is not yet supported");
368       MLC.dysymtab_command_data.indirectsymoff =
369           O.IndirectSymTable.Symbols.size() ? StartOfIndirectSymbols : 0;
370       MLC.dysymtab_command_data.nindirectsyms =
371           O.IndirectSymTable.Symbols.size();
372       updateDySymTab(MLC);
373       break;
374     }
375     case MachO::LC_DATA_IN_CODE:
376       MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
377       MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
378       break;
379     case MachO::LC_LINKER_OPTIMIZATION_HINT:
380       MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint;
381       MLC.linkedit_data_command_data.datasize =
382           O.LinkerOptimizationHint.Data.size();
383       break;
384     case MachO::LC_FUNCTION_STARTS:
385       MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
386       MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
387       break;
388     case MachO::LC_DYLD_CHAINED_FIXUPS:
389       MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups;
390       MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size();
391       break;
392     case MachO::LC_DYLD_EXPORTS_TRIE:
393       MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie;
394       MLC.linkedit_data_command_data.datasize = DyldExportsTrieSize;
395       break;
396     case MachO::LC_DYLD_INFO:
397     case MachO::LC_DYLD_INFO_ONLY:
398       MLC.dyld_info_command_data.rebase_off =
399           O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
400       MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
401       MLC.dyld_info_command_data.bind_off =
402           O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
403       MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
404       MLC.dyld_info_command_data.weak_bind_off =
405           O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
406       MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
407       MLC.dyld_info_command_data.lazy_bind_off =
408           O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
409       MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
410       MLC.dyld_info_command_data.export_off =
411           O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
412       MLC.dyld_info_command_data.export_size = DyldInfoExportsTrieSize;
413       break;
414     // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
415     // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
416     // relative virtual address. At the moment modification of the __TEXT
417     // segment of executables isn't supported anyway (e.g. data in code entries
418     // are not recalculated). Moreover, in general
419     // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
420     // without making additional assumptions (e.g. that the entire __TEXT
421     // segment should be encrypted) we do not know how to recalculate the
422     // boundaries of the encrypted part. For now just copy over these load
423     // commands until we encounter a real world usecase where
424     // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
425     case MachO::LC_ENCRYPTION_INFO:
426     case MachO::LC_ENCRYPTION_INFO_64:
427     case MachO::LC_LOAD_DYLINKER:
428     case MachO::LC_MAIN:
429     case MachO::LC_RPATH:
430     case MachO::LC_SEGMENT:
431     case MachO::LC_SEGMENT_64:
432     case MachO::LC_VERSION_MIN_MACOSX:
433     case MachO::LC_VERSION_MIN_IPHONEOS:
434     case MachO::LC_VERSION_MIN_TVOS:
435     case MachO::LC_VERSION_MIN_WATCHOS:
436     case MachO::LC_BUILD_VERSION:
437     case MachO::LC_ID_DYLIB:
438     case MachO::LC_LOAD_DYLIB:
439     case MachO::LC_LOAD_WEAK_DYLIB:
440     case MachO::LC_UUID:
441     case MachO::LC_SOURCE_VERSION:
442     case MachO::LC_THREAD:
443     case MachO::LC_UNIXTHREAD:
444     case MachO::LC_SUB_FRAMEWORK:
445     case MachO::LC_SUB_UMBRELLA:
446     case MachO::LC_SUB_CLIENT:
447     case MachO::LC_SUB_LIBRARY:
448     case MachO::LC_LINKER_OPTION:
449       // Nothing to update.
450       break;
451     default:
452       // Abort if it's unsupported in order to prevent corrupting the object.
453       return createStringError(llvm::errc::not_supported,
454                                "unsupported load command (cmd=0x%x)", cmd);
455     }
456   }
457 
458   return Error::success();
459 }
460 
461 Error MachOLayoutBuilder::layout() {
462   O.Header.NCmds = O.LoadCommands.size();
463   O.Header.SizeOfCmds = computeSizeOfCmds();
464   constructStringTable();
465   updateSymbolIndexes();
466   uint64_t Offset = layoutSegments();
467   Offset = layoutRelocations(Offset);
468   return layoutTail(Offset);
469 }
470