xref: /llvm-project/llvm/lib/ObjCopy/MachO/MachOReader.cpp (revision 1a830aa1fe1e88749b563fefe18382842e0cff90)
1 //===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOReader.h"
10 #include "MachOObject.h"
11 #include "llvm/BinaryFormat/MachO.h"
12 #include "llvm/Object/MachO.h"
13 #include "llvm/Support/SystemZ/zOSSupport.h"
14 #include <memory>
15 
16 using namespace llvm;
17 using namespace llvm::objcopy;
18 using namespace llvm::objcopy::macho;
19 
20 void MachOReader::readHeader(Object &O) const {
21   O.Header.Magic = MachOObj.getHeader().magic;
22   O.Header.CPUType = MachOObj.getHeader().cputype;
23   O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
24   O.Header.FileType = MachOObj.getHeader().filetype;
25   O.Header.NCmds = MachOObj.getHeader().ncmds;
26   O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
27   O.Header.Flags = MachOObj.getHeader().flags;
28 }
29 
30 template <typename SectionType>
31 static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {
32   StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
33   StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
34   Section S(SegName, SectName);
35   S.Index = Index;
36   S.Addr = Sec.addr;
37   S.Size = Sec.size;
38   S.OriginalOffset = Sec.offset;
39   S.Align = Sec.align;
40   S.RelOff = Sec.reloff;
41   S.NReloc = Sec.nreloc;
42   S.Flags = Sec.flags;
43   S.Reserved1 = Sec.reserved1;
44   S.Reserved2 = Sec.reserved2;
45   S.Reserved3 = 0;
46   return S;
47 }
48 
49 Section constructSection(const MachO::section &Sec, uint32_t Index) {
50   return constructSectionCommon(Sec, Index);
51 }
52 
53 Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {
54   Section S = constructSectionCommon(Sec, Index);
55   S.Reserved3 = Sec.reserved3;
56   return S;
57 }
58 
59 template <typename SectionType, typename SegmentType>
60 Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
61     const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
62     const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {
63   std::vector<std::unique_ptr<Section>> Sections;
64   for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
65                                                          sizeof(SegmentType)),
66             End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +
67                                                         LoadCmd.C.cmdsize);
68        Curr < End; ++Curr) {
69     SectionType Sec;
70     memcpy((void *)&Sec, reinterpret_cast<const char *>(Curr),
71            sizeof(SectionType));
72 
73     if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
74       MachO::swapStruct(Sec);
75 
76     Sections.push_back(
77         std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
78 
79     Section &S = *Sections.back();
80 
81     Expected<object::SectionRef> SecRef =
82         MachOObj.getSection(NextSectionIndex++);
83     if (!SecRef)
84       return SecRef.takeError();
85 
86     Expected<ArrayRef<uint8_t>> Data =
87         MachOObj.getSectionContents(SecRef->getRawDataRefImpl());
88     if (!Data)
89       return Data.takeError();
90 
91     S.Content =
92         StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());
93 
94     const uint32_t CPUType = MachOObj.getHeader().cputype;
95     S.Relocations.reserve(S.NReloc);
96     for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
97               RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
98          RI != RE; ++RI) {
99       RelocationInfo R;
100       R.Symbol = nullptr; // We'll fill this field later.
101       R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
102       R.Scattered = MachOObj.isRelocationScattered(R.Info);
103       unsigned Type = MachOObj.getAnyRelocationType(R.Info);
104       // TODO Support CPU_TYPE_ARM.
105       R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&
106                                     Type == MachO::ARM64_RELOC_ADDEND);
107       R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info);
108       S.Relocations.push_back(R);
109     }
110 
111     assert(S.NReloc == S.Relocations.size() &&
112            "Incorrect number of relocations");
113   }
114   return std::move(Sections);
115 }
116 
117 Error MachOReader::readLoadCommands(Object &O) const {
118   // For MachO sections indices start from 1.
119   uint32_t NextSectionIndex = 1;
120   static constexpr char TextSegmentName[] = "__TEXT";
121   for (auto LoadCmd : MachOObj.load_commands()) {
122     LoadCommand LC;
123     switch (LoadCmd.C.cmd) {
124     case MachO::LC_CODE_SIGNATURE:
125       O.CodeSignatureCommandIndex = O.LoadCommands.size();
126       break;
127     case MachO::LC_SEGMENT:
128       // LoadCmd.Ptr might not be aligned temporarily as
129       // MachO::segment_command requires, but the segname char pointer do not
130       // have alignment restrictions.
131       if (StringRef(reinterpret_cast<const char *>(
132               LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
133           TextSegmentName)
134         O.TextSegmentCommandIndex = O.LoadCommands.size();
135 
136       if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
137               extractSections<MachO::section, MachO::segment_command>(
138                   LoadCmd, MachOObj, NextSectionIndex))
139         LC.Sections = std::move(*Sections);
140       else
141         return Sections.takeError();
142       break;
143     case MachO::LC_SEGMENT_64:
144       // LoadCmd.Ptr might not be aligned temporarily as
145       // MachO::segment_command_64 requires, but the segname char pointer do
146       // not have alignment restrictions.
147       if (StringRef(reinterpret_cast<const char *>(
148               LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
149           TextSegmentName)
150         O.TextSegmentCommandIndex = O.LoadCommands.size();
151 
152       if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
153               extractSections<MachO::section_64, MachO::segment_command_64>(
154                   LoadCmd, MachOObj, NextSectionIndex))
155         LC.Sections = std::move(*Sections);
156       else
157         return Sections.takeError();
158       break;
159     case MachO::LC_SYMTAB:
160       O.SymTabCommandIndex = O.LoadCommands.size();
161       break;
162     case MachO::LC_DYSYMTAB:
163       O.DySymTabCommandIndex = O.LoadCommands.size();
164       break;
165     case MachO::LC_DYLD_INFO:
166     case MachO::LC_DYLD_INFO_ONLY:
167       O.DyLdInfoCommandIndex = O.LoadCommands.size();
168       break;
169     case MachO::LC_DATA_IN_CODE:
170       O.DataInCodeCommandIndex = O.LoadCommands.size();
171       break;
172     case MachO::LC_LINKER_OPTIMIZATION_HINT:
173       O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();
174       break;
175     case MachO::LC_FUNCTION_STARTS:
176       O.FunctionStartsCommandIndex = O.LoadCommands.size();
177       break;
178     case MachO::LC_DYLIB_CODE_SIGN_DRS:
179       O.DylibCodeSignDRsIndex = O.LoadCommands.size();
180       break;
181     case MachO::LC_DYLD_EXPORTS_TRIE:
182       O.ExportsTrieCommandIndex = O.LoadCommands.size();
183       break;
184     case MachO::LC_DYLD_CHAINED_FIXUPS:
185       O.ChainedFixupsCommandIndex = O.LoadCommands.size();
186       break;
187     case MachO::LC_ENCRYPTION_INFO:
188     case MachO::LC_ENCRYPTION_INFO_64:
189       O.EncryptionInfoCommandIndex = O.LoadCommands.size();
190       break;
191     }
192 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
193   case MachO::LCName:                                                          \
194     memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr,        \
195            sizeof(MachO::LCStruct));                                           \
196     if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)                  \
197       MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data);                  \
198     if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct))                           \
199       LC.Payload = ArrayRef<uint8_t>(                                          \
200           reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +       \
201               sizeof(MachO::LCStruct),                                         \
202           LoadCmd.C.cmdsize - sizeof(MachO::LCStruct));                        \
203     break;
204 
205     switch (LoadCmd.C.cmd) {
206     default:
207       memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
208              sizeof(MachO::load_command));
209       if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
210         MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
211       if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))
212         LC.Payload = ArrayRef<uint8_t>(
213             reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
214                 sizeof(MachO::load_command),
215             LoadCmd.C.cmdsize - sizeof(MachO::load_command));
216       break;
217 #include "llvm/BinaryFormat/MachO.def"
218     }
219     O.LoadCommands.push_back(std::move(LC));
220   }
221   return Error::success();
222 }
223 
224 template <typename nlist_t>
225 SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
226   assert(nlist.n_strx < StrTable.size() &&
227          "n_strx exceeds the size of the string table");
228   SymbolEntry SE;
229   SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
230   SE.n_type = nlist.n_type;
231   SE.n_sect = nlist.n_sect;
232   SE.n_desc = nlist.n_desc;
233   SE.n_value = nlist.n_value;
234   return SE;
235 }
236 
237 void MachOReader::readSymbolTable(Object &O) const {
238   StringRef StrTable = MachOObj.getStringTableData();
239   for (auto Symbol : MachOObj.symbols()) {
240     SymbolEntry SE =
241         (MachOObj.is64Bit()
242              ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
243                                                   Symbol.getRawDataRefImpl()))
244              : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
245                                                   Symbol.getRawDataRefImpl())));
246 
247     O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
248   }
249 }
250 
251 void MachOReader::setSymbolInRelocationInfo(Object &O) const {
252   std::vector<const Section *> Sections;
253   for (auto &LC : O.LoadCommands)
254     for (std::unique_ptr<Section> &Sec : LC.Sections)
255       Sections.push_back(Sec.get());
256 
257   for (LoadCommand &LC : O.LoadCommands)
258     for (std::unique_ptr<Section> &Sec : LC.Sections)
259       for (auto &Reloc : Sec->Relocations)
260         if (!Reloc.Scattered && !Reloc.IsAddend) {
261           const uint32_t SymbolNum =
262               Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian());
263           if (Reloc.Extern) {
264             Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum);
265           } else {
266             // FIXME: Refactor error handling in MachOReader and report an error
267             // if we encounter an invalid relocation.
268             assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&
269                    "Invalid section index.");
270             Reloc.Sec = Sections[SymbolNum - 1];
271           }
272         }
273 }
274 
275 void MachOReader::readRebaseInfo(Object &O) const {
276   O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
277 }
278 
279 void MachOReader::readBindInfo(Object &O) const {
280   O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
281 }
282 
283 void MachOReader::readWeakBindInfo(Object &O) const {
284   O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
285 }
286 
287 void MachOReader::readLazyBindInfo(Object &O) const {
288   O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
289 }
290 
291 void MachOReader::readExportInfo(Object &O) const {
292   // This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE
293   ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie();
294   if (Trie.empty())
295     Trie = MachOObj.getDyldExportsTrie();
296   O.Exports.Trie = Trie;
297 }
298 
299 void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex,
300                                LinkData &LD) const {
301   if (!LCIndex)
302     return;
303   const MachO::linkedit_data_command &LC =
304       O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
305   LD.Data =
306       arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
307 }
308 
309 void MachOReader::readDataInCodeData(Object &O) const {
310   return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
311 }
312 
313 void MachOReader::readLinkerOptimizationHint(Object &O) const {
314   return readLinkData(O, O.LinkerOptimizationHintCommandIndex,
315                       O.LinkerOptimizationHint);
316 }
317 
318 void MachOReader::readFunctionStartsData(Object &O) const {
319   return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
320 }
321 
322 void MachOReader::readDylibCodeSignDRs(Object &O) const {
323   return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs);
324 }
325 
326 void MachOReader::readExportsTrie(Object &O) const {
327   return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);
328 }
329 
330 void MachOReader::readChainedFixups(Object &O) const {
331   return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);
332 }
333 
334 void MachOReader::readIndirectSymbolTable(Object &O) const {
335   MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
336   constexpr uint32_t AbsOrLocalMask =
337       MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;
338   for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {
339     uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);
340     if ((Index & AbsOrLocalMask) != 0)
341       O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt);
342     else
343       O.IndirectSymTable.Symbols.emplace_back(
344           Index, O.SymTable.getSymbolByIndex(Index));
345   }
346 }
347 
348 void MachOReader::readSwiftVersion(Object &O) const {
349   struct ObjCImageInfo {
350     uint32_t Version;
351     uint32_t Flags;
352   } ImageInfo;
353 
354   for (const LoadCommand &LC : O.LoadCommands)
355     for (const std::unique_ptr<Section> &Sec : LC.Sections)
356       if (Sec->Sectname == "__objc_imageinfo" &&
357           (Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||
358            Sec->Segname == "__DATA_DIRTY") &&
359           Sec->Content.size() >= sizeof(ObjCImageInfo)) {
360         memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo));
361         if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
362           sys::swapByteOrder(ImageInfo.Version);
363           sys::swapByteOrder(ImageInfo.Flags);
364         }
365         O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;
366         return;
367       }
368 }
369 
370 Expected<std::unique_ptr<Object>> MachOReader::create() const {
371   auto Obj = std::make_unique<Object>();
372   readHeader(*Obj);
373   if (Error E = readLoadCommands(*Obj))
374     return std::move(E);
375   readSymbolTable(*Obj);
376   setSymbolInRelocationInfo(*Obj);
377   readRebaseInfo(*Obj);
378   readBindInfo(*Obj);
379   readWeakBindInfo(*Obj);
380   readLazyBindInfo(*Obj);
381   readExportInfo(*Obj);
382   readDataInCodeData(*Obj);
383   readLinkerOptimizationHint(*Obj);
384   readFunctionStartsData(*Obj);
385   readDylibCodeSignDRs(*Obj);
386   readExportsTrie(*Obj);
387   readChainedFixups(*Obj);
388   readIndirectSymbolTable(*Obj);
389   readSwiftVersion(*Obj);
390   return std::move(Obj);
391 }
392