xref: /llvm-project/llvm/tools/obj2yaml/macho2yaml.cpp (revision ec41462d7a7d2fcd74dcf1c60218f134fcfd55b2)
1 //===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "obj2yaml.h"
10 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
11 #include "llvm/Object/MachOUniversal.h"
12 #include "llvm/ObjectYAML/DWARFYAML.h"
13 #include "llvm/ObjectYAML/ObjectYAML.h"
14 #include "llvm/Support/Errc.h"
15 #include "llvm/Support/Error.h"
16 #include "llvm/Support/ErrorHandling.h"
17 #include "llvm/Support/LEB128.h"
18 #include "llvm/Support/SystemZ/zOSSupport.h"
19 
20 #include <string.h> // for memcpy
21 
22 using namespace llvm;
23 
24 class MachODumper {
25 
26   template <typename StructType>
27   Expected<const char *> processLoadCommandData(
28       MachOYAML::LoadCommand &LC,
29       const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
30       MachOYAML::Object &Y);
31 
32   const object::MachOObjectFile &Obj;
33   std::unique_ptr<DWARFContext> DWARFCtx;
34   unsigned RawSegment;
35   void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y);
36   Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y);
37   void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y);
38   void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y);
39   void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y);
40   void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes,
41                        ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false);
42   void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y);
43   void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y);
44   void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y);
45   void dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y);
46   void dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y);
47 
48   template <typename SectionType>
49   Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec,
50                                                       size_t SecIndex);
51   template <typename SectionType>
52   Expected<MachOYAML::Section> constructSection(SectionType Sec,
53                                                 size_t SecIndex);
54   template <typename SectionType, typename SegmentType>
55   Expected<const char *>
56   extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
57                   std::vector<MachOYAML::Section> &Sections,
58                   MachOYAML::Object &Y);
59 
60 public:
MachODumper(const object::MachOObjectFile & O,std::unique_ptr<DWARFContext> DCtx,unsigned RawSegments)61   MachODumper(const object::MachOObjectFile &O,
62               std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments)
63       : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {}
64   Expected<std::unique_ptr<MachOYAML::Object>> dump();
65 };
66 
// Expands to one switch case per load command. The case copies the command's
// fixed-size structure out of the file into the matching LC.Data member,
// byte-swaps it when the file and host endianness differ, and lets
// processLoadCommandData consume any trailing data, storing the resulting end
// pointer in EndPtr. The expansion relies on LC, LoadCmd, Y, EndPtr and Obj
// being in scope, and returns the error from the enclosing function on failure.
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
  case MachO::LCName: {                                                        \
    memcpy(static_cast<void *>(&LC.Data.LCStruct##_data), LoadCmd.Ptr,         \
           sizeof(MachO::LCStruct));                                           \
    if (Obj.isLittleEndian() != sys::IsLittleEndianHost)                       \
      MachO::swapStruct(LC.Data.LCStruct##_data);                              \
    Expected<const char *> ExpectedEndPtr =                                    \
        processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y);              \
    if (!ExpectedEndPtr)                                                       \
      return ExpectedEndPtr.takeError();                                       \
    EndPtr = *ExpectedEndPtr;                                                  \
    break;                                                                     \
  }
79 
80 template <typename SectionType>
81 Expected<MachOYAML::Section>
constructSectionCommon(SectionType Sec,size_t SecIndex)82 MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) {
83   MachOYAML::Section TempSec;
84   memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
85   memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
86   TempSec.addr = Sec.addr;
87   TempSec.size = Sec.size;
88   TempSec.offset = Sec.offset;
89   TempSec.align = Sec.align;
90   TempSec.reloff = Sec.reloff;
91   TempSec.nreloc = Sec.nreloc;
92   TempSec.flags = Sec.flags;
93   TempSec.reserved1 = Sec.reserved1;
94   TempSec.reserved2 = Sec.reserved2;
95   TempSec.reserved3 = 0;
96   if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE))
97     TempSec.content =
98         yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
99 
100   if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) {
101     TempSec.relocations.reserve(TempSec.nreloc);
102     for (const object::RelocationRef &Reloc : SecRef->relocations()) {
103       const object::DataRefImpl Rel = Reloc.getRawDataRefImpl();
104       const MachO::any_relocation_info RE = Obj.getRelocation(Rel);
105       MachOYAML::Relocation R;
106       R.address = Obj.getAnyRelocationAddress(RE);
107       R.is_pcrel = Obj.getAnyRelocationPCRel(RE);
108       R.length = Obj.getAnyRelocationLength(RE);
109       R.type = Obj.getAnyRelocationType(RE);
110       R.is_scattered = Obj.isRelocationScattered(RE);
111       R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE));
112       R.is_extern =
113           (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE));
114       R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0);
115       TempSec.relocations.push_back(R);
116     }
117   } else {
118     return SecRef.takeError();
119   }
120   return TempSec;
121 }
122 
123 template <>
constructSection(MachO::section Sec,size_t SecIndex)124 Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec,
125                                                            size_t SecIndex) {
126   Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
127   if (TempSec)
128     TempSec->reserved3 = 0;
129   return TempSec;
130 }
131 
132 template <>
133 Expected<MachOYAML::Section>
constructSection(MachO::section_64 Sec,size_t SecIndex)134 MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) {
135   Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex);
136   if (TempSec)
137     TempSec->reserved3 = Sec.reserved3;
138   return TempSec;
139 }
140 
dumpDebugSection(StringRef SecName,DWARFContext & DCtx,DWARFYAML::Data & DWARF)141 static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx,
142                               DWARFYAML::Data &DWARF) {
143   if (SecName == "__debug_abbrev")
144     return dumpDebugAbbrev(DCtx, DWARF);
145   if (SecName == "__debug_aranges")
146     return dumpDebugARanges(DCtx, DWARF);
147   if (SecName == "__debug_info") {
148     dumpDebugInfo(DCtx, DWARF);
149     return Error::success();
150   }
151   if (SecName == "__debug_line") {
152     dumpDebugLines(DCtx, DWARF);
153     return Error::success();
154   }
155   if (SecName.starts_with("__debug_pub")) {
156     // FIXME: We should extract pub-section dumpers from this function.
157     dumpDebugPubSections(DCtx, DWARF);
158     return Error::success();
159   }
160   if (SecName == "__debug_ranges")
161     return dumpDebugRanges(DCtx, DWARF);
162   if (SecName == "__debug_str")
163     return dumpDebugStrings(DCtx, DWARF);
164   return createStringError(errc::not_supported,
165                            "dumping " + SecName + " section is not supported");
166 }
167 
168 template <typename SectionType, typename SegmentType>
extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,std::vector<MachOYAML::Section> & Sections,MachOYAML::Object & Y)169 Expected<const char *> MachODumper::extractSections(
170     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
171     std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) {
172   auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
173   const SectionType *Curr =
174       reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
175   for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
176     SectionType Sec;
177     memcpy((void *)&Sec, Curr, sizeof(SectionType));
178     if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
179       MachO::swapStruct(Sec);
180     // For MachO section indices start from 1.
181     if (Expected<MachOYAML::Section> S =
182             constructSection(Sec, Sections.size() + 1)) {
183       StringRef SecName(S->sectname);
184 
185       // Copy data sections if requested.
186       if ((RawSegment & ::RawSegments::data) &&
187           StringRef(S->segname).starts_with("__DATA"))
188         S->content =
189             yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size));
190 
191       if (SecName.starts_with("__debug_")) {
192         // If the DWARF section cannot be successfully parsed, emit raw content
193         // instead of an entry in the DWARF section of the YAML.
194         if (Error Err = dumpDebugSection(SecName, *DWARFCtx, Y.DWARF))
195           consumeError(std::move(Err));
196         else
197           S->content.reset();
198       }
199       Sections.push_back(std::move(*S));
200     } else
201       return S.takeError();
202   }
203   return reinterpret_cast<const char *>(Curr);
204 }
205 
206 template <typename StructType>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)207 Expected<const char *> MachODumper::processLoadCommandData(
208     MachOYAML::LoadCommand &LC,
209     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
210     MachOYAML::Object &Y) {
211   return LoadCmd.Ptr + sizeof(StructType);
212 }
213 
214 template <>
215 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)216 MachODumper::processLoadCommandData<MachO::segment_command>(
217     MachOYAML::LoadCommand &LC,
218     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
219     MachOYAML::Object &Y) {
220   return extractSections<MachO::section, MachO::segment_command>(
221       LoadCmd, LC.Sections, Y);
222 }
223 
224 template <>
225 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)226 MachODumper::processLoadCommandData<MachO::segment_command_64>(
227     MachOYAML::LoadCommand &LC,
228     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
229     MachOYAML::Object &Y) {
230   return extractSections<MachO::section_64, MachO::segment_command_64>(
231       LoadCmd, LC.Sections, Y);
232 }
233 
234 template <typename StructType>
235 const char *
readString(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd)236 readString(MachOYAML::LoadCommand &LC,
237            const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) {
238   auto Start = LoadCmd.Ptr + sizeof(StructType);
239   auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType);
240   auto Size = strnlen(Start, MaxSize);
241   LC.Content = StringRef(Start, Size).str();
242   return Start + Size;
243 }
244 
245 template <>
246 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)247 MachODumper::processLoadCommandData<MachO::dylib_command>(
248     MachOYAML::LoadCommand &LC,
249     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
250     MachOYAML::Object &Y) {
251   return readString<MachO::dylib_command>(LC, LoadCmd);
252 }
253 
254 template <>
255 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)256 MachODumper::processLoadCommandData<MachO::dylinker_command>(
257     MachOYAML::LoadCommand &LC,
258     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
259     MachOYAML::Object &Y) {
260   return readString<MachO::dylinker_command>(LC, LoadCmd);
261 }
262 
263 template <>
264 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)265 MachODumper::processLoadCommandData<MachO::rpath_command>(
266     MachOYAML::LoadCommand &LC,
267     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
268     MachOYAML::Object &Y) {
269   return readString<MachO::rpath_command>(LC, LoadCmd);
270 }
271 
272 template <>
273 Expected<const char *>
processLoadCommandData(MachOYAML::LoadCommand & LC,const llvm::object::MachOObjectFile::LoadCommandInfo & LoadCmd,MachOYAML::Object & Y)274 MachODumper::processLoadCommandData<MachO::build_version_command>(
275     MachOYAML::LoadCommand &LC,
276     const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd,
277     MachOYAML::Object &Y) {
278   auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command);
279   auto NTools = LC.Data.build_version_command_data.ntools;
280   for (unsigned i = 0; i < NTools; ++i) {
281     auto Curr = Start + i * sizeof(MachO::build_tool_version);
282     MachO::build_tool_version BV;
283     memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version));
284     if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
285       MachO::swapStruct(BV);
286     LC.Tools.push_back(BV);
287   }
288   return Start + NTools * sizeof(MachO::build_tool_version);
289 }
290 
dump()291 Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() {
292   auto Y = std::make_unique<MachOYAML::Object>();
293   Y->IsLittleEndian = Obj.isLittleEndian();
294   dumpHeader(Y);
295   if (Error Err = dumpLoadCommands(Y))
296     return std::move(Err);
297   if (RawSegment & ::RawSegments::linkedit)
298     Y->RawLinkEditSegment =
299         yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT"));
300   else
301     dumpLinkEdit(Y);
302 
303   return std::move(Y);
304 }
305 
dumpHeader(std::unique_ptr<MachOYAML::Object> & Y)306 void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) {
307   Y->Header.magic = Obj.getHeader().magic;
308   Y->Header.cputype = Obj.getHeader().cputype;
309   Y->Header.cpusubtype = Obj.getHeader().cpusubtype;
310   Y->Header.filetype = Obj.getHeader().filetype;
311   Y->Header.ncmds = Obj.getHeader().ncmds;
312   Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds;
313   Y->Header.flags = Obj.getHeader().flags;
314   Y->Header.reserved = 0;
315 }
316 
dumpLoadCommands(std::unique_ptr<MachOYAML::Object> & Y)317 Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
318   for (auto LoadCmd : Obj.load_commands()) {
319     MachOYAML::LoadCommand LC;
320     const char *EndPtr = LoadCmd.Ptr;
321     switch (LoadCmd.C.cmd) {
322     default:
323       memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr,
324              sizeof(MachO::load_command));
325       if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
326         MachO::swapStruct(LC.Data.load_command_data);
327       if (Expected<const char *> ExpectedEndPtr =
328               processLoadCommandData<MachO::load_command>(LC, LoadCmd, *Y))
329         EndPtr = *ExpectedEndPtr;
330       else
331         return ExpectedEndPtr.takeError();
332       break;
333 #include "llvm/BinaryFormat/MachO.def"
334     }
335     auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
336     if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
337                      [](const char C) { return C == 0; })) {
338       LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr,
339                              &EndPtr[RemainingBytes]);
340       RemainingBytes = 0;
341     }
342     LC.ZeroPadBytes = RemainingBytes;
343     Y->LoadCommands.push_back(std::move(LC));
344   }
345   return Error::success();
346 }
347 
dumpLinkEdit(std::unique_ptr<MachOYAML::Object> & Y)348 void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) {
349   dumpRebaseOpcodes(Y);
350   dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes());
351   dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes,
352                   Obj.getDyldInfoWeakBindOpcodes());
353   dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(),
354                   true);
355   dumpExportTrie(Y);
356   dumpSymbols(Y);
357   dumpIndirectSymbols(Y);
358   dumpFunctionStarts(Y);
359   dumpChainedFixups(Y);
360   dumpDataInCode(Y);
361 }
362 
dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> & Y)363 void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) {
364   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
365 
366   auto FunctionStarts = Obj.getFunctionStarts();
367   for (auto Addr : FunctionStarts)
368     LEData.FunctionStarts.push_back(Addr);
369 }
370 
dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> & Y)371 void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) {
372   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
373 
374   auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes();
375   for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end();
376        ++OpCode) {
377     MachOYAML::RebaseOpcode RebaseOp;
378     RebaseOp.Opcode =
379         static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK);
380     RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK;
381 
382     unsigned Count;
383     uint64_t ULEB = 0;
384 
385     switch (RebaseOp.Opcode) {
386     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
387 
388       ULEB = decodeULEB128(OpCode + 1, &Count);
389       RebaseOp.ExtraData.push_back(ULEB);
390       OpCode += Count;
391       [[fallthrough]];
392     // Intentionally no break here -- This opcode has two ULEB values
393     case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
394     case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
395     case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
396     case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
397 
398       ULEB = decodeULEB128(OpCode + 1, &Count);
399       RebaseOp.ExtraData.push_back(ULEB);
400       OpCode += Count;
401       break;
402     default:
403       break;
404     }
405 
406     LEData.RebaseOpcodes.push_back(RebaseOp);
407 
408     if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE)
409       break;
410   }
411 }
412 
ReadStringRef(const uint8_t * Start)413 StringRef ReadStringRef(const uint8_t *Start) {
414   const uint8_t *Itr = Start;
415   for (; *Itr; ++Itr)
416     ;
417   return StringRef(reinterpret_cast<const char *>(Start), Itr - Start);
418 }
419 
dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> & BindOpcodes,ArrayRef<uint8_t> OpcodeBuffer,bool Lazy)420 void MachODumper::dumpBindOpcodes(
421     std::vector<MachOYAML::BindOpcode> &BindOpcodes,
422     ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) {
423   for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end();
424        ++OpCode) {
425     MachOYAML::BindOpcode BindOp;
426     BindOp.Opcode =
427         static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
428     BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
429 
430     unsigned Count;
431     uint64_t ULEB = 0;
432     int64_t SLEB = 0;
433 
434     switch (BindOp.Opcode) {
435     case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
436       ULEB = decodeULEB128(OpCode + 1, &Count);
437       BindOp.ULEBExtraData.push_back(ULEB);
438       OpCode += Count;
439       [[fallthrough]];
440     // Intentionally no break here -- this opcode has two ULEB values
441 
442     case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
443     case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
444     case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
445     case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
446       ULEB = decodeULEB128(OpCode + 1, &Count);
447       BindOp.ULEBExtraData.push_back(ULEB);
448       OpCode += Count;
449       break;
450 
451     case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
452       SLEB = decodeSLEB128(OpCode + 1, &Count);
453       BindOp.SLEBExtraData.push_back(SLEB);
454       OpCode += Count;
455       break;
456 
457     case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
458       BindOp.Symbol = ReadStringRef(OpCode + 1);
459       OpCode += BindOp.Symbol.size() + 1;
460       break;
461     default:
462       break;
463     }
464 
465     BindOpcodes.push_back(BindOp);
466 
467     // Lazy bindings have DONE opcodes between operations, so we need to keep
468     // processing after a DONE.
469     if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE)
470       break;
471   }
472 }
473 
474 /*!
475  * /brief processes a node from the export trie, and its children.
476  *
477  * To my knowledge there is no documentation of the encoded format of this data
478  * other than in the heads of the Apple linker engineers. To that end hopefully
479  * this comment and the implementation below can serve to light the way for
480  * anyone crazy enough to come down this path in the future.
481  *
482  * This function reads and preserves the trie structure of the export trie. To
483  * my knowledge there is no code anywhere else that reads the data and preserves
484  * the Trie. LD64 (sources available at opensource.apple.com) has a similar
485  * implementation that parses the export trie into a vector. That code as well
486  * as LLVM's libObject MachO implementation were the basis for this.
487  *
488  * The export trie is an encoded trie. The node serialization is a bit awkward.
489  * The below pseudo-code is the best description I've come up with for it.
490  *
491  * struct SerializedNode {
492  *   ULEB128 TerminalSize;
493  *   struct TerminalData { <-- This is only present if TerminalSize > 0
494  *     ULEB128 Flags;
495  *     ULEB128 Address; <-- Present if (! Flags & REEXPORT )
496  *     ULEB128 Other; <-- Present if ( Flags & REEXPORT ||
497  *                                     Flags & STUB_AND_RESOLVER )
498  *     char[] ImportName; <-- Present if ( Flags & REEXPORT )
499  *   }
500  *   uint8_t ChildrenCount;
501  *   Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount];
502  *   SerializedNode Children[ChildrenCount]
503  * }
504  *
505  * Terminal nodes are nodes that represent actual exports. They can appear
506  * anywhere in the tree other than at the root; they do not need to be leaf
507  * nodes. When reading the data out of the trie this routine reads it in-order,
508  * but it puts the child names and offsets directly into the child nodes. This
509  * results in looping over the children twice during serialization and
510  * de-serialization, but it makes the YAML representation more human readable.
511  *
512  * Below is an example of the graph from a "Hello World" executable:
513  *
514  * -------
515  * | ''  |
516  * -------
517  *    |
518  * -------
519  * | '_' |
520  * -------
521  *    |
522  *    |----------------------------------------|
523  *    |                                        |
524  *  ------------------------      ---------------------
525  *  | '_mh_execute_header' |      | 'main'            |
526  *  | Flags: 0x00000000    |      | Flags: 0x00000000 |
527  *  | Addr:  0x00000000    |      | Addr:  0x00001160 |
528  *  ------------------------      ---------------------
529  *
530  * This graph represents the trie for the exports "__mh_execute_header" and
531  * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are
532  * terminal.
533 */
534 
processExportNode(const uint8_t * Start,const uint8_t * CurrPtr,const uint8_t * const End,MachOYAML::ExportEntry & Entry)535 const uint8_t *processExportNode(const uint8_t *Start, const uint8_t *CurrPtr,
536                                  const uint8_t *const End,
537                                  MachOYAML::ExportEntry &Entry) {
538   if (CurrPtr >= End)
539     return CurrPtr;
540   unsigned Count = 0;
541   Entry.TerminalSize = decodeULEB128(CurrPtr, &Count);
542   CurrPtr += Count;
543   if (Entry.TerminalSize != 0) {
544     Entry.Flags = decodeULEB128(CurrPtr, &Count);
545     CurrPtr += Count;
546     if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
547       Entry.Address = 0;
548       Entry.Other = decodeULEB128(CurrPtr, &Count);
549       CurrPtr += Count;
550       Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr));
551     } else {
552       Entry.Address = decodeULEB128(CurrPtr, &Count);
553       CurrPtr += Count;
554       if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
555         Entry.Other = decodeULEB128(CurrPtr, &Count);
556         CurrPtr += Count;
557       } else
558         Entry.Other = 0;
559     }
560   }
561   uint8_t childrenCount = *CurrPtr++;
562   if (childrenCount == 0)
563     return CurrPtr;
564 
565   Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount,
566                         MachOYAML::ExportEntry());
567   for (auto &Child : Entry.Children) {
568     Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr));
569     CurrPtr += Child.Name.length() + 1;
570     Child.NodeOffset = decodeULEB128(CurrPtr, &Count);
571     CurrPtr += Count;
572   }
573   for (auto &Child : Entry.Children) {
574     CurrPtr = processExportNode(Start, Start + Child.NodeOffset, End, Child);
575   }
576   return CurrPtr;
577 }
578 
dumpExportTrie(std::unique_ptr<MachOYAML::Object> & Y)579 void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) {
580   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
581   // The exports trie can be in LC_DYLD_INFO or LC_DYLD_EXPORTS_TRIE
582   auto ExportsTrie = Obj.getDyldInfoExportsTrie();
583   if (ExportsTrie.empty())
584     ExportsTrie = Obj.getDyldExportsTrie();
585   processExportNode(ExportsTrie.begin(), ExportsTrie.begin(), ExportsTrie.end(),
586                     LEData.ExportTrie);
587 }
588 
589 template <typename nlist_t>
constructNameList(const nlist_t & nlist)590 MachOYAML::NListEntry constructNameList(const nlist_t &nlist) {
591   MachOYAML::NListEntry NL;
592   NL.n_strx = nlist.n_strx;
593   NL.n_type = nlist.n_type;
594   NL.n_sect = nlist.n_sect;
595   NL.n_desc = nlist.n_desc;
596   NL.n_value = nlist.n_value;
597   return NL;
598 }
599 
dumpSymbols(std::unique_ptr<MachOYAML::Object> & Y)600 void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
601   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
602 
603   for (auto Symbol : Obj.symbols()) {
604     MachOYAML::NListEntry NLE =
605         Obj.is64Bit()
606             ? constructNameList<MachO::nlist_64>(
607                   Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
608             : constructNameList<MachO::nlist>(
609                   Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
610     LEData.NameList.push_back(NLE);
611   }
612 
613   StringRef RemainingTable = Obj.getStringTableData();
614   while (RemainingTable.size() > 0) {
615     auto SymbolPair = RemainingTable.split('\0');
616     RemainingTable = SymbolPair.second;
617     LEData.StringTable.push_back(SymbolPair.first);
618   }
619 }
620 
dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> & Y)621 void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) {
622   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
623 
624   MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand();
625   for (unsigned i = 0; i < DLC.nindirectsyms; ++i)
626     LEData.IndirectSymbols.push_back(Obj.getIndirectSymbolTableEntry(DLC, i));
627 }
628 
dumpChainedFixups(std::unique_ptr<MachOYAML::Object> & Y)629 void MachODumper::dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y) {
630   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
631 
632   for (const auto &LC : Y->LoadCommands) {
633     if (LC.Data.load_command_data.cmd == llvm::MachO::LC_DYLD_CHAINED_FIXUPS) {
634       const MachO::linkedit_data_command &DC =
635           LC.Data.linkedit_data_command_data;
636       if (DC.dataoff) {
637         assert(DC.dataoff < Obj.getData().size());
638         assert(DC.dataoff + DC.datasize <= Obj.getData().size());
639         const char *Bytes = Obj.getData().data() + DC.dataoff;
640         for (size_t Idx = 0; Idx < DC.datasize; Idx++) {
641           LEData.ChainedFixups.push_back(Bytes[Idx]);
642         }
643       }
644       break;
645     }
646   }
647 }
648 
dumpDataInCode(std::unique_ptr<MachOYAML::Object> & Y)649 void MachODumper::dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y) {
650   MachOYAML::LinkEditData &LEData = Y->LinkEdit;
651 
652   MachO::linkedit_data_command DIC = Obj.getDataInCodeLoadCommand();
653   uint32_t NumEntries = DIC.datasize / sizeof(MachO::data_in_code_entry);
654   for (uint32_t Idx = 0; Idx < NumEntries; ++Idx) {
655     MachO::data_in_code_entry DICE =
656         Obj.getDataInCodeTableEntry(DIC.dataoff, Idx);
657     MachOYAML::DataInCodeEntry Entry{DICE.offset, DICE.length, DICE.kind};
658     LEData.DataInCode.emplace_back(Entry);
659   }
660 }
661 
macho2yaml(raw_ostream & Out,const object::MachOObjectFile & Obj,unsigned RawSegments)662 Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj,
663                  unsigned RawSegments) {
664   std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj);
665   MachODumper Dumper(Obj, std::move(DCtx), RawSegments);
666   Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump();
667   if (!YAML)
668     return YAML.takeError();
669 
670   yaml::YamlObjectFile YAMLFile;
671   YAMLFile.MachO = std::move(YAML.get());
672 
673   yaml::Output Yout(Out);
674   Yout << YAMLFile;
675   return Error::success();
676 }
677 
macho2yaml(raw_ostream & Out,const object::MachOUniversalBinary & Obj,unsigned RawSegments)678 Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj,
679                  unsigned RawSegments) {
680   yaml::YamlObjectFile YAMLFile;
681   YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary());
682   MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO;
683   YAML.Header.magic = Obj.getMagic();
684   YAML.Header.nfat_arch = Obj.getNumberOfObjects();
685 
686   for (auto Slice : Obj.objects()) {
687     MachOYAML::FatArch arch;
688     arch.cputype = Slice.getCPUType();
689     arch.cpusubtype = Slice.getCPUSubType();
690     arch.offset = Slice.getOffset();
691     arch.size = Slice.getSize();
692     arch.align = Slice.getAlign();
693     arch.reserved = Slice.getReserved();
694     YAML.FatArchs.push_back(arch);
695 
696     auto SliceObj = Slice.getAsObjectFile();
697     if (!SliceObj)
698       return SliceObj.takeError();
699 
700     std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get());
701     MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments);
702     Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump();
703     if (!YAMLObj)
704       return YAMLObj.takeError();
705     YAML.Slices.push_back(*YAMLObj.get());
706   }
707 
708   yaml::Output Yout(Out);
709   Yout << YAML;
710   return Error::success();
711 }
712 
macho2yaml(raw_ostream & Out,const object::Binary & Binary,unsigned RawSegments)713 Error macho2yaml(raw_ostream &Out, const object::Binary &Binary,
714                  unsigned RawSegments) {
715   if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary))
716     return macho2yaml(Out, *MachOObj, RawSegments);
717 
718   if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary))
719     return macho2yaml(Out, *MachOObj, RawSegments);
720 
721   llvm_unreachable("unexpected Mach-O file format");
722 }
723