xref: /llvm-project/llvm/lib/MC/MachObjectWriter.cpp (revision a3ba6a7f972dee85cc073bb4c98bd074e9c276d6)
1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmInfoDarwin.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDirectives.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCFixupKindInfo.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/MC/MCMachObjectWriter.h"
22 #include "llvm/MC/MCObjectFileInfo.h"
23 #include "llvm/MC/MCObjectWriter.h"
24 #include "llvm/MC/MCSection.h"
25 #include "llvm/MC/MCSectionMachO.h"
26 #include "llvm/MC/MCSymbol.h"
27 #include "llvm/MC/MCSymbolMachO.h"
28 #include "llvm/MC/MCValue.h"
29 #include "llvm/Support/Alignment.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/LEB128.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cassert>
38 #include <cstdint>
39 #include <string>
40 #include <utility>
41 #include <vector>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "mc"
46 
47 void MachObjectWriter::reset() {
48   Relocations.clear();
49   IndirectSymBase.clear();
50   SectionAddress.clear();
51   SectionOrder.clear();
52   StringTable.clear();
53   LocalSymbolData.clear();
54   ExternalSymbolData.clear();
55   UndefinedSymbolData.clear();
56   MCObjectWriter::reset();
57 }
58 
59 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
60   // Undefined symbols are always extern.
61   if (S.isUndefined())
62     return true;
63 
64   // References to weak definitions require external relocation entries; the
65   // definition may not always be the one in the same object file.
66   if (cast<MCSymbolMachO>(S).isWeakDefinition())
67     return true;
68 
69   // Otherwise, we can use an internal relocation.
70   return false;
71 }
72 
73 bool MachObjectWriter::
74 MachSymbolData::operator<(const MachSymbolData &RHS) const {
75   return Symbol->getName() < RHS.Symbol->getName();
76 }
77 
78 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
79   const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
80     (MCFixupKind) Kind);
81 
82   return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
83 }
84 
85 uint64_t
86 MachObjectWriter::getFragmentAddress(const MCAssembler &Asm,
87                                      const MCFragment *Fragment) const {
88   return getSectionAddress(Fragment->getParent()) +
89          Asm.getFragmentOffset(*Fragment);
90 }
91 
92 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
93                                             const MCAssembler &Asm) const {
94   // If this is a variable, then recursively evaluate now.
95   if (S.isVariable()) {
96     if (const MCConstantExpr *C =
97           dyn_cast<const MCConstantExpr>(S.getVariableValue()))
98       return C->getValue();
99 
100     MCValue Target;
101     if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Asm, nullptr))
102       report_fatal_error("unable to evaluate offset for variable '" +
103                          S.getName() + "'");
104 
105     // Verify that any used symbols are defined.
106     if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
107       report_fatal_error("unable to evaluate offset to undefined symbol '" +
108                          Target.getSymA()->getSymbol().getName() + "'");
109     if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
110       report_fatal_error("unable to evaluate offset to undefined symbol '" +
111                          Target.getSymB()->getSymbol().getName() + "'");
112 
113     uint64_t Address = Target.getConstant();
114     if (Target.getSymA())
115       Address += getSymbolAddress(Target.getSymA()->getSymbol(), Asm);
116     if (Target.getSymB())
117       Address += getSymbolAddress(Target.getSymB()->getSymbol(), Asm);
118     return Address;
119   }
120 
121   return getSectionAddress(S.getFragment()->getParent()) +
122          Asm.getSymbolOffset(S);
123 }
124 
125 uint64_t MachObjectWriter::getPaddingSize(const MCAssembler &Asm,
126                                           const MCSection *Sec) const {
127   uint64_t EndAddr = getSectionAddress(Sec) + Asm.getSectionAddressSize(*Sec);
128   unsigned Next = cast<MCSectionMachO>(Sec)->getLayoutOrder() + 1;
129   if (Next >= SectionOrder.size())
130     return 0;
131 
132   const MCSection &NextSec = *SectionOrder[Next];
133   if (NextSec.isVirtualSection())
134     return 0;
135   return offsetToAlignment(EndAddr, NextSec.getAlign());
136 }
137 
138 static bool isSymbolLinkerVisible(const MCSymbol &Symbol) {
139   // Non-temporary labels should always be visible to the linker.
140   if (!Symbol.isTemporary())
141     return true;
142 
143   if (Symbol.isUsedInReloc())
144     return true;
145 
146   return false;
147 }
148 
149 const MCSymbol *MachObjectWriter::getAtom(const MCSymbol &S) const {
150   // Linker visible symbols define atoms.
151   if (isSymbolLinkerVisible(S))
152     return &S;
153 
154   // Absolute and undefined symbols have no defining atom.
155   if (!S.isInSection())
156     return nullptr;
157 
158   // Non-linker visible symbols in sections which can't be atomized have no
159   // defining atom.
160   if (!MCAsmInfoDarwin::isSectionAtomizableBySymbols(
161           *S.getFragment()->getParent()))
162     return nullptr;
163 
164   // Otherwise, return the atom for the containing fragment.
165   return S.getFragment()->getAtom();
166 }
167 
168 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
169                                    unsigned NumLoadCommands,
170                                    unsigned LoadCommandsSize,
171                                    bool SubsectionsViaSymbols) {
172   uint32_t Flags = 0;
173 
174   if (SubsectionsViaSymbols)
175     Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
176 
177   // struct mach_header (28 bytes) or
178   // struct mach_header_64 (32 bytes)
179 
180   uint64_t Start = W.OS.tell();
181   (void) Start;
182 
183   W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
184 
185   W.write<uint32_t>(TargetObjectWriter->getCPUType());
186   W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
187 
188   W.write<uint32_t>(Type);
189   W.write<uint32_t>(NumLoadCommands);
190   W.write<uint32_t>(LoadCommandsSize);
191   W.write<uint32_t>(Flags);
192   if (is64Bit())
193     W.write<uint32_t>(0); // reserved
194 
195   assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
196                                            : sizeof(MachO::mach_header)));
197 }
198 
199 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
200   assert(Size >= Str.size());
201   W.OS << Str;
202   W.OS.write_zeros(Size - Str.size());
203 }
204 
205 /// writeSegmentLoadCommand - Write a segment load command.
206 ///
207 /// \param NumSections The number of sections in this segment.
208 /// \param SectionDataSize The total size of the sections.
209 void MachObjectWriter::writeSegmentLoadCommand(
210     StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
211     uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
212     uint32_t InitProt) {
213   // struct segment_command (56 bytes) or
214   // struct segment_command_64 (72 bytes)
215 
216   uint64_t Start = W.OS.tell();
217   (void) Start;
218 
219   unsigned SegmentLoadCommandSize =
220     is64Bit() ? sizeof(MachO::segment_command_64):
221     sizeof(MachO::segment_command);
222   W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
223   W.write<uint32_t>(SegmentLoadCommandSize +
224           NumSections * (is64Bit() ? sizeof(MachO::section_64) :
225                          sizeof(MachO::section)));
226 
227   writeWithPadding(Name, 16);
228   if (is64Bit()) {
229     W.write<uint64_t>(VMAddr);                 // vmaddr
230     W.write<uint64_t>(VMSize); // vmsize
231     W.write<uint64_t>(SectionDataStartOffset); // file offset
232     W.write<uint64_t>(SectionDataSize); // file size
233   } else {
234     W.write<uint32_t>(VMAddr);                 // vmaddr
235     W.write<uint32_t>(VMSize); // vmsize
236     W.write<uint32_t>(SectionDataStartOffset); // file offset
237     W.write<uint32_t>(SectionDataSize); // file size
238   }
239   // maxprot
240   W.write<uint32_t>(MaxProt);
241   // initprot
242   W.write<uint32_t>(InitProt);
243   W.write<uint32_t>(NumSections);
244   W.write<uint32_t>(0); // flags
245 
246   assert(W.OS.tell() - Start == SegmentLoadCommandSize);
247 }
248 
249 void MachObjectWriter::writeSection(const MCAssembler &Asm,
250                                     const MCSection &Sec, uint64_t VMAddr,
251                                     uint64_t FileOffset, unsigned Flags,
252                                     uint64_t RelocationsStart,
253                                     unsigned NumRelocations) {
254   uint64_t SectionSize = Asm.getSectionAddressSize(Sec);
255   const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
256 
257   // The offset is unused for virtual sections.
258   if (Section.isVirtualSection()) {
259     assert(Asm.getSectionFileSize(Sec) == 0 && "Invalid file size!");
260     FileOffset = 0;
261   }
262 
263   // struct section (68 bytes) or
264   // struct section_64 (80 bytes)
265 
266   uint64_t Start = W.OS.tell();
267   (void) Start;
268 
269   writeWithPadding(Section.getName(), 16);
270   writeWithPadding(Section.getSegmentName(), 16);
271   if (is64Bit()) {
272     W.write<uint64_t>(VMAddr);      // address
273     W.write<uint64_t>(SectionSize); // size
274   } else {
275     W.write<uint32_t>(VMAddr);      // address
276     W.write<uint32_t>(SectionSize); // size
277   }
278   W.write<uint32_t>(FileOffset);
279 
280   W.write<uint32_t>(Log2(Section.getAlign()));
281   W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
282   W.write<uint32_t>(NumRelocations);
283   W.write<uint32_t>(Flags);
284   W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
285   W.write<uint32_t>(Section.getStubSize()); // reserved2
286   if (is64Bit())
287     W.write<uint32_t>(0); // reserved3
288 
289   assert(W.OS.tell() - Start ==
290          (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
291 }
292 
293 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
294                                               uint32_t NumSymbols,
295                                               uint32_t StringTableOffset,
296                                               uint32_t StringTableSize) {
297   // struct symtab_command (24 bytes)
298 
299   uint64_t Start = W.OS.tell();
300   (void) Start;
301 
302   W.write<uint32_t>(MachO::LC_SYMTAB);
303   W.write<uint32_t>(sizeof(MachO::symtab_command));
304   W.write<uint32_t>(SymbolOffset);
305   W.write<uint32_t>(NumSymbols);
306   W.write<uint32_t>(StringTableOffset);
307   W.write<uint32_t>(StringTableSize);
308 
309   assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
310 }
311 
312 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
313                                                 uint32_t NumLocalSymbols,
314                                                 uint32_t FirstExternalSymbol,
315                                                 uint32_t NumExternalSymbols,
316                                                 uint32_t FirstUndefinedSymbol,
317                                                 uint32_t NumUndefinedSymbols,
318                                                 uint32_t IndirectSymbolOffset,
319                                                 uint32_t NumIndirectSymbols) {
320   // struct dysymtab_command (80 bytes)
321 
322   uint64_t Start = W.OS.tell();
323   (void) Start;
324 
325   W.write<uint32_t>(MachO::LC_DYSYMTAB);
326   W.write<uint32_t>(sizeof(MachO::dysymtab_command));
327   W.write<uint32_t>(FirstLocalSymbol);
328   W.write<uint32_t>(NumLocalSymbols);
329   W.write<uint32_t>(FirstExternalSymbol);
330   W.write<uint32_t>(NumExternalSymbols);
331   W.write<uint32_t>(FirstUndefinedSymbol);
332   W.write<uint32_t>(NumUndefinedSymbols);
333   W.write<uint32_t>(0); // tocoff
334   W.write<uint32_t>(0); // ntoc
335   W.write<uint32_t>(0); // modtaboff
336   W.write<uint32_t>(0); // nmodtab
337   W.write<uint32_t>(0); // extrefsymoff
338   W.write<uint32_t>(0); // nextrefsyms
339   W.write<uint32_t>(IndirectSymbolOffset);
340   W.write<uint32_t>(NumIndirectSymbols);
341   W.write<uint32_t>(0); // extreloff
342   W.write<uint32_t>(0); // nextrel
343   W.write<uint32_t>(0); // locreloff
344   W.write<uint32_t>(0); // nlocrel
345 
346   assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
347 }
348 
349 MachObjectWriter::MachSymbolData *
350 MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
351   for (auto *SymbolData :
352        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
353     for (MachSymbolData &Entry : *SymbolData)
354       if (Entry.Symbol == &Sym)
355         return &Entry;
356 
357   return nullptr;
358 }
359 
360 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
361   const MCSymbol *S = &Sym;
362   while (S->isVariable()) {
363     const MCExpr *Value = S->getVariableValue();
364     const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
365     if (!Ref)
366       return *S;
367     S = &Ref->getSymbol();
368   }
369   return *S;
370 }
371 
372 void MachObjectWriter::writeNlist(MachSymbolData &MSD, const MCAssembler &Asm) {
373   const MCSymbol *Symbol = MSD.Symbol;
374   const MCSymbol &Data = *Symbol;
375   const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
376   uint8_t SectionIndex = MSD.SectionIndex;
377   uint8_t Type = 0;
378   uint64_t Address = 0;
379   bool IsAlias = Symbol != AliasedSymbol;
380 
381   const MCSymbol &OrigSymbol = *Symbol;
382   MachSymbolData *AliaseeInfo;
383   if (IsAlias) {
384     AliaseeInfo = findSymbolData(*AliasedSymbol);
385     if (AliaseeInfo)
386       SectionIndex = AliaseeInfo->SectionIndex;
387     Symbol = AliasedSymbol;
388     // FIXME: Should this update Data as well?
389   }
390 
391   // Set the N_TYPE bits. See <mach-o/nlist.h>.
392   //
393   // FIXME: Are the prebound or indirect fields possible here?
394   if (IsAlias && Symbol->isUndefined())
395     Type = MachO::N_INDR;
396   else if (Symbol->isUndefined())
397     Type = MachO::N_UNDF;
398   else if (Symbol->isAbsolute())
399     Type = MachO::N_ABS;
400   else
401     Type = MachO::N_SECT;
402 
403   // FIXME: Set STAB bits.
404 
405   if (Data.isPrivateExtern())
406     Type |= MachO::N_PEXT;
407 
408   // Set external bit.
409   if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
410     Type |= MachO::N_EXT;
411 
412   // Compute the symbol address.
413   if (IsAlias && Symbol->isUndefined())
414     Address = AliaseeInfo->StringIndex;
415   else if (Symbol->isDefined())
416     Address = getSymbolAddress(OrigSymbol, Asm);
417   else if (Symbol->isCommon()) {
418     // Common symbols are encoded with the size in the address
419     // field, and their alignment in the flags.
420     Address = Symbol->getCommonSize();
421   }
422 
423   // struct nlist (12 bytes)
424 
425   W.write<uint32_t>(MSD.StringIndex);
426   W.OS << char(Type);
427   W.OS << char(SectionIndex);
428 
429   // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
430   // value.
431   bool EncodeAsAltEntry =
432     IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
433   W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
434   if (is64Bit())
435     W.write<uint64_t>(Address);
436   else
437     W.write<uint32_t>(Address);
438 }
439 
440 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
441                                                 uint32_t DataOffset,
442                                                 uint32_t DataSize) {
443   uint64_t Start = W.OS.tell();
444   (void) Start;
445 
446   W.write<uint32_t>(Type);
447   W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
448   W.write<uint32_t>(DataOffset);
449   W.write<uint32_t>(DataSize);
450 
451   assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
452 }
453 
454 static unsigned ComputeLinkerOptionsLoadCommandSize(
455   const std::vector<std::string> &Options, bool is64Bit)
456 {
457   unsigned Size = sizeof(MachO::linker_option_command);
458   for (const std::string &Option : Options)
459     Size += Option.size() + 1;
460   return alignTo(Size, is64Bit ? 8 : 4);
461 }
462 
463 void MachObjectWriter::writeLinkerOptionsLoadCommand(
464   const std::vector<std::string> &Options)
465 {
466   unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
467   uint64_t Start = W.OS.tell();
468   (void) Start;
469 
470   W.write<uint32_t>(MachO::LC_LINKER_OPTION);
471   W.write<uint32_t>(Size);
472   W.write<uint32_t>(Options.size());
473   uint64_t BytesWritten = sizeof(MachO::linker_option_command);
474   for (const std::string &Option : Options) {
475     // Write each string, including the null byte.
476     W.OS << Option << '\0';
477     BytesWritten += Option.size() + 1;
478   }
479 
480   // Pad to a multiple of the pointer size.
481   W.OS.write_zeros(
482       offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4)));
483 
484   assert(W.OS.tell() - Start == Size);
485 }
486 
487 static bool isFixupTargetValid(const MCValue &Target) {
488   // Target is (LHS - RHS + cst).
489   // We don't support the form where LHS is null: -RHS + cst
490   if (!Target.getSymA() && Target.getSymB())
491     return false;
492   return true;
493 }
494 
495 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
496                                         const MCFragment *Fragment,
497                                         const MCFixup &Fixup, MCValue Target,
498                                         uint64_t &FixedValue) {
499   if (!isFixupTargetValid(Target)) {
500     Asm.getContext().reportError(Fixup.getLoc(),
501                                  "unsupported relocation expression");
502     return;
503   }
504 
505   TargetObjectWriter->recordRelocation(this, Asm, Fragment, Fixup, Target,
506                                        FixedValue);
507 }
508 
509 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
510   // This is the point where 'as' creates actual symbols for indirect symbols
511   // (in the following two passes). It would be easier for us to do this sooner
512   // when we see the attribute, but that makes getting the order in the symbol
513   // table much more complicated than it is worth.
514   //
515   // FIXME: Revisit this when the dust settles.
516 
517   // Report errors for use of .indirect_symbol not in a symbol pointer section
518   // or stub section.
519   for (IndirectSymbolData &ISD : Asm.getIndirectSymbols()) {
520     const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section);
521 
522     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
523         Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
524         Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
525         Section.getType() != MachO::S_SYMBOL_STUBS) {
526       MCSymbol &Symbol = *ISD.Symbol;
527       report_fatal_error("indirect symbol '" + Symbol.getName() +
528                          "' not in a symbol pointer or stub section");
529     }
530   }
531 
532   // Bind non-lazy symbol pointers first.
533   for (auto [IndirectIndex, ISD] : enumerate(Asm.getIndirectSymbols())) {
534     const auto &Section = cast<MCSectionMachO>(*ISD.Section);
535 
536     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
537         Section.getType() !=  MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
538       continue;
539 
540     // Initialize the section indirect symbol base, if necessary.
541     IndirectSymBase.insert(std::make_pair(ISD.Section, IndirectIndex));
542 
543     Asm.registerSymbol(*ISD.Symbol);
544   }
545 
546   // Then lazy symbol pointers and symbol stubs.
547   for (auto [IndirectIndex, ISD] : enumerate(Asm.getIndirectSymbols())) {
548     const auto &Section = cast<MCSectionMachO>(*ISD.Section);
549 
550     if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
551         Section.getType() != MachO::S_SYMBOL_STUBS)
552       continue;
553 
554     // Initialize the section indirect symbol base, if necessary.
555     IndirectSymBase.insert(std::make_pair(ISD.Section, IndirectIndex));
556 
557     // Set the symbol type to undefined lazy, but only on construction.
558     //
559     // FIXME: Do not hardcode.
560     if (Asm.registerSymbol(*ISD.Symbol))
561       cast<MCSymbolMachO>(ISD.Symbol)->setReferenceTypeUndefinedLazy(true);
562   }
563 }
564 
565 /// computeSymbolTable - Compute the symbol table data
566 void MachObjectWriter::computeSymbolTable(
567     MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
568     std::vector<MachSymbolData> &ExternalSymbolData,
569     std::vector<MachSymbolData> &UndefinedSymbolData) {
570   // Build section lookup table.
571   DenseMap<const MCSection*, uint8_t> SectionIndexMap;
572   unsigned Index = 1;
573   for (MCAssembler::iterator it = Asm.begin(),
574          ie = Asm.end(); it != ie; ++it, ++Index)
575     SectionIndexMap[&*it] = Index;
576   assert(Index <= 256 && "Too many sections!");
577 
578   // Build the string table.
579   for (const MCSymbol &Symbol : Asm.symbols()) {
580     if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible())
581       continue;
582 
583     StringTable.add(Symbol.getName());
584   }
585   StringTable.finalize();
586 
587   // Build the symbol arrays but only for non-local symbols.
588   //
589   // The particular order that we collect and then sort the symbols is chosen to
590   // match 'as'. Even though it doesn't matter for correctness, this is
591   // important for letting us diff .o files.
592   for (const MCSymbol &Symbol : Asm.symbols()) {
593     // Ignore non-linker visible symbols.
594     if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible())
595       continue;
596 
597     if (!Symbol.isExternal() && !Symbol.isUndefined())
598       continue;
599 
600     MachSymbolData MSD;
601     MSD.Symbol = &Symbol;
602     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
603 
604     if (Symbol.isUndefined()) {
605       MSD.SectionIndex = 0;
606       UndefinedSymbolData.push_back(MSD);
607     } else if (Symbol.isAbsolute()) {
608       MSD.SectionIndex = 0;
609       ExternalSymbolData.push_back(MSD);
610     } else {
611       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
612       assert(MSD.SectionIndex && "Invalid section index!");
613       ExternalSymbolData.push_back(MSD);
614     }
615   }
616 
617   // Now add the data for local symbols.
618   for (const MCSymbol &Symbol : Asm.symbols()) {
619     // Ignore non-linker visible symbols.
620     if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible())
621       continue;
622 
623     if (Symbol.isExternal() || Symbol.isUndefined())
624       continue;
625 
626     MachSymbolData MSD;
627     MSD.Symbol = &Symbol;
628     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
629 
630     if (Symbol.isAbsolute()) {
631       MSD.SectionIndex = 0;
632       LocalSymbolData.push_back(MSD);
633     } else {
634       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
635       assert(MSD.SectionIndex && "Invalid section index!");
636       LocalSymbolData.push_back(MSD);
637     }
638   }
639 
640   // External and undefined symbols are required to be in lexicographic order.
641   llvm::sort(ExternalSymbolData);
642   llvm::sort(UndefinedSymbolData);
643 
644   // Set the symbol indices.
645   Index = 0;
646   for (auto *SymbolData :
647        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
648     for (MachSymbolData &Entry : *SymbolData)
649       Entry.Symbol->setIndex(Index++);
650 
651   for (const MCSection &Section : Asm) {
652     for (RelAndSymbol &Rel : Relocations[&Section]) {
653       if (!Rel.Sym)
654         continue;
655 
656       // Set the Index and the IsExtern bit.
657       unsigned Index = Rel.Sym->getIndex();
658       assert(isInt<24>(Index));
659       if (W.Endian == llvm::endianness::little)
660         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
661       else
662         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
663     }
664   }
665 }
666 
667 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm) {
668   // Assign layout order indices to sections.
669   unsigned i = 0;
670   // Compute the section layout order. Virtual sections must go last.
671   for (MCSection &Sec : Asm) {
672     if (!Sec.isVirtualSection()) {
673       SectionOrder.push_back(&Sec);
674       cast<MCSectionMachO>(Sec).setLayoutOrder(i++);
675     }
676   }
677   for (MCSection &Sec : Asm) {
678     if (Sec.isVirtualSection()) {
679       SectionOrder.push_back(&Sec);
680       cast<MCSectionMachO>(Sec).setLayoutOrder(i++);
681     }
682   }
683 
684   uint64_t StartAddress = 0;
685   for (const MCSection *Sec : SectionOrder) {
686     StartAddress = alignTo(StartAddress, Sec->getAlign());
687     SectionAddress[Sec] = StartAddress;
688     StartAddress += Asm.getSectionAddressSize(*Sec);
689 
690     // Explicitly pad the section to match the alignment requirements of the
691     // following one. This is for 'gas' compatibility, it shouldn't
692     /// strictly be necessary.
693     StartAddress += getPaddingSize(Asm, Sec);
694   }
695 }
696 
697 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm) {
698   computeSectionAddresses(Asm);
699 
700   // Create symbol data for any indirect symbols.
701   bindIndirectSymbols(Asm);
702 }
703 
704 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
705     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
706     bool InSet, bool IsPCRel) const {
707   if (InSet)
708     return true;
709 
710   // The effective address is
711   //     addr(atom(A)) + offset(A)
712   //   - addr(atom(B)) - offset(B)
713   // and the offsets are not relocatable, so the fixup is fully resolved when
714   //  addr(atom(A)) - addr(atom(B)) == 0.
715   const MCSymbol &SA = findAliasedSymbol(SymA);
716   const MCSection &SecA = SA.getSection();
717   const MCSection &SecB = *FB.getParent();
718 
719   if (IsPCRel) {
720     // The simple (Darwin, except on x86_64) way of dealing with this was to
721     // assume that any reference to a temporary symbol *must* be a temporary
722     // symbol in the same atom, unless the sections differ. Therefore, any PCrel
723     // relocation to a temporary symbol (in the same section) is fully
724     // resolved. This also works in conjunction with absolutized .set, which
725     // requires the compiler to use .set to absolutize the differences between
726     // symbols which the compiler knows to be assembly time constants, so we
727     // don't need to worry about considering symbol differences fully resolved.
728     //
729     // If the file isn't using sub-sections-via-symbols, we can make the
730     // same assumptions about any symbol that we normally make about
731     // assembler locals.
732 
733     bool hasReliableSymbolDifference = isX86_64();
734     if (!hasReliableSymbolDifference) {
735       if (!SA.isInSection() || &SecA != &SecB ||
736           (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
737            Asm.getSubsectionsViaSymbols()))
738         return false;
739       return true;
740     }
741   }
742 
743   // If they are not in the same section, we can't compute the diff.
744   if (&SecA != &SecB)
745     return false;
746 
747   // If the atoms are the same, they are guaranteed to have the same address.
748   return SA.getFragment()->getAtom() == FB.getAtom();
749 }
750 
751 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
752   switch (Type) {
753   case MCVM_OSXVersionMin:     return MachO::LC_VERSION_MIN_MACOSX;
754   case MCVM_IOSVersionMin:     return MachO::LC_VERSION_MIN_IPHONEOS;
755   case MCVM_TvOSVersionMin:    return MachO::LC_VERSION_MIN_TVOS;
756   case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
757   }
758   llvm_unreachable("Invalid mc version min type");
759 }
760 
761 void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) {
762   MCSection *AddrSigSection =
763       Asm.getContext().getObjectFileInfo()->getAddrSigSection();
764   unsigned Log2Size = is64Bit() ? 3 : 2;
765   for (const MCSymbol *S : getAddrsigSyms()) {
766     if (!S->isRegistered())
767       continue;
768     MachO::any_relocation_info MRE;
769     MRE.r_word0 = 0;
770     MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28);
771     addRelocation(S, AddrSigSection, MRE);
772   }
773 }
774 
775 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm) {
776   uint64_t StartOffset = W.OS.tell();
777 
778   populateAddrSigSection(Asm);
779 
780   // Compute symbol table information and bind symbol indices.
781   computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
782                      UndefinedSymbolData);
783 
784   if (!Asm.CGProfile.empty()) {
785     MCSection *CGProfileSection = Asm.getContext().getMachOSection(
786         "__LLVM", "__cg_profile", 0, SectionKind::getMetadata());
787     auto &Frag = cast<MCDataFragment>(*CGProfileSection->begin());
788     Frag.getContents().clear();
789     raw_svector_ostream OS(Frag.getContents());
790     for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) {
791       uint32_t FromIndex = CGPE.From->getSymbol().getIndex();
792       uint32_t ToIndex = CGPE.To->getSymbol().getIndex();
793       support::endian::write(OS, FromIndex, W.Endian);
794       support::endian::write(OS, ToIndex, W.Endian);
795       support::endian::write(OS, CGPE.Count, W.Endian);
796     }
797   }
798 
799   unsigned NumSections = Asm.size();
800   const MCAssembler::VersionInfoType &VersionInfo = Asm.getVersionInfo();
801 
802   // The section data starts after the header, the segment load command (and
803   // section headers) and the symbol table.
804   unsigned NumLoadCommands = 1;
805   uint64_t LoadCommandsSize = is64Bit() ?
806     sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
807     sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
808 
809   // Add the deployment target version info load command size, if used.
810   if (VersionInfo.Major != 0) {
811     ++NumLoadCommands;
812     if (VersionInfo.EmitBuildVersion)
813       LoadCommandsSize += sizeof(MachO::build_version_command);
814     else
815       LoadCommandsSize += sizeof(MachO::version_min_command);
816   }
817 
818   const MCAssembler::VersionInfoType &TargetVariantVersionInfo =
819       Asm.getDarwinTargetVariantVersionInfo();
820 
821   // Add the target variant version info load command size, if used.
822   if (TargetVariantVersionInfo.Major != 0) {
823     ++NumLoadCommands;
824     assert(TargetVariantVersionInfo.EmitBuildVersion &&
825            "target variant should use build version");
826     LoadCommandsSize += sizeof(MachO::build_version_command);
827   }
828 
829   // Add the data-in-code load command size, if used.
830   unsigned NumDataRegions = Asm.getDataRegions().size();
831   if (NumDataRegions) {
832     ++NumLoadCommands;
833     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
834   }
835 
836   // Add the loh load command size, if used.
837   uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(Asm, *this);
838   uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
839   if (LOHSize) {
840     ++NumLoadCommands;
841     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
842   }
843 
844   // Add the symbol table load command sizes, if used.
845   unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
846     UndefinedSymbolData.size();
847   if (NumSymbols) {
848     NumLoadCommands += 2;
849     LoadCommandsSize += (sizeof(MachO::symtab_command) +
850                          sizeof(MachO::dysymtab_command));
851   }
852 
853   // Add the linker option load commands sizes.
854   for (const auto &Option : Asm.getLinkerOptions()) {
855     ++NumLoadCommands;
856     LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
857   }
858 
859   // Compute the total size of the section data, as well as its file size and vm
860   // size.
861   uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
862                                sizeof(MachO::mach_header)) + LoadCommandsSize;
863   uint64_t SectionDataSize = 0;
864   uint64_t SectionDataFileSize = 0;
865   uint64_t VMSize = 0;
866   for (const MCSection &Sec : Asm) {
867     uint64_t Address = getSectionAddress(&Sec);
868     uint64_t Size = Asm.getSectionAddressSize(Sec);
869     uint64_t FileSize = Asm.getSectionFileSize(Sec);
870     FileSize += getPaddingSize(Asm, &Sec);
871 
872     VMSize = std::max(VMSize, Address + Size);
873 
874     if (Sec.isVirtualSection())
875       continue;
876 
877     SectionDataSize = std::max(SectionDataSize, Address + Size);
878     SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
879   }
880 
881   // The section data is padded to pointer size bytes.
882   //
883   // FIXME: Is this machine dependent?
884   unsigned SectionDataPadding =
885       offsetToAlignment(SectionDataFileSize, is64Bit() ? Align(8) : Align(4));
886   SectionDataFileSize += SectionDataPadding;
887 
888   // Write the prolog, starting with the header and load command...
889   writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
890               Asm.getSubsectionsViaSymbols());
891   uint32_t Prot =
892       MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
893   writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
894                           SectionDataSize, Prot, Prot);
895 
896   // ... and then the section headers.
897   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
898   for (const MCSection &Section : Asm) {
899     const auto &Sec = cast<MCSectionMachO>(Section);
900     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
901     unsigned NumRelocs = Relocs.size();
902     uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
903     unsigned Flags = Sec.getTypeAndAttributes();
904     if (Sec.hasInstructions())
905       Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
906     writeSection(Asm, Sec, getSectionAddress(&Sec), SectionStart, Flags,
907                  RelocTableEnd, NumRelocs);
908     RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
909   }
910 
911   // Write out the deployment target information, if it's available.
912   auto EmitDeploymentTargetVersion =
913       [&](const MCAssembler::VersionInfoType &VersionInfo) {
914         auto EncodeVersion = [](VersionTuple V) -> uint32_t {
915           assert(!V.empty() && "empty version");
916           unsigned Update = V.getSubminor().value_or(0);
917           unsigned Minor = V.getMinor().value_or(0);
918           assert(Update < 256 && "unencodable update target version");
919           assert(Minor < 256 && "unencodable minor target version");
920           assert(V.getMajor() < 65536 && "unencodable major target version");
921           return Update | (Minor << 8) | (V.getMajor() << 16);
922         };
923         uint32_t EncodedVersion = EncodeVersion(VersionTuple(
924             VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
925         uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
926                                   ? EncodeVersion(VersionInfo.SDKVersion)
927                                   : 0;
928         if (VersionInfo.EmitBuildVersion) {
929           // FIXME: Currently empty tools. Add clang version in the future.
930           W.write<uint32_t>(MachO::LC_BUILD_VERSION);
931           W.write<uint32_t>(sizeof(MachO::build_version_command));
932           W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
933           W.write<uint32_t>(EncodedVersion);
934           W.write<uint32_t>(SDKVersion);
935           W.write<uint32_t>(0); // Empty tools list.
936         } else {
937           MachO::LoadCommandType LCType =
938               getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
939           W.write<uint32_t>(LCType);
940           W.write<uint32_t>(sizeof(MachO::version_min_command));
941           W.write<uint32_t>(EncodedVersion);
942           W.write<uint32_t>(SDKVersion);
943         }
944       };
945   if (VersionInfo.Major != 0)
946     EmitDeploymentTargetVersion(VersionInfo);
947   if (TargetVariantVersionInfo.Major != 0)
948     EmitDeploymentTargetVersion(TargetVariantVersionInfo);
949 
950   // Write the data-in-code load command, if used.
951   uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
952   if (NumDataRegions) {
953     uint64_t DataRegionsOffset = RelocTableEnd;
954     uint64_t DataRegionsSize = NumDataRegions * 8;
955     writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
956                              DataRegionsSize);
957   }
958 
959   // Write the loh load command, if used.
960   uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
961   if (LOHSize)
962     writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
963                              DataInCodeTableEnd, LOHSize);
964 
965   // Write the symbol table load command, if used.
966   if (NumSymbols) {
967     unsigned FirstLocalSymbol = 0;
968     unsigned NumLocalSymbols = LocalSymbolData.size();
969     unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
970     unsigned NumExternalSymbols = ExternalSymbolData.size();
971     unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
972     unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
973     unsigned NumIndirectSymbols = Asm.getIndirectSymbols().size();
974     unsigned NumSymTabSymbols =
975       NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
976     uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
977     uint64_t IndirectSymbolOffset = 0;
978 
979     // If used, the indirect symbols are written after the section data.
980     if (NumIndirectSymbols)
981       IndirectSymbolOffset = LOHTableEnd;
982 
983     // The symbol table is written after the indirect symbol data.
984     uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
985 
986     // The string table is written after symbol table.
987     uint64_t StringTableOffset =
988       SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
989                                               sizeof(MachO::nlist_64) :
990                                               sizeof(MachO::nlist));
991     writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
992                            StringTableOffset, StringTable.getSize());
993 
994     writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
995                              FirstExternalSymbol, NumExternalSymbols,
996                              FirstUndefinedSymbol, NumUndefinedSymbols,
997                              IndirectSymbolOffset, NumIndirectSymbols);
998   }
999 
1000   // Write the linker options load commands.
1001   for (const auto &Option : Asm.getLinkerOptions())
1002     writeLinkerOptionsLoadCommand(Option);
1003 
1004   // Write the actual section data.
1005   for (const MCSection &Sec : Asm) {
1006     Asm.writeSectionData(W.OS, &Sec);
1007 
1008     uint64_t Pad = getPaddingSize(Asm, &Sec);
1009     W.OS.write_zeros(Pad);
1010   }
1011 
1012   // Write the extra padding.
1013   W.OS.write_zeros(SectionDataPadding);
1014 
1015   // Write the relocation entries.
1016   for (const MCSection &Sec : Asm) {
1017     // Write the section relocation entries, in reverse order to match 'as'
1018     // (approximately, the exact algorithm is more complicated than this).
1019     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
1020     for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) {
1021       W.write<uint32_t>(Rel.MRE.r_word0);
1022       W.write<uint32_t>(Rel.MRE.r_word1);
1023     }
1024   }
1025 
1026   // Write out the data-in-code region payload, if there is one.
1027   for (MCAssembler::const_data_region_iterator
1028          it = Asm.data_region_begin(), ie = Asm.data_region_end();
1029          it != ie; ++it) {
1030     const DataRegionData *Data = &(*it);
1031     uint64_t Start = getSymbolAddress(*Data->Start, Asm);
1032     uint64_t End;
1033     if (Data->End)
1034       End = getSymbolAddress(*Data->End, Asm);
1035     else
1036       report_fatal_error("Data region not terminated");
1037 
1038     LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
1039                       << "  start: " << Start << "(" << Data->Start->getName()
1040                       << ")"
1041                       << "  end: " << End << "(" << Data->End->getName() << ")"
1042                       << "  size: " << End - Start << "\n");
1043     W.write<uint32_t>(Start);
1044     W.write<uint16_t>(End - Start);
1045     W.write<uint16_t>(Data->Kind);
1046   }
1047 
1048   // Write out the loh commands, if there is one.
1049   if (LOHSize) {
1050 #ifndef NDEBUG
1051     unsigned Start = W.OS.tell();
1052 #endif
1053     Asm.getLOHContainer().emit(Asm, *this);
1054     // Pad to a multiple of the pointer size.
1055     W.OS.write_zeros(
1056         offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4)));
1057     assert(W.OS.tell() - Start == LOHSize);
1058   }
1059 
1060   // Write the symbol table data, if used.
1061   if (NumSymbols) {
1062     // Write the indirect symbol entries.
1063     for (auto &ISD : Asm.getIndirectSymbols()) {
1064       // Indirect symbols in the non-lazy symbol pointer section have some
1065       // special handling.
1066       const MCSectionMachO &Section =
1067           static_cast<const MCSectionMachO &>(*ISD.Section);
1068       if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
1069         // If this symbol is defined and internal, mark it as such.
1070         if (ISD.Symbol->isDefined() && !ISD.Symbol->isExternal()) {
1071           uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
1072           if (ISD.Symbol->isAbsolute())
1073             Flags |= MachO::INDIRECT_SYMBOL_ABS;
1074           W.write<uint32_t>(Flags);
1075           continue;
1076         }
1077       }
1078 
1079       W.write<uint32_t>(ISD.Symbol->getIndex());
1080     }
1081 
1082     // FIXME: Check that offsets match computed ones.
1083 
1084     // Write the symbol table entries.
1085     for (auto *SymbolData :
1086          {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
1087       for (MachSymbolData &Entry : *SymbolData)
1088         writeNlist(Entry, Asm);
1089 
1090     // Write the string table.
1091     StringTable.write(W.OS);
1092   }
1093 
1094   return W.OS.tell() - StartOffset;
1095 }
1096 
1097 std::unique_ptr<MCObjectWriter>
1098 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
1099                              raw_pwrite_stream &OS, bool IsLittleEndian) {
1100   return std::make_unique<MachObjectWriter>(std::move(MOTW), OS,
1101                                              IsLittleEndian);
1102 }
1103