xref: /llvm-project/llvm/lib/MC/MachObjectWriter.cpp (revision a40ca78bb926d8c596036fc93b1c6ca7731c795b)
1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAsmInfoDarwin.h"
16 #include "llvm/MC/MCAssembler.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCDirectives.h"
19 #include "llvm/MC/MCExpr.h"
20 #include "llvm/MC/MCFixupKindInfo.h"
21 #include "llvm/MC/MCFragment.h"
22 #include "llvm/MC/MCMachObjectWriter.h"
23 #include "llvm/MC/MCObjectFileInfo.h"
24 #include "llvm/MC/MCObjectWriter.h"
25 #include "llvm/MC/MCSection.h"
26 #include "llvm/MC/MCSectionMachO.h"
27 #include "llvm/MC/MCSymbol.h"
28 #include "llvm/MC/MCSymbolMachO.h"
29 #include "llvm/MC/MCValue.h"
30 #include "llvm/Support/Alignment.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/LEB128.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <algorithm>
38 #include <cassert>
39 #include <cstdint>
40 #include <string>
41 #include <utility>
42 #include <vector>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "mc"
47 
48 void MachObjectWriter::reset() {
49   Relocations.clear();
50   IndirectSymBase.clear();
51   StringTable.clear();
52   LocalSymbolData.clear();
53   ExternalSymbolData.clear();
54   UndefinedSymbolData.clear();
55   MCObjectWriter::reset();
56 }
57 
58 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
59   // Undefined symbols are always extern.
60   if (S.isUndefined())
61     return true;
62 
63   // References to weak definitions require external relocation entries; the
64   // definition may not always be the one in the same object file.
65   if (cast<MCSymbolMachO>(S).isWeakDefinition())
66     return true;
67 
68   // Otherwise, we can use an internal relocation.
69   return false;
70 }
71 
72 bool MachObjectWriter::
73 MachSymbolData::operator<(const MachSymbolData &RHS) const {
74   return Symbol->getName() < RHS.Symbol->getName();
75 }
76 
77 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
78   const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
79     (MCFixupKind) Kind);
80 
81   return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
82 }
83 
84 uint64_t
85 MachObjectWriter::getFragmentAddress(const MCAssembler &Asm,
86                                      const MCFragment *Fragment) const {
87   return getSectionAddress(Fragment->getParent()) +
88          Asm.getFragmentOffset(*Fragment);
89 }
90 
91 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
92                                             const MCAsmLayout &Layout) const {
93   // If this is a variable, then recursively evaluate now.
94   if (S.isVariable()) {
95     if (const MCConstantExpr *C =
96           dyn_cast<const MCConstantExpr>(S.getVariableValue()))
97       return C->getValue();
98 
99     MCValue Target;
100     if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
101       report_fatal_error("unable to evaluate offset for variable '" +
102                          S.getName() + "'");
103 
104     // Verify that any used symbols are defined.
105     if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
106       report_fatal_error("unable to evaluate offset to undefined symbol '" +
107                          Target.getSymA()->getSymbol().getName() + "'");
108     if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
109       report_fatal_error("unable to evaluate offset to undefined symbol '" +
110                          Target.getSymB()->getSymbol().getName() + "'");
111 
112     uint64_t Address = Target.getConstant();
113     if (Target.getSymA())
114       Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
115     if (Target.getSymB())
116       Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
117     return Address;
118   }
119 
120   return getSectionAddress(S.getFragment()->getParent()) +
121          Layout.getSymbolOffset(S);
122 }
123 
124 uint64_t MachObjectWriter::getPaddingSize(const MCAssembler &Asm,
125                                           const MCSection *Sec) const {
126   uint64_t EndAddr = getSectionAddress(Sec) + Asm.getSectionAddressSize(*Sec);
127   unsigned Next = Sec->getLayoutOrder() + 1;
128   if (Next >= Asm.getLayout()->getSectionOrder().size())
129     return 0;
130 
131   const MCSection &NextSec = *Asm.getLayout()->getSectionOrder()[Next];
132   if (NextSec.isVirtualSection())
133     return 0;
134   return offsetToAlignment(EndAddr, NextSec.getAlign());
135 }
136 
137 static bool isSymbolLinkerVisible(const MCSymbol &Symbol) {
138   // Non-temporary labels should always be visible to the linker.
139   if (!Symbol.isTemporary())
140     return true;
141 
142   if (Symbol.isUsedInReloc())
143     return true;
144 
145   return false;
146 }
147 
148 const MCSymbol *MachObjectWriter::getAtom(const MCSymbol &S) const {
149   // Linker visible symbols define atoms.
150   if (isSymbolLinkerVisible(S))
151     return &S;
152 
153   // Absolute and undefined symbols have no defining atom.
154   if (!S.isInSection())
155     return nullptr;
156 
157   // Non-linker visible symbols in sections which can't be atomized have no
158   // defining atom.
159   if (!MCAsmInfoDarwin::isSectionAtomizableBySymbols(
160           *S.getFragment()->getParent()))
161     return nullptr;
162 
163   // Otherwise, return the atom for the containing fragment.
164   return S.getFragment()->getAtom();
165 }
166 
167 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
168                                    unsigned NumLoadCommands,
169                                    unsigned LoadCommandsSize,
170                                    bool SubsectionsViaSymbols) {
171   uint32_t Flags = 0;
172 
173   if (SubsectionsViaSymbols)
174     Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
175 
176   // struct mach_header (28 bytes) or
177   // struct mach_header_64 (32 bytes)
178 
179   uint64_t Start = W.OS.tell();
180   (void) Start;
181 
182   W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
183 
184   W.write<uint32_t>(TargetObjectWriter->getCPUType());
185   W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
186 
187   W.write<uint32_t>(Type);
188   W.write<uint32_t>(NumLoadCommands);
189   W.write<uint32_t>(LoadCommandsSize);
190   W.write<uint32_t>(Flags);
191   if (is64Bit())
192     W.write<uint32_t>(0); // reserved
193 
194   assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
195                                            : sizeof(MachO::mach_header)));
196 }
197 
198 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
199   assert(Size >= Str.size());
200   W.OS << Str;
201   W.OS.write_zeros(Size - Str.size());
202 }
203 
204 /// writeSegmentLoadCommand - Write a segment load command.
205 ///
206 /// \param NumSections The number of sections in this segment.
207 /// \param SectionDataSize The total size of the sections.
208 void MachObjectWriter::writeSegmentLoadCommand(
209     StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
210     uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
211     uint32_t InitProt) {
212   // struct segment_command (56 bytes) or
213   // struct segment_command_64 (72 bytes)
214 
215   uint64_t Start = W.OS.tell();
216   (void) Start;
217 
218   unsigned SegmentLoadCommandSize =
219     is64Bit() ? sizeof(MachO::segment_command_64):
220     sizeof(MachO::segment_command);
221   W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
222   W.write<uint32_t>(SegmentLoadCommandSize +
223           NumSections * (is64Bit() ? sizeof(MachO::section_64) :
224                          sizeof(MachO::section)));
225 
226   writeWithPadding(Name, 16);
227   if (is64Bit()) {
228     W.write<uint64_t>(VMAddr);                 // vmaddr
229     W.write<uint64_t>(VMSize); // vmsize
230     W.write<uint64_t>(SectionDataStartOffset); // file offset
231     W.write<uint64_t>(SectionDataSize); // file size
232   } else {
233     W.write<uint32_t>(VMAddr);                 // vmaddr
234     W.write<uint32_t>(VMSize); // vmsize
235     W.write<uint32_t>(SectionDataStartOffset); // file offset
236     W.write<uint32_t>(SectionDataSize); // file size
237   }
238   // maxprot
239   W.write<uint32_t>(MaxProt);
240   // initprot
241   W.write<uint32_t>(InitProt);
242   W.write<uint32_t>(NumSections);
243   W.write<uint32_t>(0); // flags
244 
245   assert(W.OS.tell() - Start == SegmentLoadCommandSize);
246 }
247 
248 void MachObjectWriter::writeSection(const MCAssembler &Asm,
249                                     const MCSection &Sec, uint64_t VMAddr,
250                                     uint64_t FileOffset, unsigned Flags,
251                                     uint64_t RelocationsStart,
252                                     unsigned NumRelocations) {
253   uint64_t SectionSize = Asm.getSectionAddressSize(Sec);
254   const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
255 
256   // The offset is unused for virtual sections.
257   if (Section.isVirtualSection()) {
258     assert(Asm.getSectionFileSize(Sec) == 0 && "Invalid file size!");
259     FileOffset = 0;
260   }
261 
262   // struct section (68 bytes) or
263   // struct section_64 (80 bytes)
264 
265   uint64_t Start = W.OS.tell();
266   (void) Start;
267 
268   writeWithPadding(Section.getName(), 16);
269   writeWithPadding(Section.getSegmentName(), 16);
270   if (is64Bit()) {
271     W.write<uint64_t>(VMAddr);      // address
272     W.write<uint64_t>(SectionSize); // size
273   } else {
274     W.write<uint32_t>(VMAddr);      // address
275     W.write<uint32_t>(SectionSize); // size
276   }
277   W.write<uint32_t>(FileOffset);
278 
279   W.write<uint32_t>(Log2(Section.getAlign()));
280   W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
281   W.write<uint32_t>(NumRelocations);
282   W.write<uint32_t>(Flags);
283   W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
284   W.write<uint32_t>(Section.getStubSize()); // reserved2
285   if (is64Bit())
286     W.write<uint32_t>(0); // reserved3
287 
288   assert(W.OS.tell() - Start ==
289          (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
290 }
291 
292 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
293                                               uint32_t NumSymbols,
294                                               uint32_t StringTableOffset,
295                                               uint32_t StringTableSize) {
296   // struct symtab_command (24 bytes)
297 
298   uint64_t Start = W.OS.tell();
299   (void) Start;
300 
301   W.write<uint32_t>(MachO::LC_SYMTAB);
302   W.write<uint32_t>(sizeof(MachO::symtab_command));
303   W.write<uint32_t>(SymbolOffset);
304   W.write<uint32_t>(NumSymbols);
305   W.write<uint32_t>(StringTableOffset);
306   W.write<uint32_t>(StringTableSize);
307 
308   assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
309 }
310 
311 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
312                                                 uint32_t NumLocalSymbols,
313                                                 uint32_t FirstExternalSymbol,
314                                                 uint32_t NumExternalSymbols,
315                                                 uint32_t FirstUndefinedSymbol,
316                                                 uint32_t NumUndefinedSymbols,
317                                                 uint32_t IndirectSymbolOffset,
318                                                 uint32_t NumIndirectSymbols) {
319   // struct dysymtab_command (80 bytes)
320 
321   uint64_t Start = W.OS.tell();
322   (void) Start;
323 
324   W.write<uint32_t>(MachO::LC_DYSYMTAB);
325   W.write<uint32_t>(sizeof(MachO::dysymtab_command));
326   W.write<uint32_t>(FirstLocalSymbol);
327   W.write<uint32_t>(NumLocalSymbols);
328   W.write<uint32_t>(FirstExternalSymbol);
329   W.write<uint32_t>(NumExternalSymbols);
330   W.write<uint32_t>(FirstUndefinedSymbol);
331   W.write<uint32_t>(NumUndefinedSymbols);
332   W.write<uint32_t>(0); // tocoff
333   W.write<uint32_t>(0); // ntoc
334   W.write<uint32_t>(0); // modtaboff
335   W.write<uint32_t>(0); // nmodtab
336   W.write<uint32_t>(0); // extrefsymoff
337   W.write<uint32_t>(0); // nextrefsyms
338   W.write<uint32_t>(IndirectSymbolOffset);
339   W.write<uint32_t>(NumIndirectSymbols);
340   W.write<uint32_t>(0); // extreloff
341   W.write<uint32_t>(0); // nextrel
342   W.write<uint32_t>(0); // locreloff
343   W.write<uint32_t>(0); // nlocrel
344 
345   assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
346 }
347 
348 MachObjectWriter::MachSymbolData *
349 MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
350   for (auto *SymbolData :
351        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
352     for (MachSymbolData &Entry : *SymbolData)
353       if (Entry.Symbol == &Sym)
354         return &Entry;
355 
356   return nullptr;
357 }
358 
359 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
360   const MCSymbol *S = &Sym;
361   while (S->isVariable()) {
362     const MCExpr *Value = S->getVariableValue();
363     const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
364     if (!Ref)
365       return *S;
366     S = &Ref->getSymbol();
367   }
368   return *S;
369 }
370 
371 void MachObjectWriter::writeNlist(MachSymbolData &MSD,
372                                   const MCAsmLayout &Layout) {
373   const MCSymbol *Symbol = MSD.Symbol;
374   const MCSymbol &Data = *Symbol;
375   const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
376   uint8_t SectionIndex = MSD.SectionIndex;
377   uint8_t Type = 0;
378   uint64_t Address = 0;
379   bool IsAlias = Symbol != AliasedSymbol;
380 
381   const MCSymbol &OrigSymbol = *Symbol;
382   MachSymbolData *AliaseeInfo;
383   if (IsAlias) {
384     AliaseeInfo = findSymbolData(*AliasedSymbol);
385     if (AliaseeInfo)
386       SectionIndex = AliaseeInfo->SectionIndex;
387     Symbol = AliasedSymbol;
388     // FIXME: Should this update Data as well?
389   }
390 
391   // Set the N_TYPE bits. See <mach-o/nlist.h>.
392   //
393   // FIXME: Are the prebound or indirect fields possible here?
394   if (IsAlias && Symbol->isUndefined())
395     Type = MachO::N_INDR;
396   else if (Symbol->isUndefined())
397     Type = MachO::N_UNDF;
398   else if (Symbol->isAbsolute())
399     Type = MachO::N_ABS;
400   else
401     Type = MachO::N_SECT;
402 
403   // FIXME: Set STAB bits.
404 
405   if (Data.isPrivateExtern())
406     Type |= MachO::N_PEXT;
407 
408   // Set external bit.
409   if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
410     Type |= MachO::N_EXT;
411 
412   // Compute the symbol address.
413   if (IsAlias && Symbol->isUndefined())
414     Address = AliaseeInfo->StringIndex;
415   else if (Symbol->isDefined())
416     Address = getSymbolAddress(OrigSymbol, Layout);
417   else if (Symbol->isCommon()) {
418     // Common symbols are encoded with the size in the address
419     // field, and their alignment in the flags.
420     Address = Symbol->getCommonSize();
421   }
422 
423   // struct nlist (12 bytes)
424 
425   W.write<uint32_t>(MSD.StringIndex);
426   W.OS << char(Type);
427   W.OS << char(SectionIndex);
428 
429   // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
430   // value.
431   bool EncodeAsAltEntry =
432     IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
433   W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
434   if (is64Bit())
435     W.write<uint64_t>(Address);
436   else
437     W.write<uint32_t>(Address);
438 }
439 
440 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
441                                                 uint32_t DataOffset,
442                                                 uint32_t DataSize) {
443   uint64_t Start = W.OS.tell();
444   (void) Start;
445 
446   W.write<uint32_t>(Type);
447   W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
448   W.write<uint32_t>(DataOffset);
449   W.write<uint32_t>(DataSize);
450 
451   assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
452 }
453 
454 static unsigned ComputeLinkerOptionsLoadCommandSize(
455   const std::vector<std::string> &Options, bool is64Bit)
456 {
457   unsigned Size = sizeof(MachO::linker_option_command);
458   for (const std::string &Option : Options)
459     Size += Option.size() + 1;
460   return alignTo(Size, is64Bit ? 8 : 4);
461 }
462 
463 void MachObjectWriter::writeLinkerOptionsLoadCommand(
464   const std::vector<std::string> &Options)
465 {
466   unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
467   uint64_t Start = W.OS.tell();
468   (void) Start;
469 
470   W.write<uint32_t>(MachO::LC_LINKER_OPTION);
471   W.write<uint32_t>(Size);
472   W.write<uint32_t>(Options.size());
473   uint64_t BytesWritten = sizeof(MachO::linker_option_command);
474   for (const std::string &Option : Options) {
475     // Write each string, including the null byte.
476     W.OS << Option << '\0';
477     BytesWritten += Option.size() + 1;
478   }
479 
480   // Pad to a multiple of the pointer size.
481   W.OS.write_zeros(
482       offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4)));
483 
484   assert(W.OS.tell() - Start == Size);
485 }
486 
487 static bool isFixupTargetValid(const MCValue &Target) {
488   // Target is (LHS - RHS + cst).
489   // We don't support the form where LHS is null: -RHS + cst
490   if (!Target.getSymA() && Target.getSymB())
491     return false;
492   return true;
493 }
494 
495 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
496                                         const MCFragment *Fragment,
497                                         const MCFixup &Fixup, MCValue Target,
498                                         uint64_t &FixedValue) {
499   if (!isFixupTargetValid(Target)) {
500     Asm.getContext().reportError(Fixup.getLoc(),
501                                  "unsupported relocation expression");
502     return;
503   }
504 
505   TargetObjectWriter->recordRelocation(this, Asm, Fragment, Fixup, Target,
506                                        FixedValue);
507 }
508 
509 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
510   // This is the point where 'as' creates actual symbols for indirect symbols
511   // (in the following two passes). It would be easier for us to do this sooner
512   // when we see the attribute, but that makes getting the order in the symbol
513   // table much more complicated than it is worth.
514   //
515   // FIXME: Revisit this when the dust settles.
516 
517   // Report errors for use of .indirect_symbol not in a symbol pointer section
518   // or stub section.
519   for (IndirectSymbolData &ISD : llvm::make_range(Asm.indirect_symbol_begin(),
520                                                   Asm.indirect_symbol_end())) {
521     const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section);
522 
523     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
524         Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
525         Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
526         Section.getType() != MachO::S_SYMBOL_STUBS) {
527       MCSymbol &Symbol = *ISD.Symbol;
528       report_fatal_error("indirect symbol '" + Symbol.getName() +
529                          "' not in a symbol pointer or stub section");
530     }
531   }
532 
533   // Bind non-lazy symbol pointers first.
534   unsigned IndirectIndex = 0;
535   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
536          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
537     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
538 
539     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
540         Section.getType() !=  MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
541       continue;
542 
543     // Initialize the section indirect symbol base, if necessary.
544     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
545 
546     Asm.registerSymbol(*it->Symbol);
547   }
548 
549   // Then lazy symbol pointers and symbol stubs.
550   IndirectIndex = 0;
551   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
552          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
553     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
554 
555     if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
556         Section.getType() != MachO::S_SYMBOL_STUBS)
557       continue;
558 
559     // Initialize the section indirect symbol base, if necessary.
560     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
561 
562     // Set the symbol type to undefined lazy, but only on construction.
563     //
564     // FIXME: Do not hardcode.
565     if (Asm.registerSymbol(*it->Symbol))
566       cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
567   }
568 }
569 
570 /// computeSymbolTable - Compute the symbol table data
571 void MachObjectWriter::computeSymbolTable(
572     MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
573     std::vector<MachSymbolData> &ExternalSymbolData,
574     std::vector<MachSymbolData> &UndefinedSymbolData) {
575   // Build section lookup table.
576   DenseMap<const MCSection*, uint8_t> SectionIndexMap;
577   unsigned Index = 1;
578   for (MCAssembler::iterator it = Asm.begin(),
579          ie = Asm.end(); it != ie; ++it, ++Index)
580     SectionIndexMap[&*it] = Index;
581   assert(Index <= 256 && "Too many sections!");
582 
583   // Build the string table.
584   for (const MCSymbol &Symbol : Asm.symbols()) {
585     if (!Asm.isSymbolLinkerVisible(Symbol))
586       continue;
587 
588     StringTable.add(Symbol.getName());
589   }
590   StringTable.finalize();
591 
592   // Build the symbol arrays but only for non-local symbols.
593   //
594   // The particular order that we collect and then sort the symbols is chosen to
595   // match 'as'. Even though it doesn't matter for correctness, this is
596   // important for letting us diff .o files.
597   for (const MCSymbol &Symbol : Asm.symbols()) {
598     // Ignore non-linker visible symbols.
599     if (!Asm.isSymbolLinkerVisible(Symbol))
600       continue;
601 
602     if (!Symbol.isExternal() && !Symbol.isUndefined())
603       continue;
604 
605     MachSymbolData MSD;
606     MSD.Symbol = &Symbol;
607     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
608 
609     if (Symbol.isUndefined()) {
610       MSD.SectionIndex = 0;
611       UndefinedSymbolData.push_back(MSD);
612     } else if (Symbol.isAbsolute()) {
613       MSD.SectionIndex = 0;
614       ExternalSymbolData.push_back(MSD);
615     } else {
616       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
617       assert(MSD.SectionIndex && "Invalid section index!");
618       ExternalSymbolData.push_back(MSD);
619     }
620   }
621 
622   // Now add the data for local symbols.
623   for (const MCSymbol &Symbol : Asm.symbols()) {
624     // Ignore non-linker visible symbols.
625     if (!Asm.isSymbolLinkerVisible(Symbol))
626       continue;
627 
628     if (Symbol.isExternal() || Symbol.isUndefined())
629       continue;
630 
631     MachSymbolData MSD;
632     MSD.Symbol = &Symbol;
633     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
634 
635     if (Symbol.isAbsolute()) {
636       MSD.SectionIndex = 0;
637       LocalSymbolData.push_back(MSD);
638     } else {
639       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
640       assert(MSD.SectionIndex && "Invalid section index!");
641       LocalSymbolData.push_back(MSD);
642     }
643   }
644 
645   // External and undefined symbols are required to be in lexicographic order.
646   llvm::sort(ExternalSymbolData);
647   llvm::sort(UndefinedSymbolData);
648 
649   // Set the symbol indices.
650   Index = 0;
651   for (auto *SymbolData :
652        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
653     for (MachSymbolData &Entry : *SymbolData)
654       Entry.Symbol->setIndex(Index++);
655 
656   for (const MCSection &Section : Asm) {
657     for (RelAndSymbol &Rel : Relocations[&Section]) {
658       if (!Rel.Sym)
659         continue;
660 
661       // Set the Index and the IsExtern bit.
662       unsigned Index = Rel.Sym->getIndex();
663       assert(isInt<24>(Index));
664       if (W.Endian == llvm::endianness::little)
665         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
666       else
667         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
668     }
669   }
670 }
671 
672 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm) {
673   uint64_t StartAddress = 0;
674   for (const MCSection *Sec : Asm.getLayout()->getSectionOrder()) {
675     StartAddress = alignTo(StartAddress, Sec->getAlign());
676     SectionAddress[Sec] = StartAddress;
677     StartAddress += Asm.getSectionAddressSize(*Sec);
678 
679     // Explicitly pad the section to match the alignment requirements of the
680     // following one. This is for 'gas' compatibility, it shouldn't
681     /// strictly be necessary.
682     StartAddress += getPaddingSize(Asm, Sec);
683   }
684 }
685 
686 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm) {
687   computeSectionAddresses(Asm);
688 
689   // Create symbol data for any indirect symbols.
690   bindIndirectSymbols(Asm);
691 }
692 
693 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
694     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
695     bool InSet, bool IsPCRel) const {
696   if (InSet)
697     return true;
698 
699   // The effective address is
700   //     addr(atom(A)) + offset(A)
701   //   - addr(atom(B)) - offset(B)
702   // and the offsets are not relocatable, so the fixup is fully resolved when
703   //  addr(atom(A)) - addr(atom(B)) == 0.
704   const MCSymbol &SA = findAliasedSymbol(SymA);
705   const MCSection &SecA = SA.getSection();
706   const MCSection &SecB = *FB.getParent();
707 
708   if (IsPCRel) {
709     // The simple (Darwin, except on x86_64) way of dealing with this was to
710     // assume that any reference to a temporary symbol *must* be a temporary
711     // symbol in the same atom, unless the sections differ. Therefore, any PCrel
712     // relocation to a temporary symbol (in the same section) is fully
713     // resolved. This also works in conjunction with absolutized .set, which
714     // requires the compiler to use .set to absolutize the differences between
715     // symbols which the compiler knows to be assembly time constants, so we
716     // don't need to worry about considering symbol differences fully resolved.
717     //
718     // If the file isn't using sub-sections-via-symbols, we can make the
719     // same assumptions about any symbol that we normally make about
720     // assembler locals.
721 
722     bool hasReliableSymbolDifference = isX86_64();
723     if (!hasReliableSymbolDifference) {
724       if (!SA.isInSection() || &SecA != &SecB ||
725           (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
726            Asm.getSubsectionsViaSymbols()))
727         return false;
728       return true;
729     }
730   }
731 
732   // If they are not in the same section, we can't compute the diff.
733   if (&SecA != &SecB)
734     return false;
735 
736   // If the atoms are the same, they are guaranteed to have the same address.
737   return SA.getFragment()->getAtom() == FB.getAtom();
738 }
739 
740 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
741   switch (Type) {
742   case MCVM_OSXVersionMin:     return MachO::LC_VERSION_MIN_MACOSX;
743   case MCVM_IOSVersionMin:     return MachO::LC_VERSION_MIN_IPHONEOS;
744   case MCVM_TvOSVersionMin:    return MachO::LC_VERSION_MIN_TVOS;
745   case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
746   }
747   llvm_unreachable("Invalid mc version min type");
748 }
749 
750 void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) {
751   MCSection *AddrSigSection =
752       Asm.getContext().getObjectFileInfo()->getAddrSigSection();
753   unsigned Log2Size = is64Bit() ? 3 : 2;
754   for (const MCSymbol *S : getAddrsigSyms()) {
755     if (!S->isRegistered())
756       continue;
757     MachO::any_relocation_info MRE;
758     MRE.r_word0 = 0;
759     MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28);
760     addRelocation(S, AddrSigSection, MRE);
761   }
762 }
763 
764 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm) {
765   auto &Layout = *Asm.getLayout();
766   uint64_t StartOffset = W.OS.tell();
767 
768   populateAddrSigSection(Asm);
769 
770   // Compute symbol table information and bind symbol indices.
771   computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
772                      UndefinedSymbolData);
773 
774   if (!Asm.CGProfile.empty()) {
775     MCSection *CGProfileSection = Asm.getContext().getMachOSection(
776         "__LLVM", "__cg_profile", 0, SectionKind::getMetadata());
777     auto &Frag = cast<MCDataFragment>(*CGProfileSection->begin());
778     Frag.getContents().clear();
779     raw_svector_ostream OS(Frag.getContents());
780     for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) {
781       uint32_t FromIndex = CGPE.From->getSymbol().getIndex();
782       uint32_t ToIndex = CGPE.To->getSymbol().getIndex();
783       support::endian::write(OS, FromIndex, W.Endian);
784       support::endian::write(OS, ToIndex, W.Endian);
785       support::endian::write(OS, CGPE.Count, W.Endian);
786     }
787   }
788 
789   unsigned NumSections = Asm.size();
790   const MCAssembler::VersionInfoType &VersionInfo = Asm.getVersionInfo();
791 
792   // The section data starts after the header, the segment load command (and
793   // section headers) and the symbol table.
794   unsigned NumLoadCommands = 1;
795   uint64_t LoadCommandsSize = is64Bit() ?
796     sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
797     sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
798 
799   // Add the deployment target version info load command size, if used.
800   if (VersionInfo.Major != 0) {
801     ++NumLoadCommands;
802     if (VersionInfo.EmitBuildVersion)
803       LoadCommandsSize += sizeof(MachO::build_version_command);
804     else
805       LoadCommandsSize += sizeof(MachO::version_min_command);
806   }
807 
808   const MCAssembler::VersionInfoType &TargetVariantVersionInfo =
809       Asm.getDarwinTargetVariantVersionInfo();
810 
811   // Add the target variant version info load command size, if used.
812   if (TargetVariantVersionInfo.Major != 0) {
813     ++NumLoadCommands;
814     assert(TargetVariantVersionInfo.EmitBuildVersion &&
815            "target variant should use build version");
816     LoadCommandsSize += sizeof(MachO::build_version_command);
817   }
818 
819   // Add the data-in-code load command size, if used.
820   unsigned NumDataRegions = Asm.getDataRegions().size();
821   if (NumDataRegions) {
822     ++NumLoadCommands;
823     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
824   }
825 
826   // Add the loh load command size, if used.
827   uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
828   uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
829   if (LOHSize) {
830     ++NumLoadCommands;
831     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
832   }
833 
834   // Add the symbol table load command sizes, if used.
835   unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
836     UndefinedSymbolData.size();
837   if (NumSymbols) {
838     NumLoadCommands += 2;
839     LoadCommandsSize += (sizeof(MachO::symtab_command) +
840                          sizeof(MachO::dysymtab_command));
841   }
842 
843   // Add the linker option load commands sizes.
844   for (const auto &Option : Asm.getLinkerOptions()) {
845     ++NumLoadCommands;
846     LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
847   }
848 
849   // Compute the total size of the section data, as well as its file size and vm
850   // size.
851   uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
852                                sizeof(MachO::mach_header)) + LoadCommandsSize;
853   uint64_t SectionDataSize = 0;
854   uint64_t SectionDataFileSize = 0;
855   uint64_t VMSize = 0;
856   for (const MCSection &Sec : Asm) {
857     uint64_t Address = getSectionAddress(&Sec);
858     uint64_t Size = Asm.getSectionAddressSize(Sec);
859     uint64_t FileSize = Asm.getSectionFileSize(Sec);
860     FileSize += getPaddingSize(Asm, &Sec);
861 
862     VMSize = std::max(VMSize, Address + Size);
863 
864     if (Sec.isVirtualSection())
865       continue;
866 
867     SectionDataSize = std::max(SectionDataSize, Address + Size);
868     SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
869   }
870 
871   // The section data is padded to pointer size bytes.
872   //
873   // FIXME: Is this machine dependent?
874   unsigned SectionDataPadding =
875       offsetToAlignment(SectionDataFileSize, is64Bit() ? Align(8) : Align(4));
876   SectionDataFileSize += SectionDataPadding;
877 
878   // Write the prolog, starting with the header and load command...
879   writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
880               Asm.getSubsectionsViaSymbols());
881   uint32_t Prot =
882       MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
883   writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
884                           SectionDataSize, Prot, Prot);
885 
886   // ... and then the section headers.
887   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
888   for (const MCSection &Section : Asm) {
889     const auto &Sec = cast<MCSectionMachO>(Section);
890     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
891     unsigned NumRelocs = Relocs.size();
892     uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
893     unsigned Flags = Sec.getTypeAndAttributes();
894     if (Sec.hasInstructions())
895       Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
896     writeSection(Asm, Sec, getSectionAddress(&Sec), SectionStart, Flags,
897                  RelocTableEnd, NumRelocs);
898     RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
899   }
900 
901   // Write out the deployment target information, if it's available.
902   auto EmitDeploymentTargetVersion =
903       [&](const MCAssembler::VersionInfoType &VersionInfo) {
904         auto EncodeVersion = [](VersionTuple V) -> uint32_t {
905           assert(!V.empty() && "empty version");
906           unsigned Update = V.getSubminor().value_or(0);
907           unsigned Minor = V.getMinor().value_or(0);
908           assert(Update < 256 && "unencodable update target version");
909           assert(Minor < 256 && "unencodable minor target version");
910           assert(V.getMajor() < 65536 && "unencodable major target version");
911           return Update | (Minor << 8) | (V.getMajor() << 16);
912         };
913         uint32_t EncodedVersion = EncodeVersion(VersionTuple(
914             VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
915         uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
916                                   ? EncodeVersion(VersionInfo.SDKVersion)
917                                   : 0;
918         if (VersionInfo.EmitBuildVersion) {
919           // FIXME: Currently empty tools. Add clang version in the future.
920           W.write<uint32_t>(MachO::LC_BUILD_VERSION);
921           W.write<uint32_t>(sizeof(MachO::build_version_command));
922           W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
923           W.write<uint32_t>(EncodedVersion);
924           W.write<uint32_t>(SDKVersion);
925           W.write<uint32_t>(0); // Empty tools list.
926         } else {
927           MachO::LoadCommandType LCType =
928               getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
929           W.write<uint32_t>(LCType);
930           W.write<uint32_t>(sizeof(MachO::version_min_command));
931           W.write<uint32_t>(EncodedVersion);
932           W.write<uint32_t>(SDKVersion);
933         }
934       };
935   if (VersionInfo.Major != 0)
936     EmitDeploymentTargetVersion(VersionInfo);
937   if (TargetVariantVersionInfo.Major != 0)
938     EmitDeploymentTargetVersion(TargetVariantVersionInfo);
939 
940   // Write the data-in-code load command, if used.
941   uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
942   if (NumDataRegions) {
943     uint64_t DataRegionsOffset = RelocTableEnd;
944     uint64_t DataRegionsSize = NumDataRegions * 8;
945     writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
946                              DataRegionsSize);
947   }
948 
949   // Write the loh load command, if used.
950   uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
951   if (LOHSize)
952     writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
953                              DataInCodeTableEnd, LOHSize);
954 
955   // Write the symbol table load command, if used.
956   if (NumSymbols) {
957     unsigned FirstLocalSymbol = 0;
958     unsigned NumLocalSymbols = LocalSymbolData.size();
959     unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
960     unsigned NumExternalSymbols = ExternalSymbolData.size();
961     unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
962     unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
963     unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
964     unsigned NumSymTabSymbols =
965       NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
966     uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
967     uint64_t IndirectSymbolOffset = 0;
968 
969     // If used, the indirect symbols are written after the section data.
970     if (NumIndirectSymbols)
971       IndirectSymbolOffset = LOHTableEnd;
972 
973     // The symbol table is written after the indirect symbol data.
974     uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
975 
976     // The string table is written after symbol table.
977     uint64_t StringTableOffset =
978       SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
979                                               sizeof(MachO::nlist_64) :
980                                               sizeof(MachO::nlist));
981     writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
982                            StringTableOffset, StringTable.getSize());
983 
984     writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
985                              FirstExternalSymbol, NumExternalSymbols,
986                              FirstUndefinedSymbol, NumUndefinedSymbols,
987                              IndirectSymbolOffset, NumIndirectSymbols);
988   }
989 
990   // Write the linker options load commands.
991   for (const auto &Option : Asm.getLinkerOptions())
992     writeLinkerOptionsLoadCommand(Option);
993 
994   // Write the actual section data.
995   for (const MCSection &Sec : Asm) {
996     Asm.writeSectionData(W.OS, &Sec);
997 
998     uint64_t Pad = getPaddingSize(Asm, &Sec);
999     W.OS.write_zeros(Pad);
1000   }
1001 
1002   // Write the extra padding.
1003   W.OS.write_zeros(SectionDataPadding);
1004 
1005   // Write the relocation entries.
1006   for (const MCSection &Sec : Asm) {
1007     // Write the section relocation entries, in reverse order to match 'as'
1008     // (approximately, the exact algorithm is more complicated than this).
1009     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
1010     for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) {
1011       W.write<uint32_t>(Rel.MRE.r_word0);
1012       W.write<uint32_t>(Rel.MRE.r_word1);
1013     }
1014   }
1015 
1016   // Write out the data-in-code region payload, if there is one.
1017   for (MCAssembler::const_data_region_iterator
1018          it = Asm.data_region_begin(), ie = Asm.data_region_end();
1019          it != ie; ++it) {
1020     const DataRegionData *Data = &(*it);
1021     uint64_t Start = getSymbolAddress(*Data->Start, Layout);
1022     uint64_t End;
1023     if (Data->End)
1024       End = getSymbolAddress(*Data->End, Layout);
1025     else
1026       report_fatal_error("Data region not terminated");
1027 
1028     LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
1029                       << "  start: " << Start << "(" << Data->Start->getName()
1030                       << ")"
1031                       << "  end: " << End << "(" << Data->End->getName() << ")"
1032                       << "  size: " << End - Start << "\n");
1033     W.write<uint32_t>(Start);
1034     W.write<uint16_t>(End - Start);
1035     W.write<uint16_t>(Data->Kind);
1036   }
1037 
1038   // Write out the loh commands, if there is one.
1039   if (LOHSize) {
1040 #ifndef NDEBUG
1041     unsigned Start = W.OS.tell();
1042 #endif
1043     Asm.getLOHContainer().emit(*this, Layout);
1044     // Pad to a multiple of the pointer size.
1045     W.OS.write_zeros(
1046         offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4)));
1047     assert(W.OS.tell() - Start == LOHSize);
1048   }
1049 
1050   // Write the symbol table data, if used.
1051   if (NumSymbols) {
1052     // Write the indirect symbol entries.
1053     for (MCAssembler::const_indirect_symbol_iterator
1054            it = Asm.indirect_symbol_begin(),
1055            ie = Asm.indirect_symbol_end(); it != ie; ++it) {
1056       // Indirect symbols in the non-lazy symbol pointer section have some
1057       // special handling.
1058       const MCSectionMachO &Section =
1059           static_cast<const MCSectionMachO &>(*it->Section);
1060       if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
1061         // If this symbol is defined and internal, mark it as such.
1062         if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
1063           uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
1064           if (it->Symbol->isAbsolute())
1065             Flags |= MachO::INDIRECT_SYMBOL_ABS;
1066           W.write<uint32_t>(Flags);
1067           continue;
1068         }
1069       }
1070 
1071       W.write<uint32_t>(it->Symbol->getIndex());
1072     }
1073 
1074     // FIXME: Check that offsets match computed ones.
1075 
1076     // Write the symbol table entries.
1077     for (auto *SymbolData :
1078          {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
1079       for (MachSymbolData &Entry : *SymbolData)
1080         writeNlist(Entry, Layout);
1081 
1082     // Write the string table.
1083     StringTable.write(W.OS);
1084   }
1085 
1086   return W.OS.tell() - StartOffset;
1087 }
1088 
1089 std::unique_ptr<MCObjectWriter>
1090 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
1091                              raw_pwrite_stream &OS, bool IsLittleEndian) {
1092   return std::make_unique<MachObjectWriter>(std::move(MOTW), OS,
1093                                              IsLittleEndian);
1094 }
1095