1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/ADT/DenseMap.h" 10 #include "llvm/ADT/Twine.h" 11 #include "llvm/BinaryFormat/MachO.h" 12 #include "llvm/MC/MCAsmBackend.h" 13 #include "llvm/MC/MCAsmInfoDarwin.h" 14 #include "llvm/MC/MCAssembler.h" 15 #include "llvm/MC/MCContext.h" 16 #include "llvm/MC/MCDirectives.h" 17 #include "llvm/MC/MCExpr.h" 18 #include "llvm/MC/MCFixupKindInfo.h" 19 #include "llvm/MC/MCFragment.h" 20 #include "llvm/MC/MCMachObjectWriter.h" 21 #include "llvm/MC/MCObjectFileInfo.h" 22 #include "llvm/MC/MCObjectWriter.h" 23 #include "llvm/MC/MCSection.h" 24 #include "llvm/MC/MCSectionMachO.h" 25 #include "llvm/MC/MCSymbol.h" 26 #include "llvm/MC/MCSymbolMachO.h" 27 #include "llvm/MC/MCValue.h" 28 #include "llvm/Support/Alignment.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/MathExtras.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include <algorithm> 35 #include <cassert> 36 #include <cstdint> 37 #include <string> 38 #include <utility> 39 #include <vector> 40 41 using namespace llvm; 42 43 #define DEBUG_TYPE "mc" 44 45 void MachObjectWriter::reset() { 46 Relocations.clear(); 47 IndirectSymBase.clear(); 48 IndirectSymbols.clear(); 49 DataRegions.clear(); 50 SectionAddress.clear(); 51 SectionOrder.clear(); 52 StringTable.clear(); 53 LocalSymbolData.clear(); 54 ExternalSymbolData.clear(); 55 UndefinedSymbolData.clear(); 56 LOHContainer.reset(); 57 VersionInfo.Major = 0; 58 VersionInfo.SDKVersion = VersionTuple(); 59 TargetVariantVersionInfo.Major = 0; 60 TargetVariantVersionInfo.SDKVersion = VersionTuple(); 61 LinkerOptions.clear(); 62 MCObjectWriter::reset(); 63 } 64 65 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) { 66 // Undefined symbols are always extern. 67 if (S.isUndefined()) 68 return true; 69 70 // References to weak definitions require external relocation entries; the 71 // definition may not always be the one in the same object file. 72 if (cast<MCSymbolMachO>(S).isWeakDefinition()) 73 return true; 74 75 // Otherwise, we can use an internal relocation. 76 return false; 77 } 78 79 bool MachObjectWriter:: 80 MachSymbolData::operator<(const MachSymbolData &RHS) const { 81 return Symbol->getName() < RHS.Symbol->getName(); 82 } 83 84 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { 85 const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo( 86 (MCFixupKind) Kind); 87 88 return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; 89 } 90 91 uint64_t 92 MachObjectWriter::getFragmentAddress(const MCAssembler &Asm, 93 const MCFragment *Fragment) const { 94 return getSectionAddress(Fragment->getParent()) + 95 Asm.getFragmentOffset(*Fragment); 96 } 97 98 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S, 99 const MCAssembler &Asm) const { 100 // If this is a variable, then recursively evaluate now. 101 if (S.isVariable()) { 102 if (const MCConstantExpr *C = 103 dyn_cast<const MCConstantExpr>(S.getVariableValue())) 104 return C->getValue(); 105 106 MCValue Target; 107 if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Asm, nullptr)) 108 report_fatal_error("unable to evaluate offset for variable '" + 109 S.getName() + "'"); 110 111 // Verify that any used symbols are defined. 112 if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) 113 report_fatal_error("unable to evaluate offset to undefined symbol '" + 114 Target.getSymA()->getSymbol().getName() + "'"); 115 if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) 116 report_fatal_error("unable to evaluate offset to undefined symbol '" + 117 Target.getSymB()->getSymbol().getName() + "'"); 118 119 uint64_t Address = Target.getConstant(); 120 if (Target.getSymA()) 121 Address += getSymbolAddress(Target.getSymA()->getSymbol(), Asm); 122 if (Target.getSymB()) 123 Address += getSymbolAddress(Target.getSymB()->getSymbol(), Asm); 124 return Address; 125 } 126 127 return getSectionAddress(S.getFragment()->getParent()) + 128 Asm.getSymbolOffset(S); 129 } 130 131 uint64_t MachObjectWriter::getPaddingSize(const MCAssembler &Asm, 132 const MCSection *Sec) const { 133 uint64_t EndAddr = getSectionAddress(Sec) + Asm.getSectionAddressSize(*Sec); 134 unsigned Next = cast<MCSectionMachO>(Sec)->getLayoutOrder() + 1; 135 if (Next >= SectionOrder.size()) 136 return 0; 137 138 const MCSection &NextSec = *SectionOrder[Next]; 139 if (NextSec.isVirtualSection()) 140 return 0; 141 return offsetToAlignment(EndAddr, NextSec.getAlign()); 142 } 143 144 static bool isSymbolLinkerVisible(const MCSymbol &Symbol) { 145 // Non-temporary labels should always be visible to the linker. 146 if (!Symbol.isTemporary()) 147 return true; 148 149 if (Symbol.isUsedInReloc()) 150 return true; 151 152 return false; 153 } 154 155 const MCSymbol *MachObjectWriter::getAtom(const MCSymbol &S) const { 156 // Linker visible symbols define atoms. 157 if (isSymbolLinkerVisible(S)) 158 return &S; 159 160 // Absolute and undefined symbols have no defining atom. 161 if (!S.isInSection()) 162 return nullptr; 163 164 // Non-linker visible symbols in sections which can't be atomized have no 165 // defining atom. 166 if (!MCAsmInfoDarwin::isSectionAtomizableBySymbols( 167 *S.getFragment()->getParent())) 168 return nullptr; 169 170 // Otherwise, return the atom for the containing fragment. 171 return S.getFragment()->getAtom(); 172 } 173 174 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type, 175 unsigned NumLoadCommands, 176 unsigned LoadCommandsSize, 177 bool SubsectionsViaSymbols) { 178 uint32_t Flags = 0; 179 180 if (SubsectionsViaSymbols) 181 Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS; 182 183 // struct mach_header (28 bytes) or 184 // struct mach_header_64 (32 bytes) 185 186 uint64_t Start = W.OS.tell(); 187 (void) Start; 188 189 W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC); 190 191 W.write<uint32_t>(TargetObjectWriter->getCPUType()); 192 193 uint32_t Cpusubtype = TargetObjectWriter->getCPUSubtype(); 194 195 // Promote arm64e subtypes to always be ptrauth-ABI-versioned, at version 0. 196 // We never need to emit unversioned binaries. 197 // And we don't support arbitrary ABI versions (or the kernel flag) yet. 198 if (TargetObjectWriter->getCPUType() == MachO::CPU_TYPE_ARM64 && 199 Cpusubtype == MachO::CPU_SUBTYPE_ARM64E) 200 Cpusubtype = MachO::CPU_SUBTYPE_ARM64E_WITH_PTRAUTH_VERSION( 201 /*PtrAuthABIVersion=*/0, /*PtrAuthKernelABIVersion=*/false); 202 203 W.write<uint32_t>(Cpusubtype); 204 205 W.write<uint32_t>(Type); 206 W.write<uint32_t>(NumLoadCommands); 207 W.write<uint32_t>(LoadCommandsSize); 208 W.write<uint32_t>(Flags); 209 if (is64Bit()) 210 W.write<uint32_t>(0); // reserved 211 212 assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64) 213 : sizeof(MachO::mach_header))); 214 } 215 216 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) { 217 assert(Size >= Str.size()); 218 W.OS << Str; 219 W.OS.write_zeros(Size - Str.size()); 220 } 221 222 /// writeSegmentLoadCommand - Write a segment load command. 223 /// 224 /// \param NumSections The number of sections in this segment. 225 /// \param SectionDataSize The total size of the sections. 226 void MachObjectWriter::writeSegmentLoadCommand( 227 StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize, 228 uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt, 229 uint32_t InitProt) { 230 // struct segment_command (56 bytes) or 231 // struct segment_command_64 (72 bytes) 232 233 uint64_t Start = W.OS.tell(); 234 (void) Start; 235 236 unsigned SegmentLoadCommandSize = 237 is64Bit() ? sizeof(MachO::segment_command_64): 238 sizeof(MachO::segment_command); 239 W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT); 240 W.write<uint32_t>(SegmentLoadCommandSize + 241 NumSections * (is64Bit() ? sizeof(MachO::section_64) : 242 sizeof(MachO::section))); 243 244 writeWithPadding(Name, 16); 245 if (is64Bit()) { 246 W.write<uint64_t>(VMAddr); // vmaddr 247 W.write<uint64_t>(VMSize); // vmsize 248 W.write<uint64_t>(SectionDataStartOffset); // file offset 249 W.write<uint64_t>(SectionDataSize); // file size 250 } else { 251 W.write<uint32_t>(VMAddr); // vmaddr 252 W.write<uint32_t>(VMSize); // vmsize 253 W.write<uint32_t>(SectionDataStartOffset); // file offset 254 W.write<uint32_t>(SectionDataSize); // file size 255 } 256 // maxprot 257 W.write<uint32_t>(MaxProt); 258 // initprot 259 W.write<uint32_t>(InitProt); 260 W.write<uint32_t>(NumSections); 261 W.write<uint32_t>(0); // flags 262 263 assert(W.OS.tell() - Start == SegmentLoadCommandSize); 264 } 265 266 void MachObjectWriter::writeSection(const MCAssembler &Asm, 267 const MCSection &Sec, uint64_t VMAddr, 268 uint64_t FileOffset, unsigned Flags, 269 uint64_t RelocationsStart, 270 unsigned NumRelocations) { 271 uint64_t SectionSize = Asm.getSectionAddressSize(Sec); 272 const MCSectionMachO &Section = cast<MCSectionMachO>(Sec); 273 274 // The offset is unused for virtual sections. 275 if (Section.isVirtualSection()) { 276 assert(Asm.getSectionFileSize(Sec) == 0 && "Invalid file size!"); 277 FileOffset = 0; 278 } 279 280 // struct section (68 bytes) or 281 // struct section_64 (80 bytes) 282 283 uint64_t Start = W.OS.tell(); 284 (void) Start; 285 286 writeWithPadding(Section.getName(), 16); 287 writeWithPadding(Section.getSegmentName(), 16); 288 if (is64Bit()) { 289 W.write<uint64_t>(VMAddr); // address 290 W.write<uint64_t>(SectionSize); // size 291 } else { 292 W.write<uint32_t>(VMAddr); // address 293 W.write<uint32_t>(SectionSize); // size 294 } 295 assert(isUInt<32>(FileOffset) && "Cannot encode offset of section"); 296 W.write<uint32_t>(FileOffset); 297 298 W.write<uint32_t>(Log2(Section.getAlign())); 299 assert((!NumRelocations || isUInt<32>(RelocationsStart)) && 300 "Cannot encode offset of relocations"); 301 W.write<uint32_t>(NumRelocations ? RelocationsStart : 0); 302 W.write<uint32_t>(NumRelocations); 303 W.write<uint32_t>(Flags); 304 W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1 305 W.write<uint32_t>(Section.getStubSize()); // reserved2 306 if (is64Bit()) 307 W.write<uint32_t>(0); // reserved3 308 309 assert(W.OS.tell() - Start == 310 (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section))); 311 } 312 313 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset, 314 uint32_t NumSymbols, 315 uint32_t StringTableOffset, 316 uint32_t StringTableSize) { 317 // struct symtab_command (24 bytes) 318 319 uint64_t Start = W.OS.tell(); 320 (void) Start; 321 322 W.write<uint32_t>(MachO::LC_SYMTAB); 323 W.write<uint32_t>(sizeof(MachO::symtab_command)); 324 W.write<uint32_t>(SymbolOffset); 325 W.write<uint32_t>(NumSymbols); 326 W.write<uint32_t>(StringTableOffset); 327 W.write<uint32_t>(StringTableSize); 328 329 assert(W.OS.tell() - Start == sizeof(MachO::symtab_command)); 330 } 331 332 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol, 333 uint32_t NumLocalSymbols, 334 uint32_t FirstExternalSymbol, 335 uint32_t NumExternalSymbols, 336 uint32_t FirstUndefinedSymbol, 337 uint32_t NumUndefinedSymbols, 338 uint32_t IndirectSymbolOffset, 339 uint32_t NumIndirectSymbols) { 340 // struct dysymtab_command (80 bytes) 341 342 uint64_t Start = W.OS.tell(); 343 (void) Start; 344 345 W.write<uint32_t>(MachO::LC_DYSYMTAB); 346 W.write<uint32_t>(sizeof(MachO::dysymtab_command)); 347 W.write<uint32_t>(FirstLocalSymbol); 348 W.write<uint32_t>(NumLocalSymbols); 349 W.write<uint32_t>(FirstExternalSymbol); 350 W.write<uint32_t>(NumExternalSymbols); 351 W.write<uint32_t>(FirstUndefinedSymbol); 352 W.write<uint32_t>(NumUndefinedSymbols); 353 W.write<uint32_t>(0); // tocoff 354 W.write<uint32_t>(0); // ntoc 355 W.write<uint32_t>(0); // modtaboff 356 W.write<uint32_t>(0); // nmodtab 357 W.write<uint32_t>(0); // extrefsymoff 358 W.write<uint32_t>(0); // nextrefsyms 359 W.write<uint32_t>(IndirectSymbolOffset); 360 W.write<uint32_t>(NumIndirectSymbols); 361 W.write<uint32_t>(0); // extreloff 362 W.write<uint32_t>(0); // nextrel 363 W.write<uint32_t>(0); // locreloff 364 W.write<uint32_t>(0); // nlocrel 365 366 assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command)); 367 } 368 369 MachObjectWriter::MachSymbolData * 370 MachObjectWriter::findSymbolData(const MCSymbol &Sym) { 371 for (auto *SymbolData : 372 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) 373 for (MachSymbolData &Entry : *SymbolData) 374 if (Entry.Symbol == &Sym) 375 return &Entry; 376 377 return nullptr; 378 } 379 380 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const { 381 const MCSymbol *S = &Sym; 382 while (S->isVariable()) { 383 const MCExpr *Value = S->getVariableValue(); 384 const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value); 385 if (!Ref) 386 return *S; 387 S = &Ref->getSymbol(); 388 } 389 return *S; 390 } 391 392 void MachObjectWriter::writeNlist(MachSymbolData &MSD, const MCAssembler &Asm) { 393 const MCSymbol *Symbol = MSD.Symbol; 394 const auto &Data = cast<MCSymbolMachO>(*Symbol); 395 const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol); 396 uint8_t SectionIndex = MSD.SectionIndex; 397 uint8_t Type = 0; 398 uint64_t Address = 0; 399 bool IsAlias = Symbol != AliasedSymbol; 400 401 const MCSymbol &OrigSymbol = *Symbol; 402 MachSymbolData *AliaseeInfo; 403 if (IsAlias) { 404 AliaseeInfo = findSymbolData(*AliasedSymbol); 405 if (AliaseeInfo) 406 SectionIndex = AliaseeInfo->SectionIndex; 407 Symbol = AliasedSymbol; 408 // FIXME: Should this update Data as well? 409 } 410 411 // Set the N_TYPE bits. See <mach-o/nlist.h>. 412 // 413 // FIXME: Are the prebound or indirect fields possible here? 414 if (IsAlias && Symbol->isUndefined()) 415 Type = MachO::N_INDR; 416 else if (Symbol->isUndefined()) 417 Type = MachO::N_UNDF; 418 else if (Symbol->isAbsolute()) 419 Type = MachO::N_ABS; 420 else 421 Type = MachO::N_SECT; 422 423 // FIXME: Set STAB bits. 424 425 if (Data.isPrivateExtern()) 426 Type |= MachO::N_PEXT; 427 428 // Set external bit. 429 if (Data.isExternal() || (!IsAlias && Symbol->isUndefined())) 430 Type |= MachO::N_EXT; 431 432 // Compute the symbol address. 433 if (IsAlias && Symbol->isUndefined()) 434 Address = AliaseeInfo->StringIndex; 435 else if (Symbol->isDefined()) 436 Address = getSymbolAddress(OrigSymbol, Asm); 437 else if (Symbol->isCommon()) { 438 // Common symbols are encoded with the size in the address 439 // field, and their alignment in the flags. 440 Address = Symbol->getCommonSize(); 441 } 442 443 // struct nlist (12 bytes) 444 445 W.write<uint32_t>(MSD.StringIndex); 446 W.OS << char(Type); 447 W.OS << char(SectionIndex); 448 449 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' 450 // value. 451 bool EncodeAsAltEntry = 452 IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry(); 453 W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry)); 454 if (is64Bit()) 455 W.write<uint64_t>(Address); 456 else 457 W.write<uint32_t>(Address); 458 } 459 460 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type, 461 uint32_t DataOffset, 462 uint32_t DataSize) { 463 uint64_t Start = W.OS.tell(); 464 (void) Start; 465 466 W.write<uint32_t>(Type); 467 W.write<uint32_t>(sizeof(MachO::linkedit_data_command)); 468 W.write<uint32_t>(DataOffset); 469 W.write<uint32_t>(DataSize); 470 471 assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command)); 472 } 473 474 static unsigned ComputeLinkerOptionsLoadCommandSize( 475 const std::vector<std::string> &Options, bool is64Bit) 476 { 477 unsigned Size = sizeof(MachO::linker_option_command); 478 for (const std::string &Option : Options) 479 Size += Option.size() + 1; 480 return alignTo(Size, is64Bit ? 8 : 4); 481 } 482 483 void MachObjectWriter::writeLinkerOptionsLoadCommand( 484 const std::vector<std::string> &Options) 485 { 486 unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); 487 uint64_t Start = W.OS.tell(); 488 (void) Start; 489 490 W.write<uint32_t>(MachO::LC_LINKER_OPTION); 491 W.write<uint32_t>(Size); 492 W.write<uint32_t>(Options.size()); 493 uint64_t BytesWritten = sizeof(MachO::linker_option_command); 494 for (const std::string &Option : Options) { 495 // Write each string, including the null byte. 496 W.OS << Option << '\0'; 497 BytesWritten += Option.size() + 1; 498 } 499 500 // Pad to a multiple of the pointer size. 501 W.OS.write_zeros( 502 offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4))); 503 504 assert(W.OS.tell() - Start == Size); 505 } 506 507 static bool isFixupTargetValid(const MCValue &Target) { 508 // Target is (LHS - RHS + cst). 509 // We don't support the form where LHS is null: -RHS + cst 510 if (!Target.getSymA() && Target.getSymB()) 511 return false; 512 return true; 513 } 514 515 void MachObjectWriter::recordRelocation(MCAssembler &Asm, 516 const MCFragment *Fragment, 517 const MCFixup &Fixup, MCValue Target, 518 uint64_t &FixedValue) { 519 if (!isFixupTargetValid(Target)) { 520 Asm.getContext().reportError(Fixup.getLoc(), 521 "unsupported relocation expression"); 522 return; 523 } 524 525 TargetObjectWriter->recordRelocation(this, Asm, Fragment, Fixup, Target, 526 FixedValue); 527 } 528 529 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { 530 // This is the point where 'as' creates actual symbols for indirect symbols 531 // (in the following two passes). It would be easier for us to do this sooner 532 // when we see the attribute, but that makes getting the order in the symbol 533 // table much more complicated than it is worth. 534 // 535 // FIXME: Revisit this when the dust settles. 536 537 // Report errors for use of .indirect_symbol not in a symbol pointer section 538 // or stub section. 539 for (IndirectSymbolData &ISD : IndirectSymbols) { 540 const MCSectionMachO &Section = cast<MCSectionMachO>(*ISD.Section); 541 542 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && 543 Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 544 Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS && 545 Section.getType() != MachO::S_SYMBOL_STUBS) { 546 MCSymbol &Symbol = *ISD.Symbol; 547 report_fatal_error("indirect symbol '" + Symbol.getName() + 548 "' not in a symbol pointer or stub section"); 549 } 550 } 551 552 // Bind non-lazy symbol pointers first. 553 for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) { 554 const auto &Section = cast<MCSectionMachO>(*ISD.Section); 555 556 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && 557 Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS) 558 continue; 559 560 // Initialize the section indirect symbol base, if necessary. 561 IndirectSymBase.insert(std::make_pair(ISD.Section, IndirectIndex)); 562 563 Asm.registerSymbol(*ISD.Symbol); 564 } 565 566 // Then lazy symbol pointers and symbol stubs. 567 for (auto [IndirectIndex, ISD] : enumerate(IndirectSymbols)) { 568 const auto &Section = cast<MCSectionMachO>(*ISD.Section); 569 570 if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && 571 Section.getType() != MachO::S_SYMBOL_STUBS) 572 continue; 573 574 // Initialize the section indirect symbol base, if necessary. 575 IndirectSymBase.insert(std::make_pair(ISD.Section, IndirectIndex)); 576 577 // Set the symbol type to undefined lazy, but only on construction. 578 // 579 // FIXME: Do not hardcode. 580 if (Asm.registerSymbol(*ISD.Symbol)) 581 cast<MCSymbolMachO>(ISD.Symbol)->setReferenceTypeUndefinedLazy(true); 582 } 583 } 584 585 /// computeSymbolTable - Compute the symbol table data 586 void MachObjectWriter::computeSymbolTable( 587 MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData, 588 std::vector<MachSymbolData> &ExternalSymbolData, 589 std::vector<MachSymbolData> &UndefinedSymbolData) { 590 // Build section lookup table. 591 DenseMap<const MCSection*, uint8_t> SectionIndexMap; 592 unsigned Index = 1; 593 for (MCSection &Sec : Asm) 594 SectionIndexMap[&Sec] = Index++; 595 assert(Index <= 256 && "Too many sections!"); 596 597 // Build the string table. 598 for (const MCSymbol &Symbol : Asm.symbols()) { 599 if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible()) 600 continue; 601 602 StringTable.add(Symbol.getName()); 603 } 604 StringTable.finalize(); 605 606 // Build the symbol arrays but only for non-local symbols. 607 // 608 // The particular order that we collect and then sort the symbols is chosen to 609 // match 'as'. Even though it doesn't matter for correctness, this is 610 // important for letting us diff .o files. 611 for (const MCSymbol &Symbol : Asm.symbols()) { 612 // Ignore non-linker visible symbols. 613 if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible()) 614 continue; 615 616 if (!Symbol.isExternal() && !Symbol.isUndefined()) 617 continue; 618 619 MachSymbolData MSD; 620 MSD.Symbol = &Symbol; 621 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 622 623 if (Symbol.isUndefined()) { 624 MSD.SectionIndex = 0; 625 UndefinedSymbolData.push_back(MSD); 626 } else if (Symbol.isAbsolute()) { 627 MSD.SectionIndex = 0; 628 ExternalSymbolData.push_back(MSD); 629 } else { 630 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 631 assert(MSD.SectionIndex && "Invalid section index!"); 632 ExternalSymbolData.push_back(MSD); 633 } 634 } 635 636 // Now add the data for local symbols. 637 for (const MCSymbol &Symbol : Asm.symbols()) { 638 // Ignore non-linker visible symbols. 639 if (!cast<MCSymbolMachO>(Symbol).isSymbolLinkerVisible()) 640 continue; 641 642 if (Symbol.isExternal() || Symbol.isUndefined()) 643 continue; 644 645 MachSymbolData MSD; 646 MSD.Symbol = &Symbol; 647 MSD.StringIndex = StringTable.getOffset(Symbol.getName()); 648 649 if (Symbol.isAbsolute()) { 650 MSD.SectionIndex = 0; 651 LocalSymbolData.push_back(MSD); 652 } else { 653 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); 654 assert(MSD.SectionIndex && "Invalid section index!"); 655 LocalSymbolData.push_back(MSD); 656 } 657 } 658 659 // External and undefined symbols are required to be in lexicographic order. 660 llvm::sort(ExternalSymbolData); 661 llvm::sort(UndefinedSymbolData); 662 663 // Set the symbol indices. 664 Index = 0; 665 for (auto *SymbolData : 666 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) 667 for (MachSymbolData &Entry : *SymbolData) 668 Entry.Symbol->setIndex(Index++); 669 670 for (const MCSection &Section : Asm) { 671 for (RelAndSymbol &Rel : Relocations[&Section]) { 672 if (!Rel.Sym) 673 continue; 674 675 // Set the Index and the IsExtern bit. 676 unsigned Index = Rel.Sym->getIndex(); 677 assert(isInt<24>(Index)); 678 if (W.Endian == llvm::endianness::little) 679 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27); 680 else 681 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4); 682 } 683 } 684 } 685 686 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm) { 687 // Assign layout order indices to sections. 688 unsigned i = 0; 689 // Compute the section layout order. Virtual sections must go last. 690 for (MCSection &Sec : Asm) { 691 if (!Sec.isVirtualSection()) { 692 SectionOrder.push_back(&Sec); 693 cast<MCSectionMachO>(Sec).setLayoutOrder(i++); 694 } 695 } 696 for (MCSection &Sec : Asm) { 697 if (Sec.isVirtualSection()) { 698 SectionOrder.push_back(&Sec); 699 cast<MCSectionMachO>(Sec).setLayoutOrder(i++); 700 } 701 } 702 703 uint64_t StartAddress = 0; 704 for (const MCSection *Sec : SectionOrder) { 705 StartAddress = alignTo(StartAddress, Sec->getAlign()); 706 SectionAddress[Sec] = StartAddress; 707 StartAddress += Asm.getSectionAddressSize(*Sec); 708 709 // Explicitly pad the section to match the alignment requirements of the 710 // following one. This is for 'gas' compatibility, it shouldn't 711 /// strictly be necessary. 712 StartAddress += getPaddingSize(Asm, Sec); 713 } 714 } 715 716 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm) { 717 computeSectionAddresses(Asm); 718 719 // Create symbol data for any indirect symbols. 720 bindIndirectSymbols(Asm); 721 } 722 723 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( 724 const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, 725 bool InSet, bool IsPCRel) const { 726 if (InSet) 727 return true; 728 729 // The effective address is 730 // addr(atom(A)) + offset(A) 731 // - addr(atom(B)) - offset(B) 732 // and the offsets are not relocatable, so the fixup is fully resolved when 733 // addr(atom(A)) - addr(atom(B)) == 0. 734 const MCSymbol &SA = findAliasedSymbol(SymA); 735 const MCSection &SecA = SA.getSection(); 736 const MCSection &SecB = *FB.getParent(); 737 738 if (IsPCRel) { 739 // The simple (Darwin, except on x86_64) way of dealing with this was to 740 // assume that any reference to a temporary symbol *must* be a temporary 741 // symbol in the same atom, unless the sections differ. Therefore, any PCrel 742 // relocation to a temporary symbol (in the same section) is fully 743 // resolved. This also works in conjunction with absolutized .set, which 744 // requires the compiler to use .set to absolutize the differences between 745 // symbols which the compiler knows to be assembly time constants, so we 746 // don't need to worry about considering symbol differences fully resolved. 747 // 748 // If the file isn't using sub-sections-via-symbols, we can make the 749 // same assumptions about any symbol that we normally make about 750 // assembler locals. 751 752 bool hasReliableSymbolDifference = isX86_64(); 753 if (!hasReliableSymbolDifference) { 754 if (!SA.isInSection() || &SecA != &SecB || 755 (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() && 756 SubsectionsViaSymbols)) 757 return false; 758 return true; 759 } 760 } 761 762 // If they are not in the same section, we can't compute the diff. 763 if (&SecA != &SecB) 764 return false; 765 766 // If the atoms are the same, they are guaranteed to have the same address. 767 return SA.getFragment()->getAtom() == FB.getAtom(); 768 } 769 770 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) { 771 switch (Type) { 772 case MCVM_OSXVersionMin: return MachO::LC_VERSION_MIN_MACOSX; 773 case MCVM_IOSVersionMin: return MachO::LC_VERSION_MIN_IPHONEOS; 774 case MCVM_TvOSVersionMin: return MachO::LC_VERSION_MIN_TVOS; 775 case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS; 776 } 777 llvm_unreachable("Invalid mc version min type"); 778 } 779 780 void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) { 781 MCSection *AddrSigSection = 782 Asm.getContext().getObjectFileInfo()->getAddrSigSection(); 783 unsigned Log2Size = is64Bit() ? 3 : 2; 784 for (const MCSymbol *S : getAddrsigSyms()) { 785 if (!S->isRegistered()) 786 continue; 787 MachO::any_relocation_info MRE; 788 MRE.r_word0 = 0; 789 MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28); 790 addRelocation(S, AddrSigSection, MRE); 791 } 792 } 793 794 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm) { 795 uint64_t StartOffset = W.OS.tell(); 796 auto NumBytesWritten = [&] { return W.OS.tell() - StartOffset; }; 797 798 populateAddrSigSection(Asm); 799 800 // Compute symbol table information and bind symbol indices. 801 computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, 802 UndefinedSymbolData); 803 804 if (!CGProfile.empty()) { 805 MCSection *CGProfileSection = Asm.getContext().getMachOSection( 806 "__LLVM", "__cg_profile", 0, SectionKind::getMetadata()); 807 auto &Frag = cast<MCDataFragment>(*CGProfileSection->begin()); 808 Frag.getContents().clear(); 809 raw_svector_ostream OS(Frag.getContents()); 810 for (const MCObjectWriter::CGProfileEntry &CGPE : CGProfile) { 811 uint32_t FromIndex = CGPE.From->getSymbol().getIndex(); 812 uint32_t ToIndex = CGPE.To->getSymbol().getIndex(); 813 support::endian::write(OS, FromIndex, W.Endian); 814 support::endian::write(OS, ToIndex, W.Endian); 815 support::endian::write(OS, CGPE.Count, W.Endian); 816 } 817 } 818 819 unsigned NumSections = Asm.end() - Asm.begin(); 820 821 // The section data starts after the header, the segment load command (and 822 // section headers) and the symbol table. 823 unsigned NumLoadCommands = 1; 824 uint64_t LoadCommandsSize = is64Bit() ? 825 sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64): 826 sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section); 827 828 // Add the deployment target version info load command size, if used. 829 if (VersionInfo.Major != 0) { 830 ++NumLoadCommands; 831 if (VersionInfo.EmitBuildVersion) 832 LoadCommandsSize += sizeof(MachO::build_version_command); 833 else 834 LoadCommandsSize += sizeof(MachO::version_min_command); 835 } 836 837 // Add the target variant version info load command size, if used. 838 if (TargetVariantVersionInfo.Major != 0) { 839 ++NumLoadCommands; 840 assert(TargetVariantVersionInfo.EmitBuildVersion && 841 "target variant should use build version"); 842 LoadCommandsSize += sizeof(MachO::build_version_command); 843 } 844 845 // Add the data-in-code load command size, if used. 846 unsigned NumDataRegions = DataRegions.size(); 847 if (NumDataRegions) { 848 ++NumLoadCommands; 849 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 850 } 851 852 // Add the loh load command size, if used. 853 uint64_t LOHRawSize = LOHContainer.getEmitSize(Asm, *this); 854 uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4); 855 if (LOHSize) { 856 ++NumLoadCommands; 857 LoadCommandsSize += sizeof(MachO::linkedit_data_command); 858 } 859 860 // Add the symbol table load command sizes, if used. 861 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + 862 UndefinedSymbolData.size(); 863 if (NumSymbols) { 864 NumLoadCommands += 2; 865 LoadCommandsSize += (sizeof(MachO::symtab_command) + 866 sizeof(MachO::dysymtab_command)); 867 } 868 869 // Add the linker option load commands sizes. 870 for (const auto &Option : LinkerOptions) { 871 ++NumLoadCommands; 872 LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit()); 873 } 874 875 // Compute the total size of the section data, as well as its file size and vm 876 // size. 877 uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) : 878 sizeof(MachO::mach_header)) + LoadCommandsSize; 879 uint64_t SectionDataSize = 0; 880 uint64_t SectionDataFileSize = 0; 881 uint64_t VMSize = 0; 882 for (const MCSection &Sec : Asm) { 883 uint64_t Address = getSectionAddress(&Sec); 884 uint64_t Size = Asm.getSectionAddressSize(Sec); 885 uint64_t FileSize = Asm.getSectionFileSize(Sec); 886 FileSize += getPaddingSize(Asm, &Sec); 887 888 VMSize = std::max(VMSize, Address + Size); 889 890 if (Sec.isVirtualSection()) 891 continue; 892 893 SectionDataSize = std::max(SectionDataSize, Address + Size); 894 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize); 895 } 896 897 // The section data is padded to pointer size bytes. 898 // 899 // FIXME: Is this machine dependent? 900 unsigned SectionDataPadding = 901 offsetToAlignment(SectionDataFileSize, is64Bit() ? Align(8) : Align(4)); 902 SectionDataFileSize += SectionDataPadding; 903 904 // Write the prolog, starting with the header and load command... 905 writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize, 906 SubsectionsViaSymbols); 907 uint32_t Prot = 908 MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE; 909 writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart, 910 SectionDataSize, Prot, Prot); 911 912 // ... and then the section headers. 913 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; 914 for (const MCSection &Section : Asm) { 915 const auto &Sec = cast<MCSectionMachO>(Section); 916 std::vector<RelAndSymbol> &Relocs = Relocations[&Sec]; 917 unsigned NumRelocs = Relocs.size(); 918 uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec); 919 unsigned Flags = Sec.getTypeAndAttributes(); 920 if (Sec.hasInstructions()) 921 Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; 922 if (!cast<MCSectionMachO>(Sec).isVirtualSection() && 923 !isUInt<32>(SectionStart)) { 924 Asm.getContext().reportError( 925 SMLoc(), "cannot encode offset of section; object file too large"); 926 return NumBytesWritten(); 927 } 928 if (NumRelocs && !isUInt<32>(RelocTableEnd)) { 929 Asm.getContext().reportError( 930 SMLoc(), 931 "cannot encode offset of relocations; object file too large"); 932 return NumBytesWritten(); 933 } 934 writeSection(Asm, Sec, getSectionAddress(&Sec), SectionStart, Flags, 935 RelocTableEnd, NumRelocs); 936 RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); 937 } 938 939 // Write out the deployment target information, if it's available. 940 auto EmitDeploymentTargetVersion = 941 [&](const VersionInfoType &VersionInfo) { 942 auto EncodeVersion = [](VersionTuple V) -> uint32_t { 943 assert(!V.empty() && "empty version"); 944 unsigned Update = V.getSubminor().value_or(0); 945 unsigned Minor = V.getMinor().value_or(0); 946 assert(Update < 256 && "unencodable update target version"); 947 assert(Minor < 256 && "unencodable minor target version"); 948 assert(V.getMajor() < 65536 && "unencodable major target version"); 949 return Update | (Minor << 8) | (V.getMajor() << 16); 950 }; 951 uint32_t EncodedVersion = EncodeVersion(VersionTuple( 952 VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update)); 953 uint32_t SDKVersion = !VersionInfo.SDKVersion.empty() 954 ? EncodeVersion(VersionInfo.SDKVersion) 955 : 0; 956 if (VersionInfo.EmitBuildVersion) { 957 // FIXME: Currently empty tools. Add clang version in the future. 958 W.write<uint32_t>(MachO::LC_BUILD_VERSION); 959 W.write<uint32_t>(sizeof(MachO::build_version_command)); 960 W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform); 961 W.write<uint32_t>(EncodedVersion); 962 W.write<uint32_t>(SDKVersion); 963 W.write<uint32_t>(0); // Empty tools list. 964 } else { 965 MachO::LoadCommandType LCType = 966 getLCFromMCVM(VersionInfo.TypeOrPlatform.Type); 967 W.write<uint32_t>(LCType); 968 W.write<uint32_t>(sizeof(MachO::version_min_command)); 969 W.write<uint32_t>(EncodedVersion); 970 W.write<uint32_t>(SDKVersion); 971 } 972 }; 973 if (VersionInfo.Major != 0) 974 EmitDeploymentTargetVersion(VersionInfo); 975 if (TargetVariantVersionInfo.Major != 0) 976 EmitDeploymentTargetVersion(TargetVariantVersionInfo); 977 978 // Write the data-in-code load command, if used. 979 uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8; 980 if (NumDataRegions) { 981 uint64_t DataRegionsOffset = RelocTableEnd; 982 uint64_t DataRegionsSize = NumDataRegions * 8; 983 writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset, 984 DataRegionsSize); 985 } 986 987 // Write the loh load command, if used. 988 uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize; 989 if (LOHSize) 990 writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT, 991 DataInCodeTableEnd, LOHSize); 992 993 // Write the symbol table load command, if used. 994 if (NumSymbols) { 995 unsigned FirstLocalSymbol = 0; 996 unsigned NumLocalSymbols = LocalSymbolData.size(); 997 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; 998 unsigned NumExternalSymbols = ExternalSymbolData.size(); 999 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; 1000 unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); 1001 unsigned NumIndirectSymbols = IndirectSymbols.size(); 1002 unsigned NumSymTabSymbols = 1003 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; 1004 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; 1005 uint64_t IndirectSymbolOffset = 0; 1006 1007 // If used, the indirect symbols are written after the section data. 1008 if (NumIndirectSymbols) 1009 IndirectSymbolOffset = LOHTableEnd; 1010 1011 // The symbol table is written after the indirect symbol data. 1012 uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize; 1013 1014 // The string table is written after symbol table. 1015 uint64_t StringTableOffset = 1016 SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? 1017 sizeof(MachO::nlist_64) : 1018 sizeof(MachO::nlist)); 1019 writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, 1020 StringTableOffset, StringTable.getSize()); 1021 1022 writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, 1023 FirstExternalSymbol, NumExternalSymbols, 1024 FirstUndefinedSymbol, NumUndefinedSymbols, 1025 IndirectSymbolOffset, NumIndirectSymbols); 1026 } 1027 1028 // Write the linker options load commands. 1029 for (const auto &Option : LinkerOptions) 1030 writeLinkerOptionsLoadCommand(Option); 1031 1032 // Write the actual section data. 1033 for (const MCSection &Sec : Asm) { 1034 Asm.writeSectionData(W.OS, &Sec); 1035 1036 uint64_t Pad = getPaddingSize(Asm, &Sec); 1037 W.OS.write_zeros(Pad); 1038 } 1039 1040 // Write the extra padding. 1041 W.OS.write_zeros(SectionDataPadding); 1042 1043 // Write the relocation entries. 1044 for (const MCSection &Sec : Asm) { 1045 // Write the section relocation entries, in reverse order to match 'as' 1046 // (approximately, the exact algorithm is more complicated than this). 1047 std::vector<RelAndSymbol> &Relocs = Relocations[&Sec]; 1048 for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) { 1049 W.write<uint32_t>(Rel.MRE.r_word0); 1050 W.write<uint32_t>(Rel.MRE.r_word1); 1051 } 1052 } 1053 1054 // Write out the data-in-code region payload, if there is one. 1055 for (DataRegionData Data : DataRegions) { 1056 uint64_t Start = getSymbolAddress(*Data.Start, Asm); 1057 uint64_t End; 1058 if (Data.End) 1059 End = getSymbolAddress(*Data.End, Asm); 1060 else 1061 report_fatal_error("Data region not terminated"); 1062 1063 LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data.Kind 1064 << " start: " << Start << "(" << Data.Start->getName() 1065 << ")" << " end: " << End << "(" << Data.End->getName() 1066 << ")" << " size: " << End - Start << "\n"); 1067 W.write<uint32_t>(Start); 1068 W.write<uint16_t>(End - Start); 1069 W.write<uint16_t>(Data.Kind); 1070 } 1071 1072 // Write out the loh commands, if there is one. 1073 if (LOHSize) { 1074 #ifndef NDEBUG 1075 unsigned Start = W.OS.tell(); 1076 #endif 1077 LOHContainer.emit(Asm, *this); 1078 // Pad to a multiple of the pointer size. 1079 W.OS.write_zeros( 1080 offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4))); 1081 assert(W.OS.tell() - Start == LOHSize); 1082 } 1083 1084 // Write the symbol table data, if used. 1085 if (NumSymbols) { 1086 // Write the indirect symbol entries. 1087 for (auto &ISD : IndirectSymbols) { 1088 // Indirect symbols in the non-lazy symbol pointer section have some 1089 // special handling. 1090 const MCSectionMachO &Section = 1091 static_cast<const MCSectionMachO &>(*ISD.Section); 1092 if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) { 1093 // If this symbol is defined and internal, mark it as such. 1094 if (ISD.Symbol->isDefined() && !ISD.Symbol->isExternal()) { 1095 uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL; 1096 if (ISD.Symbol->isAbsolute()) 1097 Flags |= MachO::INDIRECT_SYMBOL_ABS; 1098 W.write<uint32_t>(Flags); 1099 continue; 1100 } 1101 } 1102 1103 W.write<uint32_t>(ISD.Symbol->getIndex()); 1104 } 1105 1106 // FIXME: Check that offsets match computed ones. 1107 1108 // Write the symbol table entries. 1109 for (auto *SymbolData : 1110 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData}) 1111 for (MachSymbolData &Entry : *SymbolData) 1112 writeNlist(Entry, Asm); 1113 1114 // Write the string table. 1115 StringTable.write(W.OS); 1116 } 1117 1118 return NumBytesWritten(); 1119 } 1120