1 //===------------ MachOBuilder.h -- Build MachO Objects ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Build MachO object files for interaction with the ObjC runtime and debugger. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H 14 #define LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H 15 16 #include "llvm/BinaryFormat/MachO.h" 17 #include "llvm/Support/Endian.h" 18 #include "llvm/Support/MathExtras.h" 19 20 #include <list> 21 #include <map> 22 #include <vector> 23 24 namespace llvm { 25 namespace orc { 26 27 template <typename MachOStruct> 28 size_t writeMachOStruct(MutableArrayRef<char> Buf, size_t Offset, MachOStruct S, 29 bool SwapStruct) { 30 if (SwapStruct) 31 MachO::swapStruct(S); 32 assert(Offset + sizeof(MachOStruct) <= Buf.size() && "Buffer overflow"); 33 memcpy(&Buf[Offset], reinterpret_cast<const char *>(&S), sizeof(MachOStruct)); 34 return Offset + sizeof(MachOStruct); 35 } 36 37 /// Base type for MachOBuilder load command wrappers. 38 struct MachOBuilderLoadCommandBase { 39 virtual ~MachOBuilderLoadCommandBase() {} 40 virtual size_t size() const = 0; 41 virtual size_t write(MutableArrayRef<char> Buf, size_t Offset, 42 bool SwapStruct) = 0; 43 }; 44 45 /// MachOBuilder load command wrapper type. 46 template <MachO::LoadCommandType LCType> struct MachOBuilderLoadCommandImplBase; 47 48 #define HANDLE_LOAD_COMMAND(Name, Value, LCStruct) \ 49 template <> \ 50 struct MachOBuilderLoadCommandImplBase<MachO::Name> \ 51 : public MachO::LCStruct, public MachOBuilderLoadCommandBase { \ 52 using CmdStruct = LCStruct; \ 53 MachOBuilderLoadCommandImplBase() { \ 54 memset(&rawStruct(), 0, sizeof(CmdStruct)); \ 55 cmd = Value; \ 56 cmdsize = sizeof(CmdStruct); \ 57 } \ 58 template <typename... ArgTs> \ 59 MachOBuilderLoadCommandImplBase(ArgTs &&...Args) \ 60 : CmdStruct{Value, sizeof(CmdStruct), std::forward<ArgTs>(Args)...} {} \ 61 CmdStruct &rawStruct() { return static_cast<CmdStruct &>(*this); } \ 62 size_t size() const override { return cmdsize; } \ 63 size_t write(MutableArrayRef<char> Buf, size_t Offset, \ 64 bool SwapStruct) override { \ 65 return writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct); \ 66 } \ 67 }; 68 69 #include "llvm/BinaryFormat/MachO.def" 70 71 #undef HANDLE_LOAD_COMMAND 72 73 template <MachO::LoadCommandType LCType> 74 struct MachOBuilderLoadCommand 75 : public MachOBuilderLoadCommandImplBase<LCType> { 76 public: 77 MachOBuilderLoadCommand() = default; 78 79 template <typename... ArgTs> 80 MachOBuilderLoadCommand(ArgTs &&...Args) 81 : MachOBuilderLoadCommandImplBase<LCType>(std::forward<ArgTs>(Args)...) {} 82 }; 83 84 template <> 85 struct MachOBuilderLoadCommand<MachO::LC_ID_DYLIB> 86 : public MachOBuilderLoadCommandImplBase<MachO::LC_ID_DYLIB> { 87 88 MachOBuilderLoadCommand(std::string Name, uint32_t Timestamp, 89 uint32_t CurrentVersion, 90 uint32_t CompatibilityVersion) 91 : MachOBuilderLoadCommandImplBase( 92 MachO::dylib{24, Timestamp, CurrentVersion, CompatibilityVersion}), 93 Name(std::move(Name)) { 94 cmdsize += (this->Name.size() + 1 + 3) & ~0x3; 95 } 96 97 size_t write(MutableArrayRef<char> Buf, size_t Offset, 98 bool SwapStruct) override { 99 Offset = writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct); 100 strcpy(Buf.data() + Offset, Name.data()); 101 return Offset + ((Name.size() + 1 + 3) & ~0x3); 102 } 103 104 std::string Name; 105 }; 106 107 template <> 108 struct MachOBuilderLoadCommand<MachO::LC_LOAD_DYLIB> 109 : public MachOBuilderLoadCommandImplBase<MachO::LC_LOAD_DYLIB> { 110 111 MachOBuilderLoadCommand(std::string Name, uint32_t Timestamp, 112 uint32_t CurrentVersion, 113 uint32_t CompatibilityVersion) 114 : MachOBuilderLoadCommandImplBase( 115 MachO::dylib{24, Timestamp, CurrentVersion, CompatibilityVersion}), 116 Name(std::move(Name)) { 117 cmdsize += (this->Name.size() + 1 + 3) & ~0x3; 118 } 119 120 size_t write(MutableArrayRef<char> Buf, size_t Offset, 121 bool SwapStruct) override { 122 Offset = writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct); 123 strcpy(Buf.data() + Offset, Name.data()); 124 return Offset + ((Name.size() + 1 + 3) & ~0x3); 125 } 126 127 std::string Name; 128 }; 129 130 template <> 131 struct MachOBuilderLoadCommand<MachO::LC_RPATH> 132 : public MachOBuilderLoadCommandImplBase<MachO::LC_RPATH> { 133 MachOBuilderLoadCommand(std::string Path) 134 : MachOBuilderLoadCommandImplBase(12u), Path(std::move(Path)) { 135 cmdsize += (this->Path.size() + 1 + 3) & ~0x3; 136 } 137 138 size_t write(MutableArrayRef<char> Buf, size_t Offset, 139 bool SwapStruct) override { 140 Offset = writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct); 141 strcpy(Buf.data() + Offset, Path.data()); 142 return Offset + ((Path.size() + 1 + 3) & ~0x3); 143 } 144 145 std::string Path; 146 }; 147 148 // Builds MachO objects. 149 template <typename MachOTraits> class MachOBuilder { 150 private: 151 struct SymbolContainer { 152 size_t SymbolIndexBase = 0; 153 std::vector<typename MachOTraits::NList> Symbols; 154 }; 155 156 struct StringTableEntry { 157 StringRef S; 158 size_t Offset; 159 }; 160 161 using StringTable = std::vector<StringTableEntry>; 162 163 static bool swapStruct() { 164 return MachOTraits::Endianness != llvm::endianness::native; 165 } 166 167 public: 168 using StringId = size_t; 169 170 struct Section; 171 172 // Points to either an nlist entry (as a (symbol-container, index) pair), or 173 // a section. 174 class RelocTarget { 175 public: 176 RelocTarget(const Section &S) : S(&S), Idx(~0U) {} 177 RelocTarget(SymbolContainer &SC, size_t Idx) : SC(&SC), Idx(Idx) {} 178 179 bool isSymbol() { return Idx != ~0U; } 180 181 uint32_t getSymbolNum() { 182 assert(isSymbol() && "Target is not a symbol"); 183 return SC->SymbolIndexBase + Idx; 184 } 185 186 uint32_t getSectionId() { 187 assert(!isSymbol() && "Target is not a section"); 188 return S->SectionNumber; 189 } 190 191 typename MachOTraits::NList &nlist() { 192 assert(isSymbol() && "Target is not a symbol"); 193 return SC->Symbols[Idx]; 194 } 195 196 private: 197 union { 198 const Section *S; 199 SymbolContainer *SC; 200 }; 201 size_t Idx; 202 }; 203 204 struct Reloc : public MachO::relocation_info { 205 RelocTarget Target; 206 207 Reloc(int32_t Offset, RelocTarget Target, bool PCRel, unsigned Length, 208 unsigned Type) 209 : Target(Target) { 210 assert(Type < 16 && "Relocation type out of range"); 211 r_address = Offset; // Will slide to account for sec addr during layout 212 r_symbolnum = 0; 213 r_pcrel = PCRel; 214 r_length = Length; 215 r_extern = Target.isSymbol(); 216 r_type = Type; 217 } 218 219 MachO::relocation_info &rawStruct() { 220 return static_cast<MachO::relocation_info &>(*this); 221 } 222 }; 223 224 struct SectionContent { 225 const char *Data = nullptr; 226 size_t Size = 0; 227 }; 228 229 struct Section : public MachOTraits::Section, public RelocTarget { 230 MachOBuilder &Builder; 231 SectionContent Content; 232 size_t SectionNumber = 0; 233 SymbolContainer SC; 234 std::vector<Reloc> Relocs; 235 236 Section(MachOBuilder &Builder, StringRef SecName, StringRef SegName) 237 : RelocTarget(*this), Builder(Builder) { 238 memset(&rawStruct(), 0, sizeof(typename MachOTraits::Section)); 239 assert(SecName.size() <= 16 && "SecName too long"); 240 assert(SegName.size() <= 16 && "SegName too long"); 241 memcpy(this->sectname, SecName.data(), SecName.size()); 242 memcpy(this->segname, SegName.data(), SegName.size()); 243 } 244 245 RelocTarget addSymbol(int32_t Offset, StringRef Name, uint8_t Type, 246 uint16_t Desc) { 247 StringId SI = Builder.addString(Name); 248 typename MachOTraits::NList Sym; 249 Sym.n_strx = SI; 250 Sym.n_type = Type | MachO::N_SECT; 251 Sym.n_sect = MachO::NO_SECT; // Will be filled in later. 252 Sym.n_desc = Desc; 253 Sym.n_value = Offset; 254 SC.Symbols.push_back(Sym); 255 return {SC, SC.Symbols.size() - 1}; 256 } 257 258 void addReloc(int32_t Offset, RelocTarget Target, bool PCRel, 259 unsigned Length, unsigned Type) { 260 Relocs.push_back({Offset, Target, PCRel, Length, Type}); 261 } 262 263 auto &rawStruct() { 264 return static_cast<typename MachOTraits::Section &>(*this); 265 } 266 }; 267 268 struct Segment : public MachOBuilderLoadCommand<MachOTraits::SegmentCmd> { 269 MachOBuilder &Builder; 270 std::vector<std::unique_ptr<Section>> Sections; 271 272 Segment(MachOBuilder &Builder, StringRef SegName) 273 : MachOBuilderLoadCommand<MachOTraits::SegmentCmd>(), Builder(Builder) { 274 assert(SegName.size() <= 16 && "SegName too long"); 275 memcpy(this->segname, SegName.data(), SegName.size()); 276 this->maxprot = 277 MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE; 278 this->initprot = this->maxprot; 279 } 280 281 Section &addSection(StringRef SecName, StringRef SegName) { 282 Sections.push_back(std::make_unique<Section>(Builder, SecName, SegName)); 283 return *Sections.back(); 284 } 285 286 size_t write(MutableArrayRef<char> Buf, size_t Offset, 287 bool SwapStruct) override { 288 Offset = MachOBuilderLoadCommand<MachOTraits::SegmentCmd>::write( 289 Buf, Offset, SwapStruct); 290 for (auto &Sec : Sections) 291 Offset = writeMachOStruct(Buf, Offset, Sec->rawStruct(), SwapStruct); 292 return Offset; 293 } 294 }; 295 296 MachOBuilder(size_t PageSize) : PageSize(PageSize) { 297 memset((char *)&Header, 0, sizeof(Header)); 298 Header.magic = MachOTraits::Magic; 299 } 300 301 template <MachO::LoadCommandType LCType, typename... ArgTs> 302 MachOBuilderLoadCommand<LCType> &addLoadCommand(ArgTs &&...Args) { 303 static_assert(LCType != MachOTraits::SegmentCmd, 304 "Use addSegment to add segment load command"); 305 auto LC = std::make_unique<MachOBuilderLoadCommand<LCType>>( 306 std::forward<ArgTs>(Args)...); 307 auto &Tmp = *LC; 308 LoadCommands.push_back(std::move(LC)); 309 return Tmp; 310 } 311 312 StringId addString(StringRef Str) { 313 if (Strings.empty() && !Str.empty()) 314 addString(""); 315 return Strings.insert(std::make_pair(Str, Strings.size())).first->second; 316 } 317 318 Segment &addSegment(StringRef SegName) { 319 Segments.push_back(Segment(*this, SegName)); 320 return Segments.back(); 321 } 322 323 RelocTarget addSymbol(StringRef Name, uint8_t Type, uint8_t Sect, 324 uint16_t Desc, typename MachOTraits::UIntPtr Value) { 325 StringId SI = addString(Name); 326 typename MachOTraits::NList Sym; 327 Sym.n_strx = SI; 328 Sym.n_type = Type; 329 Sym.n_sect = Sect; 330 Sym.n_desc = Desc; 331 Sym.n_value = Value; 332 SC.Symbols.push_back(Sym); 333 return {SC, SC.Symbols.size() - 1}; 334 } 335 336 // Call to perform layout on the MachO. Returns the total size of the 337 // resulting file. 338 // This method will automatically insert some load commands (e.g. 339 // LC_SYMTAB) and fill in load command fields. 340 size_t layout() { 341 342 // Build symbol table and add LC_SYMTAB command. 343 makeStringTable(); 344 MachOBuilderLoadCommand<MachOTraits::SymTabCmd> *SymTabLC = nullptr; 345 if (!StrTab.empty()) 346 SymTabLC = &addLoadCommand<MachOTraits::SymTabCmd>(); 347 348 // Lay out header, segment load command, and other load commands. 349 size_t Offset = sizeof(Header); 350 for (auto &Seg : Segments) { 351 Seg.cmdsize += 352 Seg.Sections.size() * sizeof(typename MachOTraits::Section); 353 Seg.nsects = Seg.Sections.size(); 354 Offset += Seg.cmdsize; 355 } 356 for (auto &LC : LoadCommands) 357 Offset += LC->size(); 358 359 Header.sizeofcmds = Offset - sizeof(Header); 360 361 // Lay out content, set segment / section addrs and offsets. 362 size_t SegVMAddr = 0; 363 for (auto &Seg : Segments) { 364 Seg.vmaddr = SegVMAddr; 365 Seg.fileoff = Offset; 366 for (auto &Sec : Seg.Sections) { 367 Offset = alignTo(Offset, 1ULL << Sec->align); 368 if (Sec->Content.Size) 369 Sec->offset = Offset; 370 Sec->size = Sec->Content.Size; 371 Sec->addr = SegVMAddr + Sec->offset - Seg.fileoff; 372 Offset += Sec->Content.Size; 373 } 374 size_t SegContentSize = Offset - Seg.fileoff; 375 Seg.filesize = SegContentSize; 376 Seg.vmsize = Header.filetype == MachO::MH_OBJECT 377 ? SegContentSize 378 : alignTo(SegContentSize, PageSize); 379 SegVMAddr += Seg.vmsize; 380 } 381 382 // Set string table offsets for non-section symbols. 383 for (auto &Sym : SC.Symbols) 384 Sym.n_strx = StrTab[Sym.n_strx].Offset; 385 386 // Number sections, set symbol section numbers and string table offsets, 387 // count relocations. 388 size_t NumSymbols = SC.Symbols.size(); 389 size_t SectionNumber = 0; 390 for (auto &Seg : Segments) { 391 for (auto &Sec : Seg.Sections) { 392 ++SectionNumber; 393 Sec->SectionNumber = SectionNumber; 394 Sec->SC.SymbolIndexBase = NumSymbols; 395 NumSymbols += Sec->SC.Symbols.size(); 396 for (auto &Sym : Sec->SC.Symbols) { 397 Sym.n_sect = SectionNumber; 398 Sym.n_strx = StrTab[Sym.n_strx].Offset; 399 Sym.n_value += Sec->addr; 400 } 401 } 402 } 403 404 // Handle relocations 405 bool OffsetAlignedForRelocs = false; 406 for (auto &Seg : Segments) { 407 for (auto &Sec : Seg.Sections) { 408 if (!Sec->Relocs.empty()) { 409 if (!OffsetAlignedForRelocs) { 410 Offset = alignTo(Offset, sizeof(MachO::relocation_info)); 411 OffsetAlignedForRelocs = true; 412 } 413 Sec->reloff = Offset; 414 Sec->nreloc = Sec->Relocs.size(); 415 Offset += Sec->Relocs.size() * sizeof(MachO::relocation_info); 416 for (auto &R : Sec->Relocs) 417 R.r_symbolnum = R.Target.isSymbol() ? R.Target.getSymbolNum() 418 : R.Target.getSectionId(); 419 } 420 } 421 } 422 423 // Calculate offset to start of nlist and update symtab command. 424 if (NumSymbols > 0) { 425 Offset = alignTo(Offset, sizeof(typename MachOTraits::NList)); 426 SymTabLC->symoff = Offset; 427 SymTabLC->nsyms = NumSymbols; 428 429 // Calculate string table bounds and update symtab command. 430 if (!StrTab.empty()) { 431 Offset += NumSymbols * sizeof(typename MachOTraits::NList); 432 size_t StringTableSize = 433 StrTab.back().Offset + StrTab.back().S.size() + 1; 434 435 SymTabLC->stroff = Offset; 436 SymTabLC->strsize = StringTableSize; 437 Offset += StringTableSize; 438 } 439 } 440 441 return Offset; 442 } 443 444 void write(MutableArrayRef<char> Buffer) { 445 size_t Offset = 0; 446 Offset = writeHeader(Buffer, Offset); 447 Offset = writeSegments(Buffer, Offset); 448 Offset = writeLoadCommands(Buffer, Offset); 449 Offset = writeSectionContent(Buffer, Offset); 450 Offset = writeRelocations(Buffer, Offset); 451 Offset = writeSymbols(Buffer, Offset); 452 Offset = writeStrings(Buffer, Offset); 453 } 454 455 typename MachOTraits::Header Header; 456 457 private: 458 void makeStringTable() { 459 if (Strings.empty()) 460 return; 461 462 StrTab.resize(Strings.size()); 463 for (auto &KV : Strings) 464 StrTab[KV.second] = {KV.first, 0}; 465 size_t Offset = 0; 466 for (auto &Elem : StrTab) { 467 Elem.Offset = Offset; 468 Offset += Elem.S.size() + 1; 469 } 470 } 471 472 size_t writeHeader(MutableArrayRef<char> Buf, size_t Offset) { 473 Header.ncmds = Segments.size() + LoadCommands.size(); 474 return writeMachOStruct(Buf, Offset, Header, swapStruct()); 475 } 476 477 size_t writeSegments(MutableArrayRef<char> Buf, size_t Offset) { 478 for (auto &Seg : Segments) 479 Offset = Seg.write(Buf, Offset, swapStruct()); 480 return Offset; 481 } 482 483 size_t writeLoadCommands(MutableArrayRef<char> Buf, size_t Offset) { 484 for (auto &LC : LoadCommands) 485 Offset = LC->write(Buf, Offset, swapStruct()); 486 return Offset; 487 } 488 489 size_t writeSectionContent(MutableArrayRef<char> Buf, size_t Offset) { 490 for (auto &Seg : Segments) { 491 for (auto &Sec : Seg.Sections) { 492 if (!Sec->Content.Data) { 493 assert(Sec->Relocs.empty() && 494 "Cant' have relocs for zero-fill segment"); 495 continue; 496 } 497 while (Offset != Sec->offset) 498 Buf[Offset++] = '\0'; 499 500 assert(Offset + Sec->Content.Size <= Buf.size() && "Buffer overflow"); 501 memcpy(&Buf[Offset], Sec->Content.Data, Sec->Content.Size); 502 Offset += Sec->Content.Size; 503 } 504 } 505 return Offset; 506 } 507 508 size_t writeRelocations(MutableArrayRef<char> Buf, size_t Offset) { 509 for (auto &Seg : Segments) { 510 for (auto &Sec : Seg.Sections) { 511 if (!Sec->Relocs.empty()) { 512 while (Offset % sizeof(MachO::relocation_info)) 513 Buf[Offset++] = '\0'; 514 } 515 for (auto &R : Sec->Relocs) { 516 assert(Offset + sizeof(MachO::relocation_info) <= Buf.size() && 517 "Buffer overflow"); 518 memcpy(&Buf[Offset], reinterpret_cast<const char *>(&R.rawStruct()), 519 sizeof(MachO::relocation_info)); 520 Offset += sizeof(MachO::relocation_info); 521 } 522 } 523 } 524 return Offset; 525 } 526 527 size_t writeSymbols(MutableArrayRef<char> Buf, size_t Offset) { 528 529 // Count symbols. 530 size_t NumSymbols = SC.Symbols.size(); 531 for (auto &Seg : Segments) 532 for (auto &Sec : Seg.Sections) 533 NumSymbols += Sec->SC.Symbols.size(); 534 535 // If none then return. 536 if (NumSymbols == 0) 537 return Offset; 538 539 // Align to nlist entry size. 540 while (Offset % sizeof(typename MachOTraits::NList)) 541 Buf[Offset++] = '\0'; 542 543 // Write non-section symbols. 544 for (auto &Sym : SC.Symbols) 545 Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct()); 546 547 // Write section symbols. 548 for (auto &Seg : Segments) { 549 for (auto &Sec : Seg.Sections) { 550 for (auto &Sym : Sec->SC.Symbols) { 551 Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct()); 552 } 553 } 554 } 555 return Offset; 556 } 557 558 size_t writeStrings(MutableArrayRef<char> Buf, size_t Offset) { 559 for (auto &Elem : StrTab) { 560 assert(Offset + Elem.S.size() + 1 <= Buf.size() && "Buffer overflow"); 561 memcpy(&Buf[Offset], Elem.S.data(), Elem.S.size()); 562 Offset += Elem.S.size(); 563 Buf[Offset++] = '\0'; 564 } 565 return Offset; 566 } 567 568 size_t PageSize; 569 std::list<Segment> Segments; 570 std::vector<std::unique_ptr<MachOBuilderLoadCommandBase>> LoadCommands; 571 SymbolContainer SC; 572 573 // Maps strings to their "id" (addition order). 574 std::map<StringRef, size_t> Strings; 575 StringTable StrTab; 576 }; 577 578 struct MachO64LE { 579 using UIntPtr = uint64_t; 580 using Header = MachO::mach_header_64; 581 using Section = MachO::section_64; 582 using NList = MachO::nlist_64; 583 using Relocation = MachO::relocation_info; 584 585 static constexpr llvm::endianness Endianness = llvm::endianness::little; 586 static constexpr uint32_t Magic = MachO::MH_MAGIC_64; 587 static constexpr MachO::LoadCommandType SegmentCmd = MachO::LC_SEGMENT_64; 588 static constexpr MachO::LoadCommandType SymTabCmd = MachO::LC_SYMTAB; 589 }; 590 591 } // namespace orc 592 } // namespace llvm 593 594 #endif // LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H 595