1 //===------------ MachOBuilder.h -- Build MachO Objects ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Build MachO object files for interaction with the ObjC runtime and debugger. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H 14 #define LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H 15 16 #include "llvm/BinaryFormat/MachO.h" 17 #include "llvm/Support/Endian.h" 18 #include "llvm/Support/MathExtras.h" 19 20 #include <list> 21 #include <map> 22 #include <vector> 23 24 namespace llvm { 25 namespace orc { 26 27 template <typename MachOStruct> 28 size_t writeMachOStruct(MutableArrayRef<char> Buf, size_t Offset, MachOStruct S, 29 bool SwapStruct) { 30 if (SwapStruct) 31 MachO::swapStruct(S); 32 assert(Offset + sizeof(MachOStruct) <= Buf.size() && "Buffer overflow"); 33 memcpy(&Buf[Offset], reinterpret_cast<const char *>(&S), sizeof(MachOStruct)); 34 return Offset + sizeof(MachOStruct); 35 } 36 37 /// Base type for MachOBuilder load command wrappers. 38 struct MachOBuilderLoadCommandBase { 39 virtual ~MachOBuilderLoadCommandBase() {} 40 virtual size_t size() const = 0; 41 virtual size_t write(MutableArrayRef<char> Buf, size_t Offset, 42 bool SwapStruct) = 0; 43 }; 44 45 /// MachOBuilder load command wrapper type. 46 template <MachO::LoadCommandType LCType> struct MachOBuilderLoadCommand; 47 48 #define HANDLE_LOAD_COMMAND(Name, Value, LCStruct) \ 49 template <> \ 50 struct MachOBuilderLoadCommand<MachO::Name> \ 51 : public MachO::LCStruct, public MachOBuilderLoadCommandBase { \ 52 using CmdStruct = LCStruct; \ 53 MachOBuilderLoadCommand() { \ 54 memset(&rawStruct(), 0, sizeof(CmdStruct)); \ 55 cmd = Value; \ 56 cmdsize = sizeof(CmdStruct); \ 57 } \ 58 template <typename... ArgTs> \ 59 MachOBuilderLoadCommand(ArgTs &&...Args) \ 60 : CmdStruct{Value, sizeof(CmdStruct), std::forward<ArgTs>(Args)...} {} \ 61 CmdStruct &rawStruct() { return static_cast<CmdStruct &>(*this); } \ 62 size_t size() const override { return cmdsize; } \ 63 size_t write(MutableArrayRef<char> Buf, size_t Offset, \ 64 bool SwapStruct) override { \ 65 return writeMachOStruct(Buf, Offset, rawStruct(), SwapStruct); \ 66 } \ 67 }; 68 69 #include "llvm/BinaryFormat/MachO.def" 70 71 #undef HANDLE_LOAD_COMMAND 72 73 // Builds MachO objects. 74 template <typename MachOTraits> class MachOBuilder { 75 private: 76 struct SymbolContainer { 77 size_t SymbolIndexBase = 0; 78 std::vector<typename MachOTraits::NList> Symbols; 79 }; 80 81 struct StringTableEntry { 82 StringRef S; 83 size_t Offset; 84 }; 85 86 using StringTable = std::vector<StringTableEntry>; 87 88 static bool swapStruct() { 89 return MachOTraits::Endianness != llvm::endianness::native; 90 } 91 92 public: 93 using StringId = size_t; 94 95 struct Section; 96 97 // Points to either an nlist entry (as a (symbol-container, index) pair), or 98 // a section. 99 class RelocTarget { 100 public: 101 RelocTarget(const Section &S) : S(&S), Idx(~0U) {} 102 RelocTarget(SymbolContainer &SC, size_t Idx) : SC(&SC), Idx(Idx) {} 103 104 bool isSymbol() { return Idx != ~0U; } 105 106 uint32_t getSymbolNum() { 107 assert(isSymbol() && "Target is not a symbol"); 108 return SC->SymbolIndexBase + Idx; 109 } 110 111 uint32_t getSectionId() { 112 assert(!isSymbol() && "Target is not a section"); 113 return S->SectionNumber; 114 } 115 116 typename MachOTraits::NList &nlist() { 117 assert(isSymbol() && "Target is not a symbol"); 118 return SC->Symbols[Idx]; 119 } 120 121 private: 122 union { 123 const Section *S; 124 SymbolContainer *SC; 125 }; 126 size_t Idx; 127 }; 128 129 struct Reloc : public MachO::relocation_info { 130 RelocTarget Target; 131 132 Reloc(int32_t Offset, RelocTarget Target, bool PCRel, unsigned Length, 133 unsigned Type) 134 : Target(Target) { 135 assert(Type < 16 && "Relocation type out of range"); 136 r_address = Offset; // Will slide to account for sec addr during layout 137 r_symbolnum = 0; 138 r_pcrel = PCRel; 139 r_length = Length; 140 r_extern = Target.isSymbol(); 141 r_type = Type; 142 } 143 144 MachO::relocation_info &rawStruct() { 145 return static_cast<MachO::relocation_info &>(*this); 146 } 147 }; 148 149 struct SectionContent { 150 const char *Data = nullptr; 151 size_t Size = 0; 152 }; 153 154 struct Section : public MachOTraits::Section, public RelocTarget { 155 MachOBuilder &Builder; 156 SectionContent Content; 157 size_t SectionNumber = 0; 158 SymbolContainer SC; 159 std::vector<Reloc> Relocs; 160 161 Section(MachOBuilder &Builder, StringRef SecName, StringRef SegName) 162 : RelocTarget(*this), Builder(Builder) { 163 memset(&rawStruct(), 0, sizeof(typename MachOTraits::Section)); 164 assert(SecName.size() <= 16 && "SecName too long"); 165 assert(SegName.size() <= 16 && "SegName too long"); 166 memcpy(this->sectname, SecName.data(), SecName.size()); 167 memcpy(this->segname, SegName.data(), SegName.size()); 168 } 169 170 RelocTarget addSymbol(int32_t Offset, StringRef Name, uint8_t Type, 171 uint16_t Desc) { 172 StringId SI = Builder.addString(Name); 173 typename MachOTraits::NList Sym; 174 Sym.n_strx = SI; 175 Sym.n_type = Type | MachO::N_SECT; 176 Sym.n_sect = MachO::NO_SECT; // Will be filled in later. 177 Sym.n_desc = Desc; 178 Sym.n_value = Offset; 179 SC.Symbols.push_back(Sym); 180 return {SC, SC.Symbols.size() - 1}; 181 } 182 183 void addReloc(int32_t Offset, RelocTarget Target, bool PCRel, 184 unsigned Length, unsigned Type) { 185 Relocs.push_back({Offset, Target, PCRel, Length, Type}); 186 } 187 188 auto &rawStruct() { 189 return static_cast<typename MachOTraits::Section &>(*this); 190 } 191 }; 192 193 struct Segment : public MachOBuilderLoadCommand<MachOTraits::SegmentCmd> { 194 MachOBuilder &Builder; 195 std::vector<std::unique_ptr<Section>> Sections; 196 197 Segment(MachOBuilder &Builder, StringRef SegName) 198 : MachOBuilderLoadCommand<MachOTraits::SegmentCmd>(), Builder(Builder) { 199 assert(SegName.size() <= 16 && "SegName too long"); 200 memcpy(this->segname, SegName.data(), SegName.size()); 201 this->maxprot = 202 MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE; 203 this->initprot = this->maxprot; 204 } 205 206 Section &addSection(StringRef SecName, StringRef SegName) { 207 Sections.push_back(std::make_unique<Section>(Builder, SecName, SegName)); 208 return *Sections.back(); 209 } 210 211 size_t write(MutableArrayRef<char> Buf, size_t Offset, 212 bool SwapStruct) override { 213 Offset = MachOBuilderLoadCommand<MachOTraits::SegmentCmd>::write( 214 Buf, Offset, SwapStruct); 215 for (auto &Sec : Sections) 216 Offset = writeMachOStruct(Buf, Offset, Sec->rawStruct(), SwapStruct); 217 return Offset; 218 } 219 }; 220 221 MachOBuilder(size_t PageSize) : PageSize(PageSize) { 222 memset((char *)&Header, 0, sizeof(Header)); 223 Header.magic = MachOTraits::Magic; 224 } 225 226 template <MachO::LoadCommandType LCType, typename... ArgTs> 227 MachOBuilderLoadCommand<LCType> &addLoadCommand(ArgTs &&...Args) { 228 static_assert(LCType != MachOTraits::SegmentCmd, 229 "Use addSegment to add segment load command"); 230 auto LC = std::make_unique<MachOBuilderLoadCommand<LCType>>( 231 std::forward<ArgTs>(Args)...); 232 auto &Tmp = *LC; 233 LoadCommands.push_back(std::move(LC)); 234 return Tmp; 235 } 236 237 StringId addString(StringRef Str) { 238 if (Strings.empty() && !Str.empty()) 239 addString(""); 240 return Strings.insert(std::make_pair(Str, Strings.size())).first->second; 241 } 242 243 Segment &addSegment(StringRef SegName) { 244 Segments.push_back(Segment(*this, SegName)); 245 return Segments.back(); 246 } 247 248 RelocTarget addSymbol(StringRef Name, uint8_t Type, uint8_t Sect, 249 uint16_t Desc, typename MachOTraits::UIntPtr Value) { 250 StringId SI = addString(Name); 251 typename MachOTraits::NList Sym; 252 Sym.n_strx = SI; 253 Sym.n_type = Type; 254 Sym.n_sect = Sect; 255 Sym.n_desc = Desc; 256 Sym.n_value = Value; 257 SC.Symbols.push_back(Sym); 258 return {SC, SC.Symbols.size() - 1}; 259 } 260 261 // Call to perform layout on the MachO. Returns the total size of the 262 // resulting file. 263 // This method will automatically insert some load commands (e.g. 264 // LC_SYMTAB) and fill in load command fields. 265 size_t layout() { 266 267 // Build symbol table and add LC_SYMTAB command. 268 makeStringTable(); 269 MachOBuilderLoadCommand<MachOTraits::SymTabCmd> *SymTabLC = nullptr; 270 if (!StrTab.empty()) 271 SymTabLC = &addLoadCommand<MachOTraits::SymTabCmd>(); 272 273 // Lay out header, segment load command, and other load commands. 274 size_t Offset = sizeof(Header); 275 for (auto &Seg : Segments) { 276 Seg.cmdsize += 277 Seg.Sections.size() * sizeof(typename MachOTraits::Section); 278 Seg.nsects = Seg.Sections.size(); 279 Offset += Seg.cmdsize; 280 } 281 for (auto &LC : LoadCommands) 282 Offset += LC->size(); 283 284 Header.sizeofcmds = Offset - sizeof(Header); 285 286 // Lay out content, set segment / section addrs and offsets. 287 size_t SegVMAddr = 0; 288 for (auto &Seg : Segments) { 289 Seg.vmaddr = SegVMAddr; 290 Seg.fileoff = Offset; 291 for (auto &Sec : Seg.Sections) { 292 Offset = alignTo(Offset, size_t{1} << Sec->align); 293 if (Sec->Content.Size) 294 Sec->offset = Offset; 295 Sec->size = Sec->Content.Size; 296 Sec->addr = SegVMAddr + Sec->offset - Seg.fileoff; 297 Offset += Sec->Content.Size; 298 } 299 size_t SegContentSize = Offset - Seg.fileoff; 300 Seg.filesize = SegContentSize; 301 Seg.vmsize = Header.filetype == MachO::MH_OBJECT 302 ? SegContentSize 303 : alignTo(SegContentSize, PageSize); 304 SegVMAddr += Seg.vmsize; 305 } 306 307 // Set string table offsets for non-section symbols. 308 for (auto &Sym : SC.Symbols) 309 Sym.n_strx = StrTab[Sym.n_strx].Offset; 310 311 // Number sections, set symbol section numbers and string table offsets, 312 // count relocations. 313 size_t NumSymbols = SC.Symbols.size(); 314 size_t SectionNumber = 0; 315 for (auto &Seg : Segments) { 316 for (auto &Sec : Seg.Sections) { 317 ++SectionNumber; 318 Sec->SectionNumber = SectionNumber; 319 Sec->SC.SymbolIndexBase = NumSymbols; 320 NumSymbols += Sec->SC.Symbols.size(); 321 for (auto &Sym : Sec->SC.Symbols) { 322 Sym.n_sect = SectionNumber; 323 Sym.n_strx = StrTab[Sym.n_strx].Offset; 324 Sym.n_value += Sec->addr; 325 } 326 } 327 } 328 329 // Handle relocations 330 bool OffsetAlignedForRelocs = false; 331 for (auto &Seg : Segments) { 332 for (auto &Sec : Seg.Sections) { 333 if (!Sec->Relocs.empty()) { 334 if (!OffsetAlignedForRelocs) { 335 Offset = alignTo(Offset, sizeof(MachO::relocation_info)); 336 OffsetAlignedForRelocs = true; 337 } 338 Sec->reloff = Offset; 339 Sec->nreloc = Sec->Relocs.size(); 340 Offset += Sec->Relocs.size() * sizeof(MachO::relocation_info); 341 for (auto &R : Sec->Relocs) 342 R.r_symbolnum = R.Target.isSymbol() ? R.Target.getSymbolNum() 343 : R.Target.getSectionId(); 344 } 345 } 346 } 347 348 // Calculate offset to start of nlist and update symtab command. 349 if (NumSymbols > 0) { 350 Offset = alignTo(Offset, sizeof(typename MachOTraits::NList)); 351 SymTabLC->symoff = Offset; 352 SymTabLC->nsyms = NumSymbols; 353 354 // Calculate string table bounds and update symtab command. 355 if (!StrTab.empty()) { 356 Offset += NumSymbols * sizeof(typename MachOTraits::NList); 357 size_t StringTableSize = 358 StrTab.back().Offset + StrTab.back().S.size() + 1; 359 360 SymTabLC->stroff = Offset; 361 SymTabLC->strsize = StringTableSize; 362 Offset += StringTableSize; 363 } 364 } 365 366 return Offset; 367 } 368 369 void write(MutableArrayRef<char> Buffer) { 370 size_t Offset = 0; 371 Offset = writeHeader(Buffer, Offset); 372 Offset = writeSegments(Buffer, Offset); 373 Offset = writeLoadCommands(Buffer, Offset); 374 Offset = writeSectionContent(Buffer, Offset); 375 Offset = writeRelocations(Buffer, Offset); 376 Offset = writeSymbols(Buffer, Offset); 377 Offset = writeStrings(Buffer, Offset); 378 } 379 380 typename MachOTraits::Header Header; 381 382 private: 383 void makeStringTable() { 384 if (Strings.empty()) 385 return; 386 387 StrTab.resize(Strings.size()); 388 for (auto &KV : Strings) 389 StrTab[KV.second] = {KV.first, 0}; 390 size_t Offset = 0; 391 for (auto &Elem : StrTab) { 392 Elem.Offset = Offset; 393 Offset += Elem.S.size() + 1; 394 } 395 } 396 397 size_t writeHeader(MutableArrayRef<char> Buf, size_t Offset) { 398 Header.ncmds = Segments.size() + LoadCommands.size(); 399 return writeMachOStruct(Buf, Offset, Header, swapStruct()); 400 } 401 402 size_t writeSegments(MutableArrayRef<char> Buf, size_t Offset) { 403 for (auto &Seg : Segments) 404 Offset = Seg.write(Buf, Offset, swapStruct()); 405 return Offset; 406 } 407 408 size_t writeLoadCommands(MutableArrayRef<char> Buf, size_t Offset) { 409 for (auto &LC : LoadCommands) 410 Offset = LC->write(Buf, Offset, swapStruct()); 411 return Offset; 412 } 413 414 size_t writeSectionContent(MutableArrayRef<char> Buf, size_t Offset) { 415 for (auto &Seg : Segments) { 416 for (auto &Sec : Seg.Sections) { 417 if (!Sec->Content.Data) { 418 assert(Sec->Relocs.empty() && 419 "Cant' have relocs for zero-fill segment"); 420 continue; 421 } 422 while (Offset != Sec->offset) 423 Buf[Offset++] = '\0'; 424 425 assert(Offset + Sec->Content.Size <= Buf.size() && "Buffer overflow"); 426 memcpy(&Buf[Offset], Sec->Content.Data, Sec->Content.Size); 427 Offset += Sec->Content.Size; 428 } 429 } 430 return Offset; 431 } 432 433 size_t writeRelocations(MutableArrayRef<char> Buf, size_t Offset) { 434 for (auto &Seg : Segments) { 435 for (auto &Sec : Seg.Sections) { 436 if (!Sec->Relocs.empty()) { 437 while (Offset % sizeof(MachO::relocation_info)) 438 Buf[Offset++] = '\0'; 439 } 440 for (auto &R : Sec->Relocs) { 441 assert(Offset + sizeof(MachO::relocation_info) <= Buf.size() && 442 "Buffer overflow"); 443 memcpy(&Buf[Offset], reinterpret_cast<const char *>(&R.rawStruct()), 444 sizeof(MachO::relocation_info)); 445 Offset += sizeof(MachO::relocation_info); 446 } 447 } 448 } 449 return Offset; 450 } 451 452 size_t writeSymbols(MutableArrayRef<char> Buf, size_t Offset) { 453 454 // Count symbols. 455 size_t NumSymbols = SC.Symbols.size(); 456 for (auto &Seg : Segments) 457 for (auto &Sec : Seg.Sections) 458 NumSymbols += Sec->SC.Symbols.size(); 459 460 // If none then return. 461 if (NumSymbols == 0) 462 return Offset; 463 464 // Align to nlist entry size. 465 while (Offset % sizeof(typename MachOTraits::NList)) 466 Buf[Offset++] = '\0'; 467 468 // Write non-section symbols. 469 for (auto &Sym : SC.Symbols) 470 Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct()); 471 472 // Write section symbols. 473 for (auto &Seg : Segments) { 474 for (auto &Sec : Seg.Sections) { 475 for (auto &Sym : Sec->SC.Symbols) { 476 Offset = writeMachOStruct(Buf, Offset, Sym, swapStruct()); 477 } 478 } 479 } 480 return Offset; 481 } 482 483 size_t writeStrings(MutableArrayRef<char> Buf, size_t Offset) { 484 for (auto &Elem : StrTab) { 485 assert(Offset + Elem.S.size() + 1 <= Buf.size() && "Buffer overflow"); 486 memcpy(&Buf[Offset], Elem.S.data(), Elem.S.size()); 487 Offset += Elem.S.size(); 488 Buf[Offset++] = '\0'; 489 } 490 return Offset; 491 } 492 493 size_t PageSize; 494 std::list<Segment> Segments; 495 std::vector<std::unique_ptr<MachOBuilderLoadCommandBase>> LoadCommands; 496 SymbolContainer SC; 497 498 // Maps strings to their "id" (addition order). 499 std::map<StringRef, size_t> Strings; 500 StringTable StrTab; 501 }; 502 503 struct MachO64LE { 504 using UIntPtr = uint64_t; 505 using Header = MachO::mach_header_64; 506 using Section = MachO::section_64; 507 using NList = MachO::nlist_64; 508 using Relocation = MachO::relocation_info; 509 510 static constexpr llvm::endianness Endianness = llvm::endianness::little; 511 static constexpr uint32_t Magic = MachO::MH_MAGIC_64; 512 static constexpr MachO::LoadCommandType SegmentCmd = MachO::LC_SEGMENT_64; 513 static constexpr MachO::LoadCommandType SymTabCmd = MachO::LC_SYMTAB; 514 }; 515 516 } // namespace orc 517 } // namespace llvm 518 519 #endif // LLVM_EXECUTIONENGINE_ORC_MACHOBUILDER_H 520