1 //===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the MachOObjectFile class, which binds the MachOObject 11 // class to the generic ObjectFile wrapper. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Object/MachO.h" 16 #include "llvm/ADT/Triple.h" 17 #include "llvm/Object/MachOFormat.h" 18 #include "llvm/Support/Casting.h" 19 #include "llvm/Support/DataExtractor.h" 20 #include "llvm/Support/Format.h" 21 #include "llvm/Support/MemoryBuffer.h" 22 #include <cctype> 23 #include <cstring> 24 #include <limits> 25 26 using namespace llvm; 27 using namespace object; 28 29 namespace llvm { 30 namespace object { 31 32 MachOObjectFileBase::MachOObjectFileBase(MemoryBuffer *Object, bool Is64bits, 33 error_code &ec) 34 : ObjectFile(getMachOType(true, Is64bits), Object) { 35 } 36 37 bool MachOObjectFileBase::is64Bit() const { 38 return isa<MachOObjectFile64Le>(this); 39 } 40 41 const MachOObjectFileBase::LoadCommand * 42 MachOObjectFileBase::getLoadCommandInfo(unsigned Index) const { 43 uint64_t Offset; 44 uint64_t NewOffset = getHeaderSize(); 45 const LoadCommand *Load; 46 unsigned I = 0; 47 do { 48 Offset = NewOffset; 49 StringRef Data = getData(Offset, sizeof(LoadCommand)); 50 Load = reinterpret_cast<const LoadCommand*>(Data.data()); 51 NewOffset = Offset + Load->Size; 52 ++I; 53 } while (I != Index + 1); 54 55 return Load; 56 } 57 58 void MachOObjectFileBase::ReadULEB128s(uint64_t Index, 59 SmallVectorImpl<uint64_t> &Out) const { 60 DataExtractor extractor(ObjectFile::getData(), true, 0); 61 62 uint32_t offset = Index; 63 uint64_t data = 0; 64 while (uint64_t delta = extractor.getULEB128(&offset)) { 65 data += delta; 66 Out.push_back(data); 67 } 68 } 69 70 const MachOObjectFileBase::Header *MachOObjectFileBase::getHeader() const { 71 StringRef Data = getData(0, sizeof(Header)); 72 return reinterpret_cast<const Header*>(Data.data()); 73 } 74 75 unsigned MachOObjectFileBase::getHeaderSize() const { 76 return is64Bit() ? macho::Header64Size : macho::Header32Size; 77 } 78 79 StringRef MachOObjectFileBase::getData(size_t Offset, size_t Size) const { 80 return ObjectFile::getData().substr(Offset, Size); 81 } 82 83 const MachOObjectFileBase::RelocationEntry * 84 MachOObjectFileBase::getRelocation(DataRefImpl Rel) const { 85 if (const MachOObjectFile32Le *O = dyn_cast<MachOObjectFile32Le>(this)) 86 return O->getRelocation(Rel); 87 const MachOObjectFile64Le *O = dyn_cast<MachOObjectFile64Le>(this); 88 return O->getRelocation(Rel); 89 } 90 91 bool MachOObjectFileBase::isScattered(const RelocationEntry *RE) const { 92 unsigned Arch = getArch(); 93 return (Arch != Triple::x86_64) && (RE->Address & macho::RF_Scattered); 94 } 95 96 bool MachOObjectFileBase::isPCRel(const RelocationEntry *RE) const { 97 if (isScattered(RE)) { 98 const ScatteredRelocationEntry *SRE = 99 reinterpret_cast<const ScatteredRelocationEntry *>(RE); 100 return SRE->getPCRel(); 101 } 102 return RE->getPCRel(); 103 } 104 105 unsigned MachOObjectFileBase::getLength(const RelocationEntry *RE) const { 106 if (isScattered(RE)) { 107 const ScatteredRelocationEntry *SRE = 108 reinterpret_cast<const ScatteredRelocationEntry *>(RE); 109 return SRE->getLength(); 110 } 111 return RE->getLength(); 112 } 113 114 unsigned MachOObjectFileBase::getType(const RelocationEntry *RE) const { 115 if (isScattered(RE)) { 116 const ScatteredRelocationEntry *SRE = 117 reinterpret_cast<const ScatteredRelocationEntry *>(RE); 118 return SRE->getType(); 119 } 120 return RE->getType(); 121 } 122 123 ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { 124 StringRef Magic = Buffer->getBuffer().slice(0, 4); 125 error_code ec; 126 bool Is64Bits = Magic == "\xFE\xED\xFA\xCF" || Magic == "\xCF\xFA\xED\xFE"; 127 ObjectFile *Ret; 128 if (Is64Bits) 129 Ret = new MachOObjectFile64Le(Buffer, ec); 130 else 131 Ret = new MachOObjectFile32Le(Buffer, ec); 132 if (ec) 133 return NULL; 134 return Ret; 135 } 136 137 /*===-- Symbols -----------------------------------------------------------===*/ 138 139 void MachOObjectFileBase::moveToNextSymbol(DataRefImpl &DRI) const { 140 uint32_t LoadCommandCount = getHeader()->NumLoadCommands; 141 while (DRI.d.a < LoadCommandCount) { 142 const LoadCommand *Command = getLoadCommandInfo(DRI.d.a); 143 if (Command->Type == macho::LCT_Symtab) { 144 const SymtabLoadCommand *SymtabLoadCmd = 145 reinterpret_cast<const SymtabLoadCommand*>(Command); 146 if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries) 147 return; 148 } 149 150 DRI.d.a++; 151 DRI.d.b = 0; 152 } 153 } 154 155 const MachOObjectFileBase::SymbolTableEntryBase * 156 MachOObjectFileBase::getSymbolTableEntryBase(DataRefImpl DRI) const { 157 const LoadCommand *Command = getLoadCommandInfo(DRI.d.a); 158 const SymtabLoadCommand *SymtabLoadCmd = 159 reinterpret_cast<const SymtabLoadCommand*>(Command); 160 return getSymbolTableEntryBase(DRI, SymtabLoadCmd); 161 } 162 163 const MachOObjectFileBase::SymbolTableEntryBase * 164 MachOObjectFileBase::getSymbolTableEntryBase(DataRefImpl DRI, 165 const SymtabLoadCommand *SymtabLoadCmd) const { 166 uint64_t SymbolTableOffset = SymtabLoadCmd->SymbolTableOffset; 167 unsigned Index = DRI.d.b; 168 169 unsigned SymbolTableEntrySize = is64Bit() ? 170 sizeof(MachOObjectFile64Le::SymbolTableEntry) : 171 sizeof(MachOObjectFile32Le::SymbolTableEntry); 172 173 uint64_t Offset = SymbolTableOffset + Index * SymbolTableEntrySize; 174 StringRef Data = getData(Offset, SymbolTableEntrySize); 175 return reinterpret_cast<const SymbolTableEntryBase*>(Data.data()); 176 } 177 178 error_code MachOObjectFileBase::getSymbolNext(DataRefImpl DRI, 179 SymbolRef &Result) const { 180 DRI.d.b++; 181 moveToNextSymbol(DRI); 182 Result = SymbolRef(DRI, this); 183 return object_error::success; 184 } 185 186 error_code MachOObjectFileBase::getSymbolName(DataRefImpl DRI, 187 StringRef &Result) const { 188 const LoadCommand *Command = getLoadCommandInfo(DRI.d.a); 189 const SymtabLoadCommand *SymtabLoadCmd = 190 reinterpret_cast<const SymtabLoadCommand*>(Command); 191 192 StringRef StringTable = getData(SymtabLoadCmd->StringTableOffset, 193 SymtabLoadCmd->StringTableSize); 194 195 const SymbolTableEntryBase *Entry = 196 getSymbolTableEntryBase(DRI, SymtabLoadCmd); 197 uint32_t StringIndex = Entry->StringIndex; 198 199 const char *Start = &StringTable.data()[StringIndex]; 200 Result = StringRef(Start); 201 202 return object_error::success; 203 } 204 205 error_code MachOObjectFileBase::getSymbolNMTypeChar(DataRefImpl DRI, 206 char &Result) const { 207 const SymbolTableEntryBase *Entry = getSymbolTableEntryBase(DRI); 208 uint8_t Type = Entry->Type; 209 uint16_t Flags = Entry->Flags; 210 211 char Char; 212 switch (Type & macho::STF_TypeMask) { 213 case macho::STT_Undefined: 214 Char = 'u'; 215 break; 216 case macho::STT_Absolute: 217 case macho::STT_Section: 218 Char = 's'; 219 break; 220 default: 221 Char = '?'; 222 break; 223 } 224 225 if (Flags & (macho::STF_External | macho::STF_PrivateExtern)) 226 Char = toupper(static_cast<unsigned char>(Char)); 227 Result = Char; 228 return object_error::success; 229 } 230 231 error_code MachOObjectFileBase::getSymbolFlags(DataRefImpl DRI, 232 uint32_t &Result) const { 233 const SymbolTableEntryBase *Entry = getSymbolTableEntryBase(DRI); 234 uint8_t MachOType = Entry->Type; 235 uint16_t MachOFlags = Entry->Flags; 236 237 // TODO: Correctly set SF_ThreadLocal 238 Result = SymbolRef::SF_None; 239 240 if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) 241 Result |= SymbolRef::SF_Undefined; 242 243 if (MachOFlags & macho::STF_StabsEntryMask) 244 Result |= SymbolRef::SF_FormatSpecific; 245 246 if (MachOType & MachO::NlistMaskExternal) { 247 Result |= SymbolRef::SF_Global; 248 if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) 249 Result |= SymbolRef::SF_Common; 250 } 251 252 if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef)) 253 Result |= SymbolRef::SF_Weak; 254 255 if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeAbsolute) 256 Result |= SymbolRef::SF_Absolute; 257 258 return object_error::success; 259 } 260 261 error_code MachOObjectFileBase::getSymbolSection(DataRefImpl Symb, 262 section_iterator &Res) const { 263 const SymbolTableEntryBase *Entry = getSymbolTableEntryBase(Symb); 264 uint8_t index = Entry->SectionIndex; 265 266 if (index == 0) 267 Res = end_sections(); 268 else 269 Res = section_iterator(SectionRef(Sections[index-1], this)); 270 271 return object_error::success; 272 } 273 274 error_code MachOObjectFileBase::getSymbolType(DataRefImpl Symb, 275 SymbolRef::Type &Res) const { 276 const SymbolTableEntryBase *Entry = getSymbolTableEntryBase(Symb); 277 uint8_t n_type = Entry->Type; 278 279 Res = SymbolRef::ST_Other; 280 281 // If this is a STAB debugging symbol, we can do nothing more. 282 if (n_type & MachO::NlistMaskStab) { 283 Res = SymbolRef::ST_Debug; 284 return object_error::success; 285 } 286 287 switch (n_type & MachO::NlistMaskType) { 288 case MachO::NListTypeUndefined : 289 Res = SymbolRef::ST_Unknown; 290 break; 291 case MachO::NListTypeSection : 292 Res = SymbolRef::ST_Function; 293 break; 294 } 295 return object_error::success; 296 } 297 298 error_code MachOObjectFileBase::getSymbolValue(DataRefImpl Symb, 299 uint64_t &Val) const { 300 report_fatal_error("getSymbolValue unimplemented in MachOObjectFileBase"); 301 } 302 303 symbol_iterator MachOObjectFileBase::begin_symbols() const { 304 // DRI.d.a = segment number; DRI.d.b = symbol index. 305 DataRefImpl DRI; 306 moveToNextSymbol(DRI); 307 return symbol_iterator(SymbolRef(DRI, this)); 308 } 309 310 symbol_iterator MachOObjectFileBase::end_symbols() const { 311 DataRefImpl DRI; 312 DRI.d.a = getHeader()->NumLoadCommands; 313 return symbol_iterator(SymbolRef(DRI, this)); 314 } 315 316 symbol_iterator MachOObjectFileBase::begin_dynamic_symbols() const { 317 // TODO: implement 318 report_fatal_error("Dynamic symbols unimplemented in MachOObjectFileBase"); 319 } 320 321 symbol_iterator MachOObjectFileBase::end_dynamic_symbols() const { 322 // TODO: implement 323 report_fatal_error("Dynamic symbols unimplemented in MachOObjectFileBase"); 324 } 325 326 library_iterator MachOObjectFileBase::begin_libraries_needed() const { 327 // TODO: implement 328 report_fatal_error("Needed libraries unimplemented in MachOObjectFileBase"); 329 } 330 331 library_iterator MachOObjectFileBase::end_libraries_needed() const { 332 // TODO: implement 333 report_fatal_error("Needed libraries unimplemented in MachOObjectFileBase"); 334 } 335 336 StringRef MachOObjectFileBase::getLoadName() const { 337 // TODO: Implement 338 report_fatal_error("get_load_name() unimplemented in MachOObjectFileBase"); 339 } 340 341 /*===-- Sections ----------------------------------------------------------===*/ 342 343 std::size_t MachOObjectFileBase::getSectionIndex(DataRefImpl Sec) const { 344 SectionList::const_iterator loc = 345 std::find(Sections.begin(), Sections.end(), Sec); 346 assert(loc != Sections.end() && "Sec is not a valid section!"); 347 return std::distance(Sections.begin(), loc); 348 } 349 350 const MachOObjectFileBase::SectionBase* 351 MachOObjectFileBase::getSectionBase(DataRefImpl DRI) const { 352 const LoadCommand *Command = getLoadCommandInfo(DRI.d.a); 353 uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(Command); 354 355 bool Is64 = is64Bit(); 356 unsigned SegmentLoadSize = 357 Is64 ? sizeof(MachOObjectFile64Le::SegmentLoadCommand) : 358 sizeof(MachOObjectFile32Le::SegmentLoadCommand); 359 unsigned SectionSize = Is64 ? sizeof(MachOObjectFile64Le::Section) : 360 sizeof(MachOObjectFile32Le::Section); 361 362 uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + DRI.d.b * SectionSize; 363 return reinterpret_cast<const SectionBase*>(SectionAddr); 364 } 365 366 static StringRef parseSegmentOrSectionName(const char *P) { 367 if (P[15] == 0) 368 // Null terminated. 369 return P; 370 // Not null terminated, so this is a 16 char string. 371 return StringRef(P, 16); 372 } 373 374 ArrayRef<char> MachOObjectFileBase::getSectionRawName(DataRefImpl DRI) const { 375 const SectionBase *Base = getSectionBase(DRI); 376 return ArrayRef<char>(Base->Name); 377 } 378 379 error_code MachOObjectFileBase::getSectionName(DataRefImpl DRI, 380 StringRef &Result) const { 381 ArrayRef<char> Raw = getSectionRawName(DRI); 382 Result = parseSegmentOrSectionName(Raw.data()); 383 return object_error::success; 384 } 385 386 ArrayRef<char> 387 MachOObjectFileBase::getSectionRawFinalSegmentName(DataRefImpl Sec) const { 388 const SectionBase *Base = getSectionBase(Sec); 389 return ArrayRef<char>(Base->SegmentName); 390 } 391 392 StringRef 393 MachOObjectFileBase::getSectionFinalSegmentName(DataRefImpl DRI) const { 394 ArrayRef<char> Raw = getSectionRawFinalSegmentName(DRI); 395 return parseSegmentOrSectionName(Raw.data()); 396 } 397 398 error_code MachOObjectFileBase::isSectionData(DataRefImpl DRI, 399 bool &Result) const { 400 // FIXME: Unimplemented. 401 Result = false; 402 return object_error::success; 403 } 404 405 error_code MachOObjectFileBase::isSectionBSS(DataRefImpl DRI, 406 bool &Result) const { 407 // FIXME: Unimplemented. 408 Result = false; 409 return object_error::success; 410 } 411 412 error_code 413 MachOObjectFileBase::isSectionRequiredForExecution(DataRefImpl Sec, 414 bool &Result) const { 415 // FIXME: Unimplemented. 416 Result = true; 417 return object_error::success; 418 } 419 420 error_code MachOObjectFileBase::isSectionVirtual(DataRefImpl Sec, 421 bool &Result) const { 422 // FIXME: Unimplemented. 423 Result = false; 424 return object_error::success; 425 } 426 427 error_code MachOObjectFileBase::isSectionReadOnlyData(DataRefImpl Sec, 428 bool &Result) const { 429 // Consider using the code from isSectionText to look for __const sections. 430 // Alternately, emit S_ATTR_PURE_INSTRUCTIONS and/or S_ATTR_SOME_INSTRUCTIONS 431 // to use section attributes to distinguish code from data. 432 433 // FIXME: Unimplemented. 434 Result = false; 435 return object_error::success; 436 } 437 438 relocation_iterator MachOObjectFileBase::getSectionRelBegin(DataRefImpl Sec) const { 439 DataRefImpl ret; 440 ret.d.b = getSectionIndex(Sec); 441 return relocation_iterator(RelocationRef(ret, this)); 442 } 443 444 section_iterator MachOObjectFileBase::end_sections() const { 445 DataRefImpl DRI; 446 DRI.d.a = getHeader()->NumLoadCommands; 447 return section_iterator(SectionRef(DRI, this)); 448 } 449 450 /*===-- Relocations -------------------------------------------------------===*/ 451 452 error_code MachOObjectFileBase::getRelocationNext(DataRefImpl Rel, 453 RelocationRef &Res) const { 454 ++Rel.d.a; 455 Res = RelocationRef(Rel, this); 456 return object_error::success; 457 } 458 459 // Helper to advance a section or symbol iterator multiple increments at a time. 460 template<class T> 461 error_code advance(T &it, size_t Val) { 462 error_code ec; 463 while (Val--) { 464 it.increment(ec); 465 } 466 return ec; 467 } 468 469 template<class T> 470 void advanceTo(T &it, size_t Val) { 471 if (error_code ec = advance(it, Val)) 472 report_fatal_error(ec.message()); 473 } 474 475 void 476 MachOObjectFileBase::printRelocationTargetName(const RelocationEntry *RE, 477 raw_string_ostream &fmt) const { 478 // Target of a scattered relocation is an address. In the interest of 479 // generating pretty output, scan through the symbol table looking for a 480 // symbol that aligns with that address. If we find one, print it. 481 // Otherwise, we just print the hex address of the target. 482 if (isScattered(RE)) { 483 uint32_t Val = RE->SymbolNum; 484 485 error_code ec; 486 for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE; 487 SI.increment(ec)) { 488 if (ec) report_fatal_error(ec.message()); 489 490 uint64_t Addr; 491 StringRef Name; 492 493 if ((ec = SI->getAddress(Addr))) 494 report_fatal_error(ec.message()); 495 if (Addr != Val) continue; 496 if ((ec = SI->getName(Name))) 497 report_fatal_error(ec.message()); 498 fmt << Name; 499 return; 500 } 501 502 // If we couldn't find a symbol that this relocation refers to, try 503 // to find a section beginning instead. 504 for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE; 505 SI.increment(ec)) { 506 if (ec) report_fatal_error(ec.message()); 507 508 uint64_t Addr; 509 StringRef Name; 510 511 if ((ec = SI->getAddress(Addr))) 512 report_fatal_error(ec.message()); 513 if (Addr != Val) continue; 514 if ((ec = SI->getName(Name))) 515 report_fatal_error(ec.message()); 516 fmt << Name; 517 return; 518 } 519 520 fmt << format("0x%x", Val); 521 return; 522 } 523 524 StringRef S; 525 bool isExtern = RE->getExternal(); 526 uint32_t Val = RE->Address; 527 528 if (isExtern) { 529 symbol_iterator SI = begin_symbols(); 530 advanceTo(SI, Val); 531 SI->getName(S); 532 } else { 533 section_iterator SI = begin_sections(); 534 advanceTo(SI, Val); 535 SI->getName(S); 536 } 537 538 fmt << S; 539 } 540 541 error_code MachOObjectFileBase::getLibraryNext(DataRefImpl LibData, 542 LibraryRef &Res) const { 543 report_fatal_error("Needed libraries unimplemented in MachOObjectFileBase"); 544 } 545 546 error_code MachOObjectFileBase::getLibraryPath(DataRefImpl LibData, 547 StringRef &Res) const { 548 report_fatal_error("Needed libraries unimplemented in MachOObjectFileBase"); 549 } 550 551 error_code MachOObjectFileBase::getRelocationAdditionalInfo(DataRefImpl Rel, 552 int64_t &Res) const { 553 Res = 0; 554 return object_error::success; 555 } 556 557 558 /*===-- Miscellaneous -----------------------------------------------------===*/ 559 560 uint8_t MachOObjectFileBase::getBytesInAddress() const { 561 return is64Bit() ? 8 : 4; 562 } 563 564 StringRef MachOObjectFileBase::getFileFormatName() const { 565 if (!is64Bit()) { 566 switch (getHeader()->CPUType) { 567 case llvm::MachO::CPUTypeI386: 568 return "Mach-O 32-bit i386"; 569 case llvm::MachO::CPUTypeARM: 570 return "Mach-O arm"; 571 case llvm::MachO::CPUTypePowerPC: 572 return "Mach-O 32-bit ppc"; 573 default: 574 assert((getHeader()->CPUType & llvm::MachO::CPUArchABI64) == 0 && 575 "64-bit object file when we're not 64-bit?"); 576 return "Mach-O 32-bit unknown"; 577 } 578 } 579 580 // Make sure the cpu type has the correct mask. 581 assert((getHeader()->CPUType & llvm::MachO::CPUArchABI64) 582 == llvm::MachO::CPUArchABI64 && 583 "32-bit object file when we're 64-bit?"); 584 585 switch (getHeader()->CPUType) { 586 case llvm::MachO::CPUTypeX86_64: 587 return "Mach-O 64-bit x86-64"; 588 case llvm::MachO::CPUTypePowerPC64: 589 return "Mach-O 64-bit ppc64"; 590 default: 591 return "Mach-O 64-bit unknown"; 592 } 593 } 594 595 unsigned MachOObjectFileBase::getArch() const { 596 switch (getHeader()->CPUType) { 597 case llvm::MachO::CPUTypeI386: 598 return Triple::x86; 599 case llvm::MachO::CPUTypeX86_64: 600 return Triple::x86_64; 601 case llvm::MachO::CPUTypeARM: 602 return Triple::arm; 603 case llvm::MachO::CPUTypePowerPC: 604 return Triple::ppc; 605 case llvm::MachO::CPUTypePowerPC64: 606 return Triple::ppc64; 607 default: 608 return Triple::UnknownArch; 609 } 610 } 611 612 } // end namespace object 613 } // end namespace llvm 614