1 //===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the MachOObjectFile class, which binds the MachOObject 11 // class to the generic ObjectFile wrapper. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Object/MachO.h" 16 #include "llvm/ADT/Triple.h" 17 #include "llvm/Object/MachOFormat.h" 18 #include "llvm/Support/Casting.h" 19 #include "llvm/Support/DataExtractor.h" 20 #include "llvm/Support/Format.h" 21 #include "llvm/Support/MemoryBuffer.h" 22 #include <cctype> 23 #include <cstring> 24 #include <limits> 25 26 using namespace llvm; 27 using namespace object; 28 29 namespace llvm { 30 namespace object { 31 32 MachOObjectFileBase::MachOObjectFileBase(MemoryBuffer *Object, bool Is64bits, 33 error_code &ec) 34 : ObjectFile(getMachOType(true, Is64bits), Object) { 35 } 36 37 bool MachOObjectFileBase::is64Bit() const { 38 return isa<MachOObjectFile<true> >(this); 39 } 40 41 const MachOObjectFileBase::LoadCommand * 42 MachOObjectFileBase::getLoadCommandInfo(unsigned Index) const { 43 uint64_t Offset; 44 uint64_t NewOffset = getHeaderSize(); 45 const LoadCommand *Load; 46 unsigned I = 0; 47 do { 48 Offset = NewOffset; 49 StringRef Data = getData(Offset, sizeof(LoadCommand)); 50 Load = reinterpret_cast<const LoadCommand*>(Data.data()); 51 NewOffset = Offset + Load->Size; 52 ++I; 53 } while (I != Index + 1); 54 55 return Load; 56 } 57 58 void MachOObjectFileBase::ReadULEB128s(uint64_t Index, 59 SmallVectorImpl<uint64_t> &Out) const { 60 DataExtractor extractor(ObjectFile::getData(), true, 0); 61 62 uint32_t offset = Index; 63 uint64_t data = 0; 64 while (uint64_t delta = extractor.getULEB128(&offset)) { 65 data += delta; 66 Out.push_back(data); 67 } 68 } 69 70 const MachOObjectFileBase::Header *MachOObjectFileBase::getHeader() const { 71 StringRef Data = getData(0, sizeof(Header)); 72 return reinterpret_cast<const Header*>(Data.data()); 73 } 74 75 unsigned MachOObjectFileBase::getHeaderSize() const { 76 return is64Bit() ? macho::Header64Size : macho::Header32Size; 77 } 78 79 StringRef MachOObjectFileBase::getData(size_t Offset, size_t Size) const { 80 return ObjectFile::getData().substr(Offset, Size); 81 } 82 83 ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { 84 StringRef Magic = Buffer->getBuffer().slice(0, 4); 85 error_code ec; 86 bool Is64Bits = Magic == "\xFE\xED\xFA\xCF" || Magic == "\xCF\xFA\xED\xFE"; 87 ObjectFile *Ret; 88 if (Is64Bits) 89 Ret = new MachOObjectFile<true>(Buffer, ec); 90 else 91 Ret = new MachOObjectFile<false>(Buffer, ec); 92 if (ec) 93 return NULL; 94 return Ret; 95 } 96 97 /*===-- Symbols -----------------------------------------------------------===*/ 98 99 void MachOObjectFileBase::moveToNextSymbol(DataRefImpl &DRI) const { 100 uint32_t LoadCommandCount = getHeader()->NumLoadCommands; 101 while (DRI.d.a < LoadCommandCount) { 102 const LoadCommand *Command = getLoadCommandInfo(DRI.d.a); 103 if (Command->Type == macho::LCT_Symtab) { 104 const SymtabLoadCommand *SymtabLoadCmd = 105 reinterpret_cast<const SymtabLoadCommand*>(Command); 106 if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries) 107 return; 108 } 109 110 DRI.d.a++; 111 DRI.d.b = 0; 112 } 113 } 114 115 const MachOObjectFileBase::SymbolTableEntryBase * 116 MachOObjectFileBase::getSymbolTableEntryBase(DataRefImpl DRI) const { 117 const LoadCommand *Command = getLoadCommandInfo(DRI.d.a); 118 const SymtabLoadCommand *SymtabLoadCmd = 119 reinterpret_cast<const SymtabLoadCommand*>(Command); 120 return getSymbolTableEntryBase(DRI, SymtabLoadCmd); 121 } 122 123 const MachOObjectFileBase::SymbolTableEntryBase * 124 MachOObjectFileBase::getSymbolTableEntryBase(DataRefImpl DRI, 125 const SymtabLoadCommand *SymtabLoadCmd) const { 126 uint64_t SymbolTableOffset = SymtabLoadCmd->SymbolTableOffset; 127 unsigned Index = DRI.d.b; 128 129 unsigned SymbolTableEntrySize = is64Bit() ? 130 sizeof(MachOFormat::SymbolTableEntry<true>) : 131 sizeof(MachOFormat::SymbolTableEntry<false>); 132 133 uint64_t Offset = SymbolTableOffset + Index * SymbolTableEntrySize; 134 StringRef Data = getData(Offset, SymbolTableEntrySize); 135 return reinterpret_cast<const SymbolTableEntryBase*>(Data.data()); 136 } 137 138 error_code MachOObjectFileBase::getSymbolNext(DataRefImpl DRI, 139 SymbolRef &Result) const { 140 DRI.d.b++; 141 moveToNextSymbol(DRI); 142 Result = SymbolRef(DRI, this); 143 return object_error::success; 144 } 145 146 error_code MachOObjectFileBase::getSymbolName(DataRefImpl DRI, 147 StringRef &Result) const { 148 const LoadCommand *Command = getLoadCommandInfo(DRI.d.a); 149 const SymtabLoadCommand *SymtabLoadCmd = 150 reinterpret_cast<const SymtabLoadCommand*>(Command); 151 152 StringRef StringTable = getData(SymtabLoadCmd->StringTableOffset, 153 SymtabLoadCmd->StringTableSize); 154 155 const SymbolTableEntryBase *Entry = 156 getSymbolTableEntryBase(DRI, SymtabLoadCmd); 157 uint32_t StringIndex = Entry->StringIndex; 158 159 const char *Start = &StringTable.data()[StringIndex]; 160 Result = StringRef(Start); 161 162 return object_error::success; 163 } 164 165 error_code MachOObjectFileBase::getSymbolNMTypeChar(DataRefImpl DRI, 166 char &Result) const { 167 const SymbolTableEntryBase *Entry = getSymbolTableEntryBase(DRI); 168 uint8_t Type = Entry->Type; 169 uint16_t Flags = Entry->Flags; 170 171 char Char; 172 switch (Type & macho::STF_TypeMask) { 173 case macho::STT_Undefined: 174 Char = 'u'; 175 break; 176 case macho::STT_Absolute: 177 case macho::STT_Section: 178 Char = 's'; 179 break; 180 default: 181 Char = '?'; 182 break; 183 } 184 185 if (Flags & (macho::STF_External | macho::STF_PrivateExtern)) 186 Char = toupper(static_cast<unsigned char>(Char)); 187 Result = Char; 188 return object_error::success; 189 } 190 191 error_code MachOObjectFileBase::getSymbolFlags(DataRefImpl DRI, 192 uint32_t &Result) const { 193 const SymbolTableEntryBase *Entry = getSymbolTableEntryBase(DRI); 194 uint8_t MachOType = Entry->Type; 195 uint16_t MachOFlags = Entry->Flags; 196 197 // TODO: Correctly set SF_ThreadLocal 198 Result = SymbolRef::SF_None; 199 200 if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) 201 Result |= SymbolRef::SF_Undefined; 202 203 if (MachOFlags & macho::STF_StabsEntryMask) 204 Result |= SymbolRef::SF_FormatSpecific; 205 206 if (MachOType & MachO::NlistMaskExternal) { 207 Result |= SymbolRef::SF_Global; 208 if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) 209 Result |= SymbolRef::SF_Common; 210 } 211 212 if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef)) 213 Result |= SymbolRef::SF_Weak; 214 215 if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeAbsolute) 216 Result |= SymbolRef::SF_Absolute; 217 218 return object_error::success; 219 } 220 221 error_code MachOObjectFileBase::getSymbolSection(DataRefImpl Symb, 222 section_iterator &Res) const { 223 const SymbolTableEntryBase *Entry = getSymbolTableEntryBase(Symb); 224 uint8_t index = Entry->SectionIndex; 225 226 if (index == 0) 227 Res = end_sections(); 228 else 229 Res = section_iterator(SectionRef(Sections[index-1], this)); 230 231 return object_error::success; 232 } 233 234 error_code MachOObjectFileBase::getSymbolType(DataRefImpl Symb, 235 SymbolRef::Type &Res) const { 236 const SymbolTableEntryBase *Entry = getSymbolTableEntryBase(Symb); 237 uint8_t n_type = Entry->Type; 238 239 Res = SymbolRef::ST_Other; 240 241 // If this is a STAB debugging symbol, we can do nothing more. 242 if (n_type & MachO::NlistMaskStab) { 243 Res = SymbolRef::ST_Debug; 244 return object_error::success; 245 } 246 247 switch (n_type & MachO::NlistMaskType) { 248 case MachO::NListTypeUndefined : 249 Res = SymbolRef::ST_Unknown; 250 break; 251 case MachO::NListTypeSection : 252 Res = SymbolRef::ST_Function; 253 break; 254 } 255 return object_error::success; 256 } 257 258 error_code MachOObjectFileBase::getSymbolValue(DataRefImpl Symb, 259 uint64_t &Val) const { 260 report_fatal_error("getSymbolValue unimplemented in MachOObjectFileBase"); 261 } 262 263 symbol_iterator MachOObjectFileBase::begin_symbols() const { 264 // DRI.d.a = segment number; DRI.d.b = symbol index. 265 DataRefImpl DRI; 266 moveToNextSymbol(DRI); 267 return symbol_iterator(SymbolRef(DRI, this)); 268 } 269 270 symbol_iterator MachOObjectFileBase::end_symbols() const { 271 DataRefImpl DRI; 272 DRI.d.a = getHeader()->NumLoadCommands; 273 return symbol_iterator(SymbolRef(DRI, this)); 274 } 275 276 symbol_iterator MachOObjectFileBase::begin_dynamic_symbols() const { 277 // TODO: implement 278 report_fatal_error("Dynamic symbols unimplemented in MachOObjectFileBase"); 279 } 280 281 symbol_iterator MachOObjectFileBase::end_dynamic_symbols() const { 282 // TODO: implement 283 report_fatal_error("Dynamic symbols unimplemented in MachOObjectFileBase"); 284 } 285 286 library_iterator MachOObjectFileBase::begin_libraries_needed() const { 287 // TODO: implement 288 report_fatal_error("Needed libraries unimplemented in MachOObjectFileBase"); 289 } 290 291 library_iterator MachOObjectFileBase::end_libraries_needed() const { 292 // TODO: implement 293 report_fatal_error("Needed libraries unimplemented in MachOObjectFileBase"); 294 } 295 296 StringRef MachOObjectFileBase::getLoadName() const { 297 // TODO: Implement 298 report_fatal_error("get_load_name() unimplemented in MachOObjectFileBase"); 299 } 300 301 /*===-- Sections ----------------------------------------------------------===*/ 302 303 std::size_t MachOObjectFileBase::getSectionIndex(DataRefImpl Sec) const { 304 SectionList::const_iterator loc = 305 std::find(Sections.begin(), Sections.end(), Sec); 306 assert(loc != Sections.end() && "Sec is not a valid section!"); 307 return std::distance(Sections.begin(), loc); 308 } 309 310 const MachOObjectFileBase::SectionBase* 311 MachOObjectFileBase::getSectionBase(DataRefImpl DRI) const { 312 const LoadCommand *Command = getLoadCommandInfo(DRI.d.a); 313 uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(Command); 314 315 bool Is64 = is64Bit(); 316 unsigned SegmentLoadSize = 317 Is64 ? sizeof(MachOFormat::SegmentLoadCommand<true>) : 318 sizeof(MachOFormat::SegmentLoadCommand<false>); 319 unsigned SectionSize = Is64 ? sizeof(MachOFormat::Section<true>) : 320 sizeof(MachOFormat::Section<false>); 321 322 uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + DRI.d.b * SectionSize; 323 return reinterpret_cast<const SectionBase*>(SectionAddr); 324 } 325 326 static StringRef parseSegmentOrSectionName(const char *P) { 327 if (P[15] == 0) 328 // Null terminated. 329 return P; 330 // Not null terminated, so this is a 16 char string. 331 return StringRef(P, 16); 332 } 333 334 ArrayRef<char> MachOObjectFileBase::getSectionRawName(DataRefImpl DRI) const { 335 const SectionBase *Base = getSectionBase(DRI); 336 return ArrayRef<char>(Base->Name); 337 } 338 339 error_code MachOObjectFileBase::getSectionName(DataRefImpl DRI, 340 StringRef &Result) const { 341 ArrayRef<char> Raw = getSectionRawName(DRI); 342 Result = parseSegmentOrSectionName(Raw.data()); 343 return object_error::success; 344 } 345 346 ArrayRef<char> 347 MachOObjectFileBase::getSectionRawFinalSegmentName(DataRefImpl Sec) const { 348 const SectionBase *Base = getSectionBase(Sec); 349 return ArrayRef<char>(Base->SegmentName); 350 } 351 352 StringRef 353 MachOObjectFileBase::getSectionFinalSegmentName(DataRefImpl DRI) const { 354 ArrayRef<char> Raw = getSectionRawFinalSegmentName(DRI); 355 return parseSegmentOrSectionName(Raw.data()); 356 } 357 358 error_code MachOObjectFileBase::isSectionData(DataRefImpl DRI, 359 bool &Result) const { 360 // FIXME: Unimplemented. 361 Result = false; 362 return object_error::success; 363 } 364 365 error_code MachOObjectFileBase::isSectionBSS(DataRefImpl DRI, 366 bool &Result) const { 367 // FIXME: Unimplemented. 368 Result = false; 369 return object_error::success; 370 } 371 372 error_code 373 MachOObjectFileBase::isSectionRequiredForExecution(DataRefImpl Sec, 374 bool &Result) const { 375 // FIXME: Unimplemented. 376 Result = true; 377 return object_error::success; 378 } 379 380 error_code MachOObjectFileBase::isSectionVirtual(DataRefImpl Sec, 381 bool &Result) const { 382 // FIXME: Unimplemented. 383 Result = false; 384 return object_error::success; 385 } 386 387 error_code MachOObjectFileBase::isSectionReadOnlyData(DataRefImpl Sec, 388 bool &Result) const { 389 // Consider using the code from isSectionText to look for __const sections. 390 // Alternately, emit S_ATTR_PURE_INSTRUCTIONS and/or S_ATTR_SOME_INSTRUCTIONS 391 // to use section attributes to distinguish code from data. 392 393 // FIXME: Unimplemented. 394 Result = false; 395 return object_error::success; 396 } 397 398 relocation_iterator MachOObjectFileBase::getSectionRelBegin(DataRefImpl Sec) const { 399 DataRefImpl ret; 400 ret.d.b = getSectionIndex(Sec); 401 return relocation_iterator(RelocationRef(ret, this)); 402 } 403 404 section_iterator MachOObjectFileBase::end_sections() const { 405 DataRefImpl DRI; 406 DRI.d.a = getHeader()->NumLoadCommands; 407 return section_iterator(SectionRef(DRI, this)); 408 } 409 410 /*===-- Relocations -------------------------------------------------------===*/ 411 412 error_code MachOObjectFileBase::getRelocationNext(DataRefImpl Rel, 413 RelocationRef &Res) const { 414 ++Rel.d.a; 415 Res = RelocationRef(Rel, this); 416 return object_error::success; 417 } 418 419 // Helper to advance a section or symbol iterator multiple increments at a time. 420 template<class T> 421 error_code advance(T &it, size_t Val) { 422 error_code ec; 423 while (Val--) { 424 it.increment(ec); 425 } 426 return ec; 427 } 428 429 template<class T> 430 void advanceTo(T &it, size_t Val) { 431 if (error_code ec = advance(it, Val)) 432 report_fatal_error(ec.message()); 433 } 434 435 void 436 MachOObjectFileBase::printRelocationTargetName(const RelocationEntry *RE, 437 raw_string_ostream &fmt) const { 438 unsigned Arch = getArch(); 439 bool isScattered = (Arch != Triple::x86_64) && 440 (RE->Word0 & macho::RF_Scattered); 441 442 // Target of a scattered relocation is an address. In the interest of 443 // generating pretty output, scan through the symbol table looking for a 444 // symbol that aligns with that address. If we find one, print it. 445 // Otherwise, we just print the hex address of the target. 446 if (isScattered) { 447 uint32_t Val = RE->Word1; 448 449 error_code ec; 450 for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE; 451 SI.increment(ec)) { 452 if (ec) report_fatal_error(ec.message()); 453 454 uint64_t Addr; 455 StringRef Name; 456 457 if ((ec = SI->getAddress(Addr))) 458 report_fatal_error(ec.message()); 459 if (Addr != Val) continue; 460 if ((ec = SI->getName(Name))) 461 report_fatal_error(ec.message()); 462 fmt << Name; 463 return; 464 } 465 466 // If we couldn't find a symbol that this relocation refers to, try 467 // to find a section beginning instead. 468 for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE; 469 SI.increment(ec)) { 470 if (ec) report_fatal_error(ec.message()); 471 472 uint64_t Addr; 473 StringRef Name; 474 475 if ((ec = SI->getAddress(Addr))) 476 report_fatal_error(ec.message()); 477 if (Addr != Val) continue; 478 if ((ec = SI->getName(Name))) 479 report_fatal_error(ec.message()); 480 fmt << Name; 481 return; 482 } 483 484 fmt << format("0x%x", Val); 485 return; 486 } 487 488 StringRef S; 489 bool isExtern = (RE->Word1 >> 27) & 1; 490 uint32_t Val = RE->Word1 & 0xFFFFFF; 491 492 if (isExtern) { 493 symbol_iterator SI = begin_symbols(); 494 advanceTo(SI, Val); 495 SI->getName(S); 496 } else { 497 section_iterator SI = begin_sections(); 498 advanceTo(SI, Val); 499 SI->getName(S); 500 } 501 502 fmt << S; 503 } 504 505 error_code MachOObjectFileBase::getLibraryNext(DataRefImpl LibData, 506 LibraryRef &Res) const { 507 report_fatal_error("Needed libraries unimplemented in MachOObjectFileBase"); 508 } 509 510 error_code MachOObjectFileBase::getLibraryPath(DataRefImpl LibData, 511 StringRef &Res) const { 512 report_fatal_error("Needed libraries unimplemented in MachOObjectFileBase"); 513 } 514 515 516 /*===-- Miscellaneous -----------------------------------------------------===*/ 517 518 uint8_t MachOObjectFileBase::getBytesInAddress() const { 519 return is64Bit() ? 8 : 4; 520 } 521 522 StringRef MachOObjectFileBase::getFileFormatName() const { 523 if (!is64Bit()) { 524 switch (getHeader()->CPUType) { 525 case llvm::MachO::CPUTypeI386: 526 return "Mach-O 32-bit i386"; 527 case llvm::MachO::CPUTypeARM: 528 return "Mach-O arm"; 529 case llvm::MachO::CPUTypePowerPC: 530 return "Mach-O 32-bit ppc"; 531 default: 532 assert((getHeader()->CPUType & llvm::MachO::CPUArchABI64) == 0 && 533 "64-bit object file when we're not 64-bit?"); 534 return "Mach-O 32-bit unknown"; 535 } 536 } 537 538 // Make sure the cpu type has the correct mask. 539 assert((getHeader()->CPUType & llvm::MachO::CPUArchABI64) 540 == llvm::MachO::CPUArchABI64 && 541 "32-bit object file when we're 64-bit?"); 542 543 switch (getHeader()->CPUType) { 544 case llvm::MachO::CPUTypeX86_64: 545 return "Mach-O 64-bit x86-64"; 546 case llvm::MachO::CPUTypePowerPC64: 547 return "Mach-O 64-bit ppc64"; 548 default: 549 return "Mach-O 64-bit unknown"; 550 } 551 } 552 553 unsigned MachOObjectFileBase::getArch() const { 554 switch (getHeader()->CPUType) { 555 case llvm::MachO::CPUTypeI386: 556 return Triple::x86; 557 case llvm::MachO::CPUTypeX86_64: 558 return Triple::x86_64; 559 case llvm::MachO::CPUTypeARM: 560 return Triple::arm; 561 case llvm::MachO::CPUTypePowerPC: 562 return Triple::ppc; 563 case llvm::MachO::CPUTypePowerPC64: 564 return Triple::ppc64; 565 default: 566 return Triple::UnknownArch; 567 } 568 } 569 570 } // end namespace object 571 } // end namespace llvm 572