125d7b4fbSAlexey Lapshin //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===// 225d7b4fbSAlexey Lapshin // 325d7b4fbSAlexey Lapshin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 425d7b4fbSAlexey Lapshin // See https://llvm.org/LICENSE.txt for license information. 525d7b4fbSAlexey Lapshin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 625d7b4fbSAlexey Lapshin // 725d7b4fbSAlexey Lapshin //===----------------------------------------------------------------------===// 825d7b4fbSAlexey Lapshin 925d7b4fbSAlexey Lapshin #include "MachOObject.h" 1025d7b4fbSAlexey Lapshin #include "llvm/ADT/SmallPtrSet.h" 11ec41462dSAbhina Sree #include "llvm/Support/SystemZ/zOSSupport.h" 1225d7b4fbSAlexey Lapshin #include <unordered_set> 1325d7b4fbSAlexey Lapshin 1425d7b4fbSAlexey Lapshin using namespace llvm; 1525d7b4fbSAlexey Lapshin using namespace llvm::objcopy::macho; 1625d7b4fbSAlexey Lapshin 17ccde601fSAngelo Matni Section::Section(StringRef SegName, StringRef SectName) 1825915c6aSAlexander Shaposhnikov : Segname(SegName), Sectname(SectName), 19ccde601fSAngelo Matni CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {} 20ccde601fSAngelo Matni 21ccde601fSAngelo Matni Section::Section(StringRef SegName, StringRef SectName, StringRef Content) 2225915c6aSAlexander Shaposhnikov : Segname(SegName), Sectname(SectName), 23ccde601fSAngelo Matni CanonicalName((Twine(SegName) + Twine(',') + SectName).str()), 24ccde601fSAngelo Matni Content(Content) {} 25ccde601fSAngelo Matni 2625d7b4fbSAlexey Lapshin const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const { 2725d7b4fbSAlexey Lapshin assert(Index < Symbols.size() && "invalid symbol index"); 2825d7b4fbSAlexey Lapshin return Symbols[Index].get(); 2925d7b4fbSAlexey Lapshin } 3025d7b4fbSAlexey Lapshin 3125d7b4fbSAlexey Lapshin SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) { 3225d7b4fbSAlexey Lapshin return const_cast<SymbolEntry *>( 3325d7b4fbSAlexey Lapshin static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index)); 3425d7b4fbSAlexey Lapshin } 3525d7b4fbSAlexey Lapshin 36334a5766SRichard Dzenis void SymbolTable::updateSymbols(function_ref<void(SymbolEntry &)> Callable) { 37334a5766SRichard Dzenis for (auto &Sym : Symbols) 38334a5766SRichard Dzenis Callable(*Sym); 39334a5766SRichard Dzenis 40334a5766SRichard Dzenis // Partition symbols: local < defined external < undefined external. 41334a5766SRichard Dzenis auto ExternalBegin = std::stable_partition( 42334a5766SRichard Dzenis std::begin(Symbols), std::end(Symbols), 43334a5766SRichard Dzenis [](const auto &Sym) { return Sym->isLocalSymbol(); }); 44334a5766SRichard Dzenis std::stable_partition(ExternalBegin, std::end(Symbols), [](const auto &Sym) { 45334a5766SRichard Dzenis return !Sym->isUndefinedSymbol(); 46334a5766SRichard Dzenis }); 47334a5766SRichard Dzenis } 48334a5766SRichard Dzenis 4925d7b4fbSAlexey Lapshin void SymbolTable::removeSymbols( 5025d7b4fbSAlexey Lapshin function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) { 5125d7b4fbSAlexey Lapshin llvm::erase_if(Symbols, ToRemove); 5225d7b4fbSAlexey Lapshin } 5325d7b4fbSAlexey Lapshin 5425d7b4fbSAlexey Lapshin void Object::updateLoadCommandIndexes() { 5525d7b4fbSAlexey Lapshin static constexpr char TextSegmentName[] = "__TEXT"; 5625d7b4fbSAlexey Lapshin // Update indices of special load commands 5725d7b4fbSAlexey Lapshin for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) { 5825d7b4fbSAlexey Lapshin LoadCommand &LC = LoadCommands[Index]; 5925d7b4fbSAlexey Lapshin switch (LC.MachOLoadCommand.load_command_data.cmd) { 6025d7b4fbSAlexey Lapshin case MachO::LC_CODE_SIGNATURE: 6125d7b4fbSAlexey Lapshin CodeSignatureCommandIndex = Index; 6225d7b4fbSAlexey Lapshin break; 6325d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT: 6425d7b4fbSAlexey Lapshin if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) == 6525d7b4fbSAlexey Lapshin TextSegmentName) 6625d7b4fbSAlexey Lapshin TextSegmentCommandIndex = Index; 6725d7b4fbSAlexey Lapshin break; 6825d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT_64: 6925d7b4fbSAlexey Lapshin if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) == 7025d7b4fbSAlexey Lapshin TextSegmentName) 7125d7b4fbSAlexey Lapshin TextSegmentCommandIndex = Index; 7225d7b4fbSAlexey Lapshin break; 7325d7b4fbSAlexey Lapshin case MachO::LC_SYMTAB: 7425d7b4fbSAlexey Lapshin SymTabCommandIndex = Index; 7525d7b4fbSAlexey Lapshin break; 7625d7b4fbSAlexey Lapshin case MachO::LC_DYSYMTAB: 7725d7b4fbSAlexey Lapshin DySymTabCommandIndex = Index; 7825d7b4fbSAlexey Lapshin break; 7925d7b4fbSAlexey Lapshin case MachO::LC_DYLD_INFO: 8025d7b4fbSAlexey Lapshin case MachO::LC_DYLD_INFO_ONLY: 8125d7b4fbSAlexey Lapshin DyLdInfoCommandIndex = Index; 8225d7b4fbSAlexey Lapshin break; 8325d7b4fbSAlexey Lapshin case MachO::LC_DATA_IN_CODE: 8425d7b4fbSAlexey Lapshin DataInCodeCommandIndex = Index; 8525d7b4fbSAlexey Lapshin break; 8625d7b4fbSAlexey Lapshin case MachO::LC_LINKER_OPTIMIZATION_HINT: 8725d7b4fbSAlexey Lapshin LinkerOptimizationHintCommandIndex = Index; 8825d7b4fbSAlexey Lapshin break; 8925d7b4fbSAlexey Lapshin case MachO::LC_FUNCTION_STARTS: 9025d7b4fbSAlexey Lapshin FunctionStartsCommandIndex = Index; 9125d7b4fbSAlexey Lapshin break; 92c2d20947SKeith Smiley case MachO::LC_DYLIB_CODE_SIGN_DRS: 93c2d20947SKeith Smiley DylibCodeSignDRsIndex = Index; 94c2d20947SKeith Smiley break; 9525d7b4fbSAlexey Lapshin case MachO::LC_DYLD_CHAINED_FIXUPS: 9625d7b4fbSAlexey Lapshin ChainedFixupsCommandIndex = Index; 9725d7b4fbSAlexey Lapshin break; 9825d7b4fbSAlexey Lapshin case MachO::LC_DYLD_EXPORTS_TRIE: 9925d7b4fbSAlexey Lapshin ExportsTrieCommandIndex = Index; 10025d7b4fbSAlexey Lapshin break; 101*1a830aa1SDaniel Rodríguez Troitiño case MachO::LC_ENCRYPTION_INFO: 102*1a830aa1SDaniel Rodríguez Troitiño case MachO::LC_ENCRYPTION_INFO_64: 103*1a830aa1SDaniel Rodríguez Troitiño EncryptionInfoCommandIndex = Index; 104*1a830aa1SDaniel Rodríguez Troitiño break; 10525d7b4fbSAlexey Lapshin } 10625d7b4fbSAlexey Lapshin } 10725d7b4fbSAlexey Lapshin } 10825d7b4fbSAlexey Lapshin 10925d7b4fbSAlexey Lapshin Error Object::removeLoadCommands( 11025d7b4fbSAlexey Lapshin function_ref<bool(const LoadCommand &)> ToRemove) { 11125d7b4fbSAlexey Lapshin auto It = std::stable_partition( 11225d7b4fbSAlexey Lapshin LoadCommands.begin(), LoadCommands.end(), 11325d7b4fbSAlexey Lapshin [&](const LoadCommand &LC) { return !ToRemove(LC); }); 11425d7b4fbSAlexey Lapshin LoadCommands.erase(It, LoadCommands.end()); 11525d7b4fbSAlexey Lapshin 11625d7b4fbSAlexey Lapshin updateLoadCommandIndexes(); 11725d7b4fbSAlexey Lapshin return Error::success(); 11825d7b4fbSAlexey Lapshin } 11925d7b4fbSAlexey Lapshin 12025d7b4fbSAlexey Lapshin Error Object::removeSections( 12125d7b4fbSAlexey Lapshin function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) { 12225d7b4fbSAlexey Lapshin DenseMap<uint32_t, const Section *> OldIndexToSection; 12325d7b4fbSAlexey Lapshin uint32_t NextSectionIndex = 1; 12425d7b4fbSAlexey Lapshin for (LoadCommand &LC : LoadCommands) { 12525d7b4fbSAlexey Lapshin auto It = std::stable_partition( 12625d7b4fbSAlexey Lapshin std::begin(LC.Sections), std::end(LC.Sections), 12725d7b4fbSAlexey Lapshin [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); }); 12825d7b4fbSAlexey Lapshin for (auto I = LC.Sections.begin(), End = It; I != End; ++I) { 12925d7b4fbSAlexey Lapshin OldIndexToSection[(*I)->Index] = I->get(); 13025d7b4fbSAlexey Lapshin (*I)->Index = NextSectionIndex++; 13125d7b4fbSAlexey Lapshin } 13225d7b4fbSAlexey Lapshin LC.Sections.erase(It, LC.Sections.end()); 13325d7b4fbSAlexey Lapshin } 13425d7b4fbSAlexey Lapshin 13525d7b4fbSAlexey Lapshin auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool { 136ec941432SFangrui Song std::optional<uint32_t> Section = S->section(); 13725d7b4fbSAlexey Lapshin return (Section && !OldIndexToSection.count(*Section)); 13825d7b4fbSAlexey Lapshin }; 13925d7b4fbSAlexey Lapshin 14025d7b4fbSAlexey Lapshin SmallPtrSet<const SymbolEntry *, 2> DeadSymbols; 14125d7b4fbSAlexey Lapshin for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols) 14225d7b4fbSAlexey Lapshin if (IsDead(Sym)) 14325d7b4fbSAlexey Lapshin DeadSymbols.insert(Sym.get()); 14425d7b4fbSAlexey Lapshin 14525d7b4fbSAlexey Lapshin for (const LoadCommand &LC : LoadCommands) 14625d7b4fbSAlexey Lapshin for (const std::unique_ptr<Section> &Sec : LC.Sections) 14725d7b4fbSAlexey Lapshin for (const RelocationInfo &R : Sec->Relocations) 14825d7b4fbSAlexey Lapshin if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol)) 14925d7b4fbSAlexey Lapshin return createStringError(std::errc::invalid_argument, 15025d7b4fbSAlexey Lapshin "symbol '%s' defined in section with index " 15125d7b4fbSAlexey Lapshin "'%u' cannot be removed because it is " 15225d7b4fbSAlexey Lapshin "referenced by a relocation in section '%s'", 15325d7b4fbSAlexey Lapshin (*R.Symbol)->Name.c_str(), 15425d7b4fbSAlexey Lapshin *((*R.Symbol)->section()), 15525d7b4fbSAlexey Lapshin Sec->CanonicalName.c_str()); 15625d7b4fbSAlexey Lapshin SymTable.removeSymbols(IsDead); 15725d7b4fbSAlexey Lapshin for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols) 15825d7b4fbSAlexey Lapshin if (S->section()) 15925d7b4fbSAlexey Lapshin S->n_sect = OldIndexToSection[S->n_sect]->Index; 16025d7b4fbSAlexey Lapshin return Error::success(); 16125d7b4fbSAlexey Lapshin } 16225d7b4fbSAlexey Lapshin 16325d7b4fbSAlexey Lapshin uint64_t Object::nextAvailableSegmentAddress() const { 16425d7b4fbSAlexey Lapshin uint64_t HeaderSize = 16525d7b4fbSAlexey Lapshin is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); 16625d7b4fbSAlexey Lapshin uint64_t Addr = HeaderSize + Header.SizeOfCmds; 16725d7b4fbSAlexey Lapshin for (const LoadCommand &LC : LoadCommands) { 16825d7b4fbSAlexey Lapshin const MachO::macho_load_command &MLC = LC.MachOLoadCommand; 16925d7b4fbSAlexey Lapshin switch (MLC.load_command_data.cmd) { 17025d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT: 17125d7b4fbSAlexey Lapshin Addr = std::max(Addr, 17225d7b4fbSAlexey Lapshin static_cast<uint64_t>(MLC.segment_command_data.vmaddr) + 17325d7b4fbSAlexey Lapshin MLC.segment_command_data.vmsize); 17425d7b4fbSAlexey Lapshin break; 17525d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT_64: 17625d7b4fbSAlexey Lapshin Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr + 17725d7b4fbSAlexey Lapshin MLC.segment_command_64_data.vmsize); 17825d7b4fbSAlexey Lapshin break; 17925d7b4fbSAlexey Lapshin default: 18025d7b4fbSAlexey Lapshin continue; 18125d7b4fbSAlexey Lapshin } 18225d7b4fbSAlexey Lapshin } 18325d7b4fbSAlexey Lapshin return Addr; 18425d7b4fbSAlexey Lapshin } 18525d7b4fbSAlexey Lapshin 18625d7b4fbSAlexey Lapshin template <typename SegmentType> 18725d7b4fbSAlexey Lapshin static void 18825d7b4fbSAlexey Lapshin constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType, 18925d7b4fbSAlexey Lapshin StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) { 19025d7b4fbSAlexey Lapshin assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name"); 19125d7b4fbSAlexey Lapshin memset(&Seg, 0, sizeof(SegmentType)); 19225d7b4fbSAlexey Lapshin Seg.cmd = CmdType; 19325d7b4fbSAlexey Lapshin strncpy(Seg.segname, SegName.data(), SegName.size()); 19425d7b4fbSAlexey Lapshin Seg.maxprot |= 19525d7b4fbSAlexey Lapshin (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 19625d7b4fbSAlexey Lapshin Seg.initprot |= 19725d7b4fbSAlexey Lapshin (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); 19825d7b4fbSAlexey Lapshin Seg.vmaddr = SegVMAddr; 19925d7b4fbSAlexey Lapshin Seg.vmsize = SegVMSize; 20025d7b4fbSAlexey Lapshin } 20125d7b4fbSAlexey Lapshin 20225d7b4fbSAlexey Lapshin LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) { 20325d7b4fbSAlexey Lapshin LoadCommand LC; 20425d7b4fbSAlexey Lapshin const uint64_t SegVMAddr = nextAvailableSegmentAddress(); 20525d7b4fbSAlexey Lapshin if (is64Bit()) 20625d7b4fbSAlexey Lapshin constructSegment(LC.MachOLoadCommand.segment_command_64_data, 20725d7b4fbSAlexey Lapshin MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize); 20825d7b4fbSAlexey Lapshin else 20925d7b4fbSAlexey Lapshin constructSegment(LC.MachOLoadCommand.segment_command_data, 21025d7b4fbSAlexey Lapshin MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize); 21125d7b4fbSAlexey Lapshin 21225d7b4fbSAlexey Lapshin LoadCommands.push_back(std::move(LC)); 21325d7b4fbSAlexey Lapshin return LoadCommands.back(); 21425d7b4fbSAlexey Lapshin } 21525d7b4fbSAlexey Lapshin 21625d7b4fbSAlexey Lapshin /// Extracts a segment name from a string which is possibly non-null-terminated. 21725d7b4fbSAlexey Lapshin static StringRef extractSegmentName(const char *SegName) { 21825d7b4fbSAlexey Lapshin return StringRef(SegName, 21925d7b4fbSAlexey Lapshin strnlen(SegName, sizeof(MachO::segment_command::segname))); 22025d7b4fbSAlexey Lapshin } 22125d7b4fbSAlexey Lapshin 222ec941432SFangrui Song std::optional<StringRef> LoadCommand::getSegmentName() const { 22325d7b4fbSAlexey Lapshin const MachO::macho_load_command &MLC = MachOLoadCommand; 22425d7b4fbSAlexey Lapshin switch (MLC.load_command_data.cmd) { 22525d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT: 22625d7b4fbSAlexey Lapshin return extractSegmentName(MLC.segment_command_data.segname); 22725d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT_64: 22825d7b4fbSAlexey Lapshin return extractSegmentName(MLC.segment_command_64_data.segname); 22925d7b4fbSAlexey Lapshin default: 230aadaafacSKazu Hirata return std::nullopt; 23125d7b4fbSAlexey Lapshin } 23225d7b4fbSAlexey Lapshin } 23325d7b4fbSAlexey Lapshin 234ec941432SFangrui Song std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const { 23525d7b4fbSAlexey Lapshin const MachO::macho_load_command &MLC = MachOLoadCommand; 23625d7b4fbSAlexey Lapshin switch (MLC.load_command_data.cmd) { 23725d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT: 23825d7b4fbSAlexey Lapshin return MLC.segment_command_data.vmaddr; 23925d7b4fbSAlexey Lapshin case MachO::LC_SEGMENT_64: 24025d7b4fbSAlexey Lapshin return MLC.segment_command_64_data.vmaddr; 24125d7b4fbSAlexey Lapshin default: 242aadaafacSKazu Hirata return std::nullopt; 24325d7b4fbSAlexey Lapshin } 24425d7b4fbSAlexey Lapshin } 245