xref: /llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.cpp (revision 1a830aa1fe1e88749b563fefe18382842e0cff90)
//===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
825d7b4fbSAlexey Lapshin 
925d7b4fbSAlexey Lapshin #include "MachOObject.h"
1025d7b4fbSAlexey Lapshin #include "llvm/ADT/SmallPtrSet.h"
11ec41462dSAbhina Sree #include "llvm/Support/SystemZ/zOSSupport.h"
1225d7b4fbSAlexey Lapshin #include <unordered_set>
1325d7b4fbSAlexey Lapshin 
1425d7b4fbSAlexey Lapshin using namespace llvm;
1525d7b4fbSAlexey Lapshin using namespace llvm::objcopy::macho;
1625d7b4fbSAlexey Lapshin 
17ccde601fSAngelo Matni Section::Section(StringRef SegName, StringRef SectName)
1825915c6aSAlexander Shaposhnikov     : Segname(SegName), Sectname(SectName),
19ccde601fSAngelo Matni       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
20ccde601fSAngelo Matni 
21ccde601fSAngelo Matni Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
2225915c6aSAlexander Shaposhnikov     : Segname(SegName), Sectname(SectName),
23ccde601fSAngelo Matni       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
24ccde601fSAngelo Matni       Content(Content) {}
25ccde601fSAngelo Matni 
2625d7b4fbSAlexey Lapshin const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
2725d7b4fbSAlexey Lapshin   assert(Index < Symbols.size() && "invalid symbol index");
2825d7b4fbSAlexey Lapshin   return Symbols[Index].get();
2925d7b4fbSAlexey Lapshin }
3025d7b4fbSAlexey Lapshin 
3125d7b4fbSAlexey Lapshin SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
3225d7b4fbSAlexey Lapshin   return const_cast<SymbolEntry *>(
3325d7b4fbSAlexey Lapshin       static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
3425d7b4fbSAlexey Lapshin }
3525d7b4fbSAlexey Lapshin 
36334a5766SRichard Dzenis void SymbolTable::updateSymbols(function_ref<void(SymbolEntry &)> Callable) {
37334a5766SRichard Dzenis   for (auto &Sym : Symbols)
38334a5766SRichard Dzenis     Callable(*Sym);
39334a5766SRichard Dzenis 
40334a5766SRichard Dzenis   // Partition symbols: local < defined external < undefined external.
41334a5766SRichard Dzenis   auto ExternalBegin = std::stable_partition(
42334a5766SRichard Dzenis       std::begin(Symbols), std::end(Symbols),
43334a5766SRichard Dzenis       [](const auto &Sym) { return Sym->isLocalSymbol(); });
44334a5766SRichard Dzenis   std::stable_partition(ExternalBegin, std::end(Symbols), [](const auto &Sym) {
45334a5766SRichard Dzenis     return !Sym->isUndefinedSymbol();
46334a5766SRichard Dzenis   });
47334a5766SRichard Dzenis }
48334a5766SRichard Dzenis 
4925d7b4fbSAlexey Lapshin void SymbolTable::removeSymbols(
5025d7b4fbSAlexey Lapshin     function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
5125d7b4fbSAlexey Lapshin   llvm::erase_if(Symbols, ToRemove);
5225d7b4fbSAlexey Lapshin }
5325d7b4fbSAlexey Lapshin 
5425d7b4fbSAlexey Lapshin void Object::updateLoadCommandIndexes() {
5525d7b4fbSAlexey Lapshin   static constexpr char TextSegmentName[] = "__TEXT";
5625d7b4fbSAlexey Lapshin   // Update indices of special load commands
5725d7b4fbSAlexey Lapshin   for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
5825d7b4fbSAlexey Lapshin     LoadCommand &LC = LoadCommands[Index];
5925d7b4fbSAlexey Lapshin     switch (LC.MachOLoadCommand.load_command_data.cmd) {
6025d7b4fbSAlexey Lapshin     case MachO::LC_CODE_SIGNATURE:
6125d7b4fbSAlexey Lapshin       CodeSignatureCommandIndex = Index;
6225d7b4fbSAlexey Lapshin       break;
6325d7b4fbSAlexey Lapshin     case MachO::LC_SEGMENT:
6425d7b4fbSAlexey Lapshin       if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
6525d7b4fbSAlexey Lapshin           TextSegmentName)
6625d7b4fbSAlexey Lapshin         TextSegmentCommandIndex = Index;
6725d7b4fbSAlexey Lapshin       break;
6825d7b4fbSAlexey Lapshin     case MachO::LC_SEGMENT_64:
6925d7b4fbSAlexey Lapshin       if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
7025d7b4fbSAlexey Lapshin           TextSegmentName)
7125d7b4fbSAlexey Lapshin         TextSegmentCommandIndex = Index;
7225d7b4fbSAlexey Lapshin       break;
7325d7b4fbSAlexey Lapshin     case MachO::LC_SYMTAB:
7425d7b4fbSAlexey Lapshin       SymTabCommandIndex = Index;
7525d7b4fbSAlexey Lapshin       break;
7625d7b4fbSAlexey Lapshin     case MachO::LC_DYSYMTAB:
7725d7b4fbSAlexey Lapshin       DySymTabCommandIndex = Index;
7825d7b4fbSAlexey Lapshin       break;
7925d7b4fbSAlexey Lapshin     case MachO::LC_DYLD_INFO:
8025d7b4fbSAlexey Lapshin     case MachO::LC_DYLD_INFO_ONLY:
8125d7b4fbSAlexey Lapshin       DyLdInfoCommandIndex = Index;
8225d7b4fbSAlexey Lapshin       break;
8325d7b4fbSAlexey Lapshin     case MachO::LC_DATA_IN_CODE:
8425d7b4fbSAlexey Lapshin       DataInCodeCommandIndex = Index;
8525d7b4fbSAlexey Lapshin       break;
8625d7b4fbSAlexey Lapshin     case MachO::LC_LINKER_OPTIMIZATION_HINT:
8725d7b4fbSAlexey Lapshin       LinkerOptimizationHintCommandIndex = Index;
8825d7b4fbSAlexey Lapshin       break;
8925d7b4fbSAlexey Lapshin     case MachO::LC_FUNCTION_STARTS:
9025d7b4fbSAlexey Lapshin       FunctionStartsCommandIndex = Index;
9125d7b4fbSAlexey Lapshin       break;
92c2d20947SKeith Smiley     case MachO::LC_DYLIB_CODE_SIGN_DRS:
93c2d20947SKeith Smiley       DylibCodeSignDRsIndex = Index;
94c2d20947SKeith Smiley       break;
9525d7b4fbSAlexey Lapshin     case MachO::LC_DYLD_CHAINED_FIXUPS:
9625d7b4fbSAlexey Lapshin       ChainedFixupsCommandIndex = Index;
9725d7b4fbSAlexey Lapshin       break;
9825d7b4fbSAlexey Lapshin     case MachO::LC_DYLD_EXPORTS_TRIE:
9925d7b4fbSAlexey Lapshin       ExportsTrieCommandIndex = Index;
10025d7b4fbSAlexey Lapshin       break;
101*1a830aa1SDaniel Rodríguez Troitiño     case MachO::LC_ENCRYPTION_INFO:
102*1a830aa1SDaniel Rodríguez Troitiño     case MachO::LC_ENCRYPTION_INFO_64:
103*1a830aa1SDaniel Rodríguez Troitiño       EncryptionInfoCommandIndex = Index;
104*1a830aa1SDaniel Rodríguez Troitiño       break;
10525d7b4fbSAlexey Lapshin     }
10625d7b4fbSAlexey Lapshin   }
10725d7b4fbSAlexey Lapshin }
10825d7b4fbSAlexey Lapshin 
10925d7b4fbSAlexey Lapshin Error Object::removeLoadCommands(
11025d7b4fbSAlexey Lapshin     function_ref<bool(const LoadCommand &)> ToRemove) {
11125d7b4fbSAlexey Lapshin   auto It = std::stable_partition(
11225d7b4fbSAlexey Lapshin       LoadCommands.begin(), LoadCommands.end(),
11325d7b4fbSAlexey Lapshin       [&](const LoadCommand &LC) { return !ToRemove(LC); });
11425d7b4fbSAlexey Lapshin   LoadCommands.erase(It, LoadCommands.end());
11525d7b4fbSAlexey Lapshin 
11625d7b4fbSAlexey Lapshin   updateLoadCommandIndexes();
11725d7b4fbSAlexey Lapshin   return Error::success();
11825d7b4fbSAlexey Lapshin }
11925d7b4fbSAlexey Lapshin 
12025d7b4fbSAlexey Lapshin Error Object::removeSections(
12125d7b4fbSAlexey Lapshin     function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
12225d7b4fbSAlexey Lapshin   DenseMap<uint32_t, const Section *> OldIndexToSection;
12325d7b4fbSAlexey Lapshin   uint32_t NextSectionIndex = 1;
12425d7b4fbSAlexey Lapshin   for (LoadCommand &LC : LoadCommands) {
12525d7b4fbSAlexey Lapshin     auto It = std::stable_partition(
12625d7b4fbSAlexey Lapshin         std::begin(LC.Sections), std::end(LC.Sections),
12725d7b4fbSAlexey Lapshin         [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
12825d7b4fbSAlexey Lapshin     for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
12925d7b4fbSAlexey Lapshin       OldIndexToSection[(*I)->Index] = I->get();
13025d7b4fbSAlexey Lapshin       (*I)->Index = NextSectionIndex++;
13125d7b4fbSAlexey Lapshin     }
13225d7b4fbSAlexey Lapshin     LC.Sections.erase(It, LC.Sections.end());
13325d7b4fbSAlexey Lapshin   }
13425d7b4fbSAlexey Lapshin 
13525d7b4fbSAlexey Lapshin   auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
136ec941432SFangrui Song     std::optional<uint32_t> Section = S->section();
13725d7b4fbSAlexey Lapshin     return (Section && !OldIndexToSection.count(*Section));
13825d7b4fbSAlexey Lapshin   };
13925d7b4fbSAlexey Lapshin 
14025d7b4fbSAlexey Lapshin   SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
14125d7b4fbSAlexey Lapshin   for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
14225d7b4fbSAlexey Lapshin     if (IsDead(Sym))
14325d7b4fbSAlexey Lapshin       DeadSymbols.insert(Sym.get());
14425d7b4fbSAlexey Lapshin 
14525d7b4fbSAlexey Lapshin   for (const LoadCommand &LC : LoadCommands)
14625d7b4fbSAlexey Lapshin     for (const std::unique_ptr<Section> &Sec : LC.Sections)
14725d7b4fbSAlexey Lapshin       for (const RelocationInfo &R : Sec->Relocations)
14825d7b4fbSAlexey Lapshin         if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
14925d7b4fbSAlexey Lapshin           return createStringError(std::errc::invalid_argument,
15025d7b4fbSAlexey Lapshin                                    "symbol '%s' defined in section with index "
15125d7b4fbSAlexey Lapshin                                    "'%u' cannot be removed because it is "
15225d7b4fbSAlexey Lapshin                                    "referenced by a relocation in section '%s'",
15325d7b4fbSAlexey Lapshin                                    (*R.Symbol)->Name.c_str(),
15425d7b4fbSAlexey Lapshin                                    *((*R.Symbol)->section()),
15525d7b4fbSAlexey Lapshin                                    Sec->CanonicalName.c_str());
15625d7b4fbSAlexey Lapshin   SymTable.removeSymbols(IsDead);
15725d7b4fbSAlexey Lapshin   for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
15825d7b4fbSAlexey Lapshin     if (S->section())
15925d7b4fbSAlexey Lapshin       S->n_sect = OldIndexToSection[S->n_sect]->Index;
16025d7b4fbSAlexey Lapshin   return Error::success();
16125d7b4fbSAlexey Lapshin }
16225d7b4fbSAlexey Lapshin 
16325d7b4fbSAlexey Lapshin uint64_t Object::nextAvailableSegmentAddress() const {
16425d7b4fbSAlexey Lapshin   uint64_t HeaderSize =
16525d7b4fbSAlexey Lapshin       is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
16625d7b4fbSAlexey Lapshin   uint64_t Addr = HeaderSize + Header.SizeOfCmds;
16725d7b4fbSAlexey Lapshin   for (const LoadCommand &LC : LoadCommands) {
16825d7b4fbSAlexey Lapshin     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
16925d7b4fbSAlexey Lapshin     switch (MLC.load_command_data.cmd) {
17025d7b4fbSAlexey Lapshin     case MachO::LC_SEGMENT:
17125d7b4fbSAlexey Lapshin       Addr = std::max(Addr,
17225d7b4fbSAlexey Lapshin                       static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
17325d7b4fbSAlexey Lapshin                           MLC.segment_command_data.vmsize);
17425d7b4fbSAlexey Lapshin       break;
17525d7b4fbSAlexey Lapshin     case MachO::LC_SEGMENT_64:
17625d7b4fbSAlexey Lapshin       Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
17725d7b4fbSAlexey Lapshin                                 MLC.segment_command_64_data.vmsize);
17825d7b4fbSAlexey Lapshin       break;
17925d7b4fbSAlexey Lapshin     default:
18025d7b4fbSAlexey Lapshin       continue;
18125d7b4fbSAlexey Lapshin     }
18225d7b4fbSAlexey Lapshin   }
18325d7b4fbSAlexey Lapshin   return Addr;
18425d7b4fbSAlexey Lapshin }
18525d7b4fbSAlexey Lapshin 
18625d7b4fbSAlexey Lapshin template <typename SegmentType>
18725d7b4fbSAlexey Lapshin static void
18825d7b4fbSAlexey Lapshin constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
18925d7b4fbSAlexey Lapshin                  StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
19025d7b4fbSAlexey Lapshin   assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
19125d7b4fbSAlexey Lapshin   memset(&Seg, 0, sizeof(SegmentType));
19225d7b4fbSAlexey Lapshin   Seg.cmd = CmdType;
19325d7b4fbSAlexey Lapshin   strncpy(Seg.segname, SegName.data(), SegName.size());
19425d7b4fbSAlexey Lapshin   Seg.maxprot |=
19525d7b4fbSAlexey Lapshin       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
19625d7b4fbSAlexey Lapshin   Seg.initprot |=
19725d7b4fbSAlexey Lapshin       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
19825d7b4fbSAlexey Lapshin   Seg.vmaddr = SegVMAddr;
19925d7b4fbSAlexey Lapshin   Seg.vmsize = SegVMSize;
20025d7b4fbSAlexey Lapshin }
20125d7b4fbSAlexey Lapshin 
20225d7b4fbSAlexey Lapshin LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
20325d7b4fbSAlexey Lapshin   LoadCommand LC;
20425d7b4fbSAlexey Lapshin   const uint64_t SegVMAddr = nextAvailableSegmentAddress();
20525d7b4fbSAlexey Lapshin   if (is64Bit())
20625d7b4fbSAlexey Lapshin     constructSegment(LC.MachOLoadCommand.segment_command_64_data,
20725d7b4fbSAlexey Lapshin                      MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
20825d7b4fbSAlexey Lapshin   else
20925d7b4fbSAlexey Lapshin     constructSegment(LC.MachOLoadCommand.segment_command_data,
21025d7b4fbSAlexey Lapshin                      MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
21125d7b4fbSAlexey Lapshin 
21225d7b4fbSAlexey Lapshin   LoadCommands.push_back(std::move(LC));
21325d7b4fbSAlexey Lapshin   return LoadCommands.back();
21425d7b4fbSAlexey Lapshin }
21525d7b4fbSAlexey Lapshin 
21625d7b4fbSAlexey Lapshin /// Extracts a segment name from a string which is possibly non-null-terminated.
21725d7b4fbSAlexey Lapshin static StringRef extractSegmentName(const char *SegName) {
21825d7b4fbSAlexey Lapshin   return StringRef(SegName,
21925d7b4fbSAlexey Lapshin                    strnlen(SegName, sizeof(MachO::segment_command::segname)));
22025d7b4fbSAlexey Lapshin }
22125d7b4fbSAlexey Lapshin 
222ec941432SFangrui Song std::optional<StringRef> LoadCommand::getSegmentName() const {
22325d7b4fbSAlexey Lapshin   const MachO::macho_load_command &MLC = MachOLoadCommand;
22425d7b4fbSAlexey Lapshin   switch (MLC.load_command_data.cmd) {
22525d7b4fbSAlexey Lapshin   case MachO::LC_SEGMENT:
22625d7b4fbSAlexey Lapshin     return extractSegmentName(MLC.segment_command_data.segname);
22725d7b4fbSAlexey Lapshin   case MachO::LC_SEGMENT_64:
22825d7b4fbSAlexey Lapshin     return extractSegmentName(MLC.segment_command_64_data.segname);
22925d7b4fbSAlexey Lapshin   default:
230aadaafacSKazu Hirata     return std::nullopt;
23125d7b4fbSAlexey Lapshin   }
23225d7b4fbSAlexey Lapshin }
23325d7b4fbSAlexey Lapshin 
234ec941432SFangrui Song std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
23525d7b4fbSAlexey Lapshin   const MachO::macho_load_command &MLC = MachOLoadCommand;
23625d7b4fbSAlexey Lapshin   switch (MLC.load_command_data.cmd) {
23725d7b4fbSAlexey Lapshin   case MachO::LC_SEGMENT:
23825d7b4fbSAlexey Lapshin     return MLC.segment_command_data.vmaddr;
23925d7b4fbSAlexey Lapshin   case MachO::LC_SEGMENT_64:
24025d7b4fbSAlexey Lapshin     return MLC.segment_command_64_data.vmaddr;
24125d7b4fbSAlexey Lapshin   default:
242aadaafacSKazu Hirata     return std::nullopt;
24325d7b4fbSAlexey Lapshin   }
24425d7b4fbSAlexey Lapshin }
245