xref: /llvm-project/llvm/lib/ObjCopy/MachO/MachOObject.cpp (revision 1a830aa1fe1e88749b563fefe18382842e0cff90)
1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOObject.h"
10 #include "llvm/ADT/SmallPtrSet.h"
11 #include "llvm/Support/SystemZ/zOSSupport.h"
12 #include <unordered_set>
13 
14 using namespace llvm;
15 using namespace llvm::objcopy::macho;
16 
17 Section::Section(StringRef SegName, StringRef SectName)
18     : Segname(SegName), Sectname(SectName),
19       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
20 
21 Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
22     : Segname(SegName), Sectname(SectName),
23       CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
24       Content(Content) {}
25 
26 const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
27   assert(Index < Symbols.size() && "invalid symbol index");
28   return Symbols[Index].get();
29 }
30 
31 SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
32   return const_cast<SymbolEntry *>(
33       static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
34 }
35 
36 void SymbolTable::updateSymbols(function_ref<void(SymbolEntry &)> Callable) {
37   for (auto &Sym : Symbols)
38     Callable(*Sym);
39 
40   // Partition symbols: local < defined external < undefined external.
41   auto ExternalBegin = std::stable_partition(
42       std::begin(Symbols), std::end(Symbols),
43       [](const auto &Sym) { return Sym->isLocalSymbol(); });
44   std::stable_partition(ExternalBegin, std::end(Symbols), [](const auto &Sym) {
45     return !Sym->isUndefinedSymbol();
46   });
47 }
48 
49 void SymbolTable::removeSymbols(
50     function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
51   llvm::erase_if(Symbols, ToRemove);
52 }
53 
54 void Object::updateLoadCommandIndexes() {
55   static constexpr char TextSegmentName[] = "__TEXT";
56   // Update indices of special load commands
57   for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
58     LoadCommand &LC = LoadCommands[Index];
59     switch (LC.MachOLoadCommand.load_command_data.cmd) {
60     case MachO::LC_CODE_SIGNATURE:
61       CodeSignatureCommandIndex = Index;
62       break;
63     case MachO::LC_SEGMENT:
64       if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
65           TextSegmentName)
66         TextSegmentCommandIndex = Index;
67       break;
68     case MachO::LC_SEGMENT_64:
69       if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
70           TextSegmentName)
71         TextSegmentCommandIndex = Index;
72       break;
73     case MachO::LC_SYMTAB:
74       SymTabCommandIndex = Index;
75       break;
76     case MachO::LC_DYSYMTAB:
77       DySymTabCommandIndex = Index;
78       break;
79     case MachO::LC_DYLD_INFO:
80     case MachO::LC_DYLD_INFO_ONLY:
81       DyLdInfoCommandIndex = Index;
82       break;
83     case MachO::LC_DATA_IN_CODE:
84       DataInCodeCommandIndex = Index;
85       break;
86     case MachO::LC_LINKER_OPTIMIZATION_HINT:
87       LinkerOptimizationHintCommandIndex = Index;
88       break;
89     case MachO::LC_FUNCTION_STARTS:
90       FunctionStartsCommandIndex = Index;
91       break;
92     case MachO::LC_DYLIB_CODE_SIGN_DRS:
93       DylibCodeSignDRsIndex = Index;
94       break;
95     case MachO::LC_DYLD_CHAINED_FIXUPS:
96       ChainedFixupsCommandIndex = Index;
97       break;
98     case MachO::LC_DYLD_EXPORTS_TRIE:
99       ExportsTrieCommandIndex = Index;
100       break;
101     case MachO::LC_ENCRYPTION_INFO:
102     case MachO::LC_ENCRYPTION_INFO_64:
103       EncryptionInfoCommandIndex = Index;
104       break;
105     }
106   }
107 }
108 
109 Error Object::removeLoadCommands(
110     function_ref<bool(const LoadCommand &)> ToRemove) {
111   auto It = std::stable_partition(
112       LoadCommands.begin(), LoadCommands.end(),
113       [&](const LoadCommand &LC) { return !ToRemove(LC); });
114   LoadCommands.erase(It, LoadCommands.end());
115 
116   updateLoadCommandIndexes();
117   return Error::success();
118 }
119 
120 Error Object::removeSections(
121     function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
122   DenseMap<uint32_t, const Section *> OldIndexToSection;
123   uint32_t NextSectionIndex = 1;
124   for (LoadCommand &LC : LoadCommands) {
125     auto It = std::stable_partition(
126         std::begin(LC.Sections), std::end(LC.Sections),
127         [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
128     for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
129       OldIndexToSection[(*I)->Index] = I->get();
130       (*I)->Index = NextSectionIndex++;
131     }
132     LC.Sections.erase(It, LC.Sections.end());
133   }
134 
135   auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
136     std::optional<uint32_t> Section = S->section();
137     return (Section && !OldIndexToSection.count(*Section));
138   };
139 
140   SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
141   for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
142     if (IsDead(Sym))
143       DeadSymbols.insert(Sym.get());
144 
145   for (const LoadCommand &LC : LoadCommands)
146     for (const std::unique_ptr<Section> &Sec : LC.Sections)
147       for (const RelocationInfo &R : Sec->Relocations)
148         if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
149           return createStringError(std::errc::invalid_argument,
150                                    "symbol '%s' defined in section with index "
151                                    "'%u' cannot be removed because it is "
152                                    "referenced by a relocation in section '%s'",
153                                    (*R.Symbol)->Name.c_str(),
154                                    *((*R.Symbol)->section()),
155                                    Sec->CanonicalName.c_str());
156   SymTable.removeSymbols(IsDead);
157   for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
158     if (S->section())
159       S->n_sect = OldIndexToSection[S->n_sect]->Index;
160   return Error::success();
161 }
162 
163 uint64_t Object::nextAvailableSegmentAddress() const {
164   uint64_t HeaderSize =
165       is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
166   uint64_t Addr = HeaderSize + Header.SizeOfCmds;
167   for (const LoadCommand &LC : LoadCommands) {
168     const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
169     switch (MLC.load_command_data.cmd) {
170     case MachO::LC_SEGMENT:
171       Addr = std::max(Addr,
172                       static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
173                           MLC.segment_command_data.vmsize);
174       break;
175     case MachO::LC_SEGMENT_64:
176       Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
177                                 MLC.segment_command_64_data.vmsize);
178       break;
179     default:
180       continue;
181     }
182   }
183   return Addr;
184 }
185 
186 template <typename SegmentType>
187 static void
188 constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
189                  StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
190   assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
191   memset(&Seg, 0, sizeof(SegmentType));
192   Seg.cmd = CmdType;
193   strncpy(Seg.segname, SegName.data(), SegName.size());
194   Seg.maxprot |=
195       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
196   Seg.initprot |=
197       (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
198   Seg.vmaddr = SegVMAddr;
199   Seg.vmsize = SegVMSize;
200 }
201 
202 LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
203   LoadCommand LC;
204   const uint64_t SegVMAddr = nextAvailableSegmentAddress();
205   if (is64Bit())
206     constructSegment(LC.MachOLoadCommand.segment_command_64_data,
207                      MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
208   else
209     constructSegment(LC.MachOLoadCommand.segment_command_data,
210                      MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
211 
212   LoadCommands.push_back(std::move(LC));
213   return LoadCommands.back();
214 }
215 
216 /// Extracts a segment name from a string which is possibly non-null-terminated.
217 static StringRef extractSegmentName(const char *SegName) {
218   return StringRef(SegName,
219                    strnlen(SegName, sizeof(MachO::segment_command::segname)));
220 }
221 
222 std::optional<StringRef> LoadCommand::getSegmentName() const {
223   const MachO::macho_load_command &MLC = MachOLoadCommand;
224   switch (MLC.load_command_data.cmd) {
225   case MachO::LC_SEGMENT:
226     return extractSegmentName(MLC.segment_command_data.segname);
227   case MachO::LC_SEGMENT_64:
228     return extractSegmentName(MLC.segment_command_64_data.segname);
229   default:
230     return std::nullopt;
231   }
232 }
233 
234 std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
235   const MachO::macho_load_command &MLC = MachOLoadCommand;
236   switch (MLC.load_command_data.cmd) {
237   case MachO::LC_SEGMENT:
238     return MLC.segment_command_data.vmaddr;
239   case MachO::LC_SEGMENT_64:
240     return MLC.segment_command_64_data.vmaddr;
241   default:
242     return std::nullopt;
243   }
244 }
245