1*d415bd75Srobert //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2*d415bd75Srobert //
3*d415bd75Srobert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*d415bd75Srobert // See https://llvm.org/LICENSE.txt for license information.
5*d415bd75Srobert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*d415bd75Srobert //
7*d415bd75Srobert //===----------------------------------------------------------------------===//
8*d415bd75Srobert
9*d415bd75Srobert #include "MachOObject.h"
10*d415bd75Srobert #include "llvm/ADT/SmallPtrSet.h"
11*d415bd75Srobert #include <unordered_set>
12*d415bd75Srobert
13*d415bd75Srobert using namespace llvm;
14*d415bd75Srobert using namespace llvm::objcopy::macho;
15*d415bd75Srobert
Section(StringRef SegName,StringRef SectName)16*d415bd75Srobert Section::Section(StringRef SegName, StringRef SectName)
17*d415bd75Srobert : Segname(SegName), Sectname(SectName),
18*d415bd75Srobert CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
19*d415bd75Srobert
Section(StringRef SegName,StringRef SectName,StringRef Content)20*d415bd75Srobert Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
21*d415bd75Srobert : Segname(SegName), Sectname(SectName),
22*d415bd75Srobert CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
23*d415bd75Srobert Content(Content) {}
24*d415bd75Srobert
getSymbolByIndex(uint32_t Index) const25*d415bd75Srobert const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
26*d415bd75Srobert assert(Index < Symbols.size() && "invalid symbol index");
27*d415bd75Srobert return Symbols[Index].get();
28*d415bd75Srobert }
29*d415bd75Srobert
getSymbolByIndex(uint32_t Index)30*d415bd75Srobert SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
31*d415bd75Srobert return const_cast<SymbolEntry *>(
32*d415bd75Srobert static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
33*d415bd75Srobert }
34*d415bd75Srobert
removeSymbols(function_ref<bool (const std::unique_ptr<SymbolEntry> &)> ToRemove)35*d415bd75Srobert void SymbolTable::removeSymbols(
36*d415bd75Srobert function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
37*d415bd75Srobert llvm::erase_if(Symbols, ToRemove);
38*d415bd75Srobert }
39*d415bd75Srobert
updateLoadCommandIndexes()40*d415bd75Srobert void Object::updateLoadCommandIndexes() {
41*d415bd75Srobert static constexpr char TextSegmentName[] = "__TEXT";
42*d415bd75Srobert // Update indices of special load commands
43*d415bd75Srobert for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
44*d415bd75Srobert LoadCommand &LC = LoadCommands[Index];
45*d415bd75Srobert switch (LC.MachOLoadCommand.load_command_data.cmd) {
46*d415bd75Srobert case MachO::LC_CODE_SIGNATURE:
47*d415bd75Srobert CodeSignatureCommandIndex = Index;
48*d415bd75Srobert break;
49*d415bd75Srobert case MachO::LC_SEGMENT:
50*d415bd75Srobert if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
51*d415bd75Srobert TextSegmentName)
52*d415bd75Srobert TextSegmentCommandIndex = Index;
53*d415bd75Srobert break;
54*d415bd75Srobert case MachO::LC_SEGMENT_64:
55*d415bd75Srobert if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
56*d415bd75Srobert TextSegmentName)
57*d415bd75Srobert TextSegmentCommandIndex = Index;
58*d415bd75Srobert break;
59*d415bd75Srobert case MachO::LC_SYMTAB:
60*d415bd75Srobert SymTabCommandIndex = Index;
61*d415bd75Srobert break;
62*d415bd75Srobert case MachO::LC_DYSYMTAB:
63*d415bd75Srobert DySymTabCommandIndex = Index;
64*d415bd75Srobert break;
65*d415bd75Srobert case MachO::LC_DYLD_INFO:
66*d415bd75Srobert case MachO::LC_DYLD_INFO_ONLY:
67*d415bd75Srobert DyLdInfoCommandIndex = Index;
68*d415bd75Srobert break;
69*d415bd75Srobert case MachO::LC_DATA_IN_CODE:
70*d415bd75Srobert DataInCodeCommandIndex = Index;
71*d415bd75Srobert break;
72*d415bd75Srobert case MachO::LC_LINKER_OPTIMIZATION_HINT:
73*d415bd75Srobert LinkerOptimizationHintCommandIndex = Index;
74*d415bd75Srobert break;
75*d415bd75Srobert case MachO::LC_FUNCTION_STARTS:
76*d415bd75Srobert FunctionStartsCommandIndex = Index;
77*d415bd75Srobert break;
78*d415bd75Srobert case MachO::LC_DYLIB_CODE_SIGN_DRS:
79*d415bd75Srobert DylibCodeSignDRsIndex = Index;
80*d415bd75Srobert break;
81*d415bd75Srobert case MachO::LC_DYLD_CHAINED_FIXUPS:
82*d415bd75Srobert ChainedFixupsCommandIndex = Index;
83*d415bd75Srobert break;
84*d415bd75Srobert case MachO::LC_DYLD_EXPORTS_TRIE:
85*d415bd75Srobert ExportsTrieCommandIndex = Index;
86*d415bd75Srobert break;
87*d415bd75Srobert }
88*d415bd75Srobert }
89*d415bd75Srobert }
90*d415bd75Srobert
removeLoadCommands(function_ref<bool (const LoadCommand &)> ToRemove)91*d415bd75Srobert Error Object::removeLoadCommands(
92*d415bd75Srobert function_ref<bool(const LoadCommand &)> ToRemove) {
93*d415bd75Srobert auto It = std::stable_partition(
94*d415bd75Srobert LoadCommands.begin(), LoadCommands.end(),
95*d415bd75Srobert [&](const LoadCommand &LC) { return !ToRemove(LC); });
96*d415bd75Srobert LoadCommands.erase(It, LoadCommands.end());
97*d415bd75Srobert
98*d415bd75Srobert updateLoadCommandIndexes();
99*d415bd75Srobert return Error::success();
100*d415bd75Srobert }
101*d415bd75Srobert
removeSections(function_ref<bool (const std::unique_ptr<Section> &)> ToRemove)102*d415bd75Srobert Error Object::removeSections(
103*d415bd75Srobert function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
104*d415bd75Srobert DenseMap<uint32_t, const Section *> OldIndexToSection;
105*d415bd75Srobert uint32_t NextSectionIndex = 1;
106*d415bd75Srobert for (LoadCommand &LC : LoadCommands) {
107*d415bd75Srobert auto It = std::stable_partition(
108*d415bd75Srobert std::begin(LC.Sections), std::end(LC.Sections),
109*d415bd75Srobert [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
110*d415bd75Srobert for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
111*d415bd75Srobert OldIndexToSection[(*I)->Index] = I->get();
112*d415bd75Srobert (*I)->Index = NextSectionIndex++;
113*d415bd75Srobert }
114*d415bd75Srobert LC.Sections.erase(It, LC.Sections.end());
115*d415bd75Srobert }
116*d415bd75Srobert
117*d415bd75Srobert auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
118*d415bd75Srobert std::optional<uint32_t> Section = S->section();
119*d415bd75Srobert return (Section && !OldIndexToSection.count(*Section));
120*d415bd75Srobert };
121*d415bd75Srobert
122*d415bd75Srobert SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
123*d415bd75Srobert for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
124*d415bd75Srobert if (IsDead(Sym))
125*d415bd75Srobert DeadSymbols.insert(Sym.get());
126*d415bd75Srobert
127*d415bd75Srobert for (const LoadCommand &LC : LoadCommands)
128*d415bd75Srobert for (const std::unique_ptr<Section> &Sec : LC.Sections)
129*d415bd75Srobert for (const RelocationInfo &R : Sec->Relocations)
130*d415bd75Srobert if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
131*d415bd75Srobert return createStringError(std::errc::invalid_argument,
132*d415bd75Srobert "symbol '%s' defined in section with index "
133*d415bd75Srobert "'%u' cannot be removed because it is "
134*d415bd75Srobert "referenced by a relocation in section '%s'",
135*d415bd75Srobert (*R.Symbol)->Name.c_str(),
136*d415bd75Srobert *((*R.Symbol)->section()),
137*d415bd75Srobert Sec->CanonicalName.c_str());
138*d415bd75Srobert SymTable.removeSymbols(IsDead);
139*d415bd75Srobert for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
140*d415bd75Srobert if (S->section())
141*d415bd75Srobert S->n_sect = OldIndexToSection[S->n_sect]->Index;
142*d415bd75Srobert return Error::success();
143*d415bd75Srobert }
144*d415bd75Srobert
nextAvailableSegmentAddress() const145*d415bd75Srobert uint64_t Object::nextAvailableSegmentAddress() const {
146*d415bd75Srobert uint64_t HeaderSize =
147*d415bd75Srobert is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
148*d415bd75Srobert uint64_t Addr = HeaderSize + Header.SizeOfCmds;
149*d415bd75Srobert for (const LoadCommand &LC : LoadCommands) {
150*d415bd75Srobert const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
151*d415bd75Srobert switch (MLC.load_command_data.cmd) {
152*d415bd75Srobert case MachO::LC_SEGMENT:
153*d415bd75Srobert Addr = std::max(Addr,
154*d415bd75Srobert static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
155*d415bd75Srobert MLC.segment_command_data.vmsize);
156*d415bd75Srobert break;
157*d415bd75Srobert case MachO::LC_SEGMENT_64:
158*d415bd75Srobert Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
159*d415bd75Srobert MLC.segment_command_64_data.vmsize);
160*d415bd75Srobert break;
161*d415bd75Srobert default:
162*d415bd75Srobert continue;
163*d415bd75Srobert }
164*d415bd75Srobert }
165*d415bd75Srobert return Addr;
166*d415bd75Srobert }
167*d415bd75Srobert
168*d415bd75Srobert template <typename SegmentType>
169*d415bd75Srobert static void
constructSegment(SegmentType & Seg,llvm::MachO::LoadCommandType CmdType,StringRef SegName,uint64_t SegVMAddr,uint64_t SegVMSize)170*d415bd75Srobert constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
171*d415bd75Srobert StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
172*d415bd75Srobert assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
173*d415bd75Srobert memset(&Seg, 0, sizeof(SegmentType));
174*d415bd75Srobert Seg.cmd = CmdType;
175*d415bd75Srobert strncpy(Seg.segname, SegName.data(), SegName.size());
176*d415bd75Srobert Seg.maxprot |=
177*d415bd75Srobert (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
178*d415bd75Srobert Seg.initprot |=
179*d415bd75Srobert (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
180*d415bd75Srobert Seg.vmaddr = SegVMAddr;
181*d415bd75Srobert Seg.vmsize = SegVMSize;
182*d415bd75Srobert }
183*d415bd75Srobert
addSegment(StringRef SegName,uint64_t SegVMSize)184*d415bd75Srobert LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
185*d415bd75Srobert LoadCommand LC;
186*d415bd75Srobert const uint64_t SegVMAddr = nextAvailableSegmentAddress();
187*d415bd75Srobert if (is64Bit())
188*d415bd75Srobert constructSegment(LC.MachOLoadCommand.segment_command_64_data,
189*d415bd75Srobert MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
190*d415bd75Srobert else
191*d415bd75Srobert constructSegment(LC.MachOLoadCommand.segment_command_data,
192*d415bd75Srobert MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
193*d415bd75Srobert
194*d415bd75Srobert LoadCommands.push_back(std::move(LC));
195*d415bd75Srobert return LoadCommands.back();
196*d415bd75Srobert }
197*d415bd75Srobert
198*d415bd75Srobert /// Extracts a segment name from a string which is possibly non-null-terminated.
extractSegmentName(const char * SegName)199*d415bd75Srobert static StringRef extractSegmentName(const char *SegName) {
200*d415bd75Srobert return StringRef(SegName,
201*d415bd75Srobert strnlen(SegName, sizeof(MachO::segment_command::segname)));
202*d415bd75Srobert }
203*d415bd75Srobert
getSegmentName() const204*d415bd75Srobert std::optional<StringRef> LoadCommand::getSegmentName() const {
205*d415bd75Srobert const MachO::macho_load_command &MLC = MachOLoadCommand;
206*d415bd75Srobert switch (MLC.load_command_data.cmd) {
207*d415bd75Srobert case MachO::LC_SEGMENT:
208*d415bd75Srobert return extractSegmentName(MLC.segment_command_data.segname);
209*d415bd75Srobert case MachO::LC_SEGMENT_64:
210*d415bd75Srobert return extractSegmentName(MLC.segment_command_64_data.segname);
211*d415bd75Srobert default:
212*d415bd75Srobert return std::nullopt;
213*d415bd75Srobert }
214*d415bd75Srobert }
215*d415bd75Srobert
getSegmentVMAddr() const216*d415bd75Srobert std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
217*d415bd75Srobert const MachO::macho_load_command &MLC = MachOLoadCommand;
218*d415bd75Srobert switch (MLC.load_command_data.cmd) {
219*d415bd75Srobert case MachO::LC_SEGMENT:
220*d415bd75Srobert return MLC.segment_command_data.vmaddr;
221*d415bd75Srobert case MachO::LC_SEGMENT_64:
222*d415bd75Srobert return MLC.segment_command_64_data.vmaddr;
223*d415bd75Srobert default:
224*d415bd75Srobert return std::nullopt;
225*d415bd75Srobert }
226*d415bd75Srobert }
227