1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MachOObject.h"
10 #include "llvm/ADT/SmallPtrSet.h"
11 #include <unordered_set>
12
13 using namespace llvm;
14 using namespace llvm::objcopy::macho;
15
Section(StringRef SegName,StringRef SectName)16 Section::Section(StringRef SegName, StringRef SectName)
17 : Segname(SegName), Sectname(SectName),
18 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
19
Section(StringRef SegName,StringRef SectName,StringRef Content)20 Section::Section(StringRef SegName, StringRef SectName, StringRef Content)
21 : Segname(SegName), Sectname(SectName),
22 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
23 Content(Content) {}
24
getSymbolByIndex(uint32_t Index) const25 const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
26 assert(Index < Symbols.size() && "invalid symbol index");
27 return Symbols[Index].get();
28 }
29
getSymbolByIndex(uint32_t Index)30 SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) {
31 return const_cast<SymbolEntry *>(
32 static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
33 }
34
removeSymbols(function_ref<bool (const std::unique_ptr<SymbolEntry> &)> ToRemove)35 void SymbolTable::removeSymbols(
36 function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
37 llvm::erase_if(Symbols, ToRemove);
38 }
39
updateLoadCommandIndexes()40 void Object::updateLoadCommandIndexes() {
41 static constexpr char TextSegmentName[] = "__TEXT";
42 // Update indices of special load commands
43 for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
44 LoadCommand &LC = LoadCommands[Index];
45 switch (LC.MachOLoadCommand.load_command_data.cmd) {
46 case MachO::LC_CODE_SIGNATURE:
47 CodeSignatureCommandIndex = Index;
48 break;
49 case MachO::LC_SEGMENT:
50 if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
51 TextSegmentName)
52 TextSegmentCommandIndex = Index;
53 break;
54 case MachO::LC_SEGMENT_64:
55 if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
56 TextSegmentName)
57 TextSegmentCommandIndex = Index;
58 break;
59 case MachO::LC_SYMTAB:
60 SymTabCommandIndex = Index;
61 break;
62 case MachO::LC_DYSYMTAB:
63 DySymTabCommandIndex = Index;
64 break;
65 case MachO::LC_DYLD_INFO:
66 case MachO::LC_DYLD_INFO_ONLY:
67 DyLdInfoCommandIndex = Index;
68 break;
69 case MachO::LC_DATA_IN_CODE:
70 DataInCodeCommandIndex = Index;
71 break;
72 case MachO::LC_LINKER_OPTIMIZATION_HINT:
73 LinkerOptimizationHintCommandIndex = Index;
74 break;
75 case MachO::LC_FUNCTION_STARTS:
76 FunctionStartsCommandIndex = Index;
77 break;
78 case MachO::LC_DYLIB_CODE_SIGN_DRS:
79 DylibCodeSignDRsIndex = Index;
80 break;
81 case MachO::LC_DYLD_CHAINED_FIXUPS:
82 ChainedFixupsCommandIndex = Index;
83 break;
84 case MachO::LC_DYLD_EXPORTS_TRIE:
85 ExportsTrieCommandIndex = Index;
86 break;
87 }
88 }
89 }
90
removeLoadCommands(function_ref<bool (const LoadCommand &)> ToRemove)91 Error Object::removeLoadCommands(
92 function_ref<bool(const LoadCommand &)> ToRemove) {
93 auto It = std::stable_partition(
94 LoadCommands.begin(), LoadCommands.end(),
95 [&](const LoadCommand &LC) { return !ToRemove(LC); });
96 LoadCommands.erase(It, LoadCommands.end());
97
98 updateLoadCommandIndexes();
99 return Error::success();
100 }
101
removeSections(function_ref<bool (const std::unique_ptr<Section> &)> ToRemove)102 Error Object::removeSections(
103 function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
104 DenseMap<uint32_t, const Section *> OldIndexToSection;
105 uint32_t NextSectionIndex = 1;
106 for (LoadCommand &LC : LoadCommands) {
107 auto It = std::stable_partition(
108 std::begin(LC.Sections), std::end(LC.Sections),
109 [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
110 for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
111 OldIndexToSection[(*I)->Index] = I->get();
112 (*I)->Index = NextSectionIndex++;
113 }
114 LC.Sections.erase(It, LC.Sections.end());
115 }
116
117 auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
118 std::optional<uint32_t> Section = S->section();
119 return (Section && !OldIndexToSection.count(*Section));
120 };
121
122 SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
123 for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
124 if (IsDead(Sym))
125 DeadSymbols.insert(Sym.get());
126
127 for (const LoadCommand &LC : LoadCommands)
128 for (const std::unique_ptr<Section> &Sec : LC.Sections)
129 for (const RelocationInfo &R : Sec->Relocations)
130 if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
131 return createStringError(std::errc::invalid_argument,
132 "symbol '%s' defined in section with index "
133 "'%u' cannot be removed because it is "
134 "referenced by a relocation in section '%s'",
135 (*R.Symbol)->Name.c_str(),
136 *((*R.Symbol)->section()),
137 Sec->CanonicalName.c_str());
138 SymTable.removeSymbols(IsDead);
139 for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
140 if (S->section())
141 S->n_sect = OldIndexToSection[S->n_sect]->Index;
142 return Error::success();
143 }
144
nextAvailableSegmentAddress() const145 uint64_t Object::nextAvailableSegmentAddress() const {
146 uint64_t HeaderSize =
147 is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
148 uint64_t Addr = HeaderSize + Header.SizeOfCmds;
149 for (const LoadCommand &LC : LoadCommands) {
150 const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
151 switch (MLC.load_command_data.cmd) {
152 case MachO::LC_SEGMENT:
153 Addr = std::max(Addr,
154 static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
155 MLC.segment_command_data.vmsize);
156 break;
157 case MachO::LC_SEGMENT_64:
158 Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
159 MLC.segment_command_64_data.vmsize);
160 break;
161 default:
162 continue;
163 }
164 }
165 return Addr;
166 }
167
168 template <typename SegmentType>
169 static void
constructSegment(SegmentType & Seg,llvm::MachO::LoadCommandType CmdType,StringRef SegName,uint64_t SegVMAddr,uint64_t SegVMSize)170 constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType,
171 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
172 assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
173 memset(&Seg, 0, sizeof(SegmentType));
174 Seg.cmd = CmdType;
175 strncpy(Seg.segname, SegName.data(), SegName.size());
176 Seg.maxprot |=
177 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
178 Seg.initprot |=
179 (MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE);
180 Seg.vmaddr = SegVMAddr;
181 Seg.vmsize = SegVMSize;
182 }
183
addSegment(StringRef SegName,uint64_t SegVMSize)184 LoadCommand &Object::addSegment(StringRef SegName, uint64_t SegVMSize) {
185 LoadCommand LC;
186 const uint64_t SegVMAddr = nextAvailableSegmentAddress();
187 if (is64Bit())
188 constructSegment(LC.MachOLoadCommand.segment_command_64_data,
189 MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
190 else
191 constructSegment(LC.MachOLoadCommand.segment_command_data,
192 MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
193
194 LoadCommands.push_back(std::move(LC));
195 return LoadCommands.back();
196 }
197
198 /// Extracts a segment name from a string which is possibly non-null-terminated.
extractSegmentName(const char * SegName)199 static StringRef extractSegmentName(const char *SegName) {
200 return StringRef(SegName,
201 strnlen(SegName, sizeof(MachO::segment_command::segname)));
202 }
203
getSegmentName() const204 std::optional<StringRef> LoadCommand::getSegmentName() const {
205 const MachO::macho_load_command &MLC = MachOLoadCommand;
206 switch (MLC.load_command_data.cmd) {
207 case MachO::LC_SEGMENT:
208 return extractSegmentName(MLC.segment_command_data.segname);
209 case MachO::LC_SEGMENT_64:
210 return extractSegmentName(MLC.segment_command_64_data.segname);
211 default:
212 return std::nullopt;
213 }
214 }
215
getSegmentVMAddr() const216 std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
217 const MachO::macho_load_command &MLC = MachOLoadCommand;
218 switch (MLC.load_command_data.cmd) {
219 case MachO::LC_SEGMENT:
220 return MLC.segment_command_data.vmaddr;
221 case MachO::LC_SEGMENT_64:
222 return MLC.segment_command_64_data.vmaddr;
223 default:
224 return std::nullopt;
225 }
226 }
227