1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/ObjCopy/MachO/MachOObjcopy.h"
10 #include "Archive.h"
11 #include "MachOReader.h"
12 #include "MachOWriter.h"
13 #include "llvm/ADT/DenseSet.h"
14 #include "llvm/ObjCopy/CommonConfig.h"
15 #include "llvm/ObjCopy/MachO/MachOConfig.h"
16 #include "llvm/ObjCopy/MultiFormatConfig.h"
17 #include "llvm/ObjCopy/ObjCopy.h"
18 #include "llvm/Object/ArchiveWriter.h"
19 #include "llvm/Object/MachOUniversal.h"
20 #include "llvm/Object/MachOUniversalWriter.h"
21 #include "llvm/Support/Errc.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/FileOutputBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/SmallVectorMemoryBuffer.h"
26
27 using namespace llvm;
28 using namespace llvm::objcopy;
29 using namespace llvm::objcopy::macho;
30 using namespace llvm::object;
31
// Predicate over a section held by unique_ptr. In this file it is used to
// select the sections that should be removed from an object.
using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
// Predicate over a load command. In this file it is used to select the load
// commands that should be removed from an object.
using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
34
35 #ifndef NDEBUG
isLoadCommandWithPayloadString(const LoadCommand & LC)36 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
37 // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
38 // LC_LAZY_LOAD_DYLIB
39 return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
40 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
41 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
42 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
43 }
44 #endif
45
getPayloadString(const LoadCommand & LC)46 static StringRef getPayloadString(const LoadCommand &LC) {
47 assert(isLoadCommandWithPayloadString(LC) &&
48 "unsupported load command encountered");
49
50 return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
51 LC.Payload.size())
52 .rtrim('\0');
53 }
54
removeSections(const CommonConfig & Config,Object & Obj)55 static Error removeSections(const CommonConfig &Config, Object &Obj) {
56 SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
57 return false;
58 };
59
60 if (!Config.ToRemove.empty()) {
61 RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
62 return Config.ToRemove.matches(Sec->CanonicalName);
63 };
64 }
65
66 if (Config.StripAll || Config.StripDebug) {
67 // Remove all debug sections.
68 RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
69 if (Sec->Segname == "__DWARF")
70 return true;
71
72 return RemovePred(Sec);
73 };
74 }
75
76 if (!Config.OnlySection.empty()) {
77 // Overwrite RemovePred because --only-section takes priority.
78 RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
79 return !Config.OnlySection.matches(Sec->CanonicalName);
80 };
81 }
82
83 return Obj.removeSections(RemovePred);
84 }
85
markSymbols(const CommonConfig &,Object & Obj)86 static void markSymbols(const CommonConfig &, Object &Obj) {
87 // Symbols referenced from the indirect symbol table must not be removed.
88 for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
89 if (ISE.Symbol)
90 (*ISE.Symbol)->Referenced = true;
91 }
92
updateAndRemoveSymbols(const CommonConfig & Config,const MachOConfig & MachOConfig,Object & Obj)93 static void updateAndRemoveSymbols(const CommonConfig &Config,
94 const MachOConfig &MachOConfig,
95 Object &Obj) {
96 for (SymbolEntry &Sym : Obj.SymTable) {
97 auto I = Config.SymbolsToRename.find(Sym.Name);
98 if (I != Config.SymbolsToRename.end())
99 Sym.Name = std::string(I->getValue());
100 }
101
102 auto RemovePred = [&Config, &MachOConfig,
103 &Obj](const std::unique_ptr<SymbolEntry> &N) {
104 if (N->Referenced)
105 return false;
106 if (MachOConfig.KeepUndefined && N->isUndefinedSymbol())
107 return false;
108 if (N->n_desc & MachO::REFERENCED_DYNAMICALLY)
109 return false;
110 if (Config.StripAll)
111 return true;
112 if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
113 return true;
114 // This behavior is consistent with cctools' strip.
115 if (MachOConfig.StripSwiftSymbols &&
116 (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion &&
117 *Obj.SwiftVersion && N->isSwiftSymbol())
118 return true;
119 return false;
120 };
121
122 Obj.SymTable.removeSymbols(RemovePred);
123 }
124
125 template <typename LCType>
updateLoadCommandPayloadString(LoadCommand & LC,StringRef S)126 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
127 assert(isLoadCommandWithPayloadString(LC) &&
128 "unsupported load command encountered");
129
130 uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
131
132 LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
133 LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
134 std::copy(S.begin(), S.end(), LC.Payload.begin());
135 }
136
buildRPathLoadCommand(StringRef Path)137 static LoadCommand buildRPathLoadCommand(StringRef Path) {
138 LoadCommand LC;
139 MachO::rpath_command RPathLC;
140 RPathLC.cmd = MachO::LC_RPATH;
141 RPathLC.path = sizeof(MachO::rpath_command);
142 RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
143 LC.MachOLoadCommand.rpath_command_data = RPathLC;
144 LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
145 std::copy(Path.begin(), Path.end(), LC.Payload.begin());
146 return LC;
147 }
148
processLoadCommands(const MachOConfig & MachOConfig,Object & Obj)149 static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) {
150 // Remove RPaths.
151 DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(),
152 MachOConfig.RPathsToRemove.end());
153
154 LoadCommandPred RemovePred = [&RPathsToRemove,
155 &MachOConfig](const LoadCommand &LC) {
156 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
157 // When removing all RPaths we don't need to care
158 // about what it contains
159 if (MachOConfig.RemoveAllRpaths)
160 return true;
161
162 StringRef RPath = getPayloadString(LC);
163 if (RPathsToRemove.count(RPath)) {
164 RPathsToRemove.erase(RPath);
165 return true;
166 }
167 }
168 return false;
169 };
170
171 if (Error E = Obj.removeLoadCommands(RemovePred))
172 return E;
173
174 // Emit an error if the Mach-O binary does not contain an rpath path name
175 // specified in -delete_rpath.
176 for (StringRef RPath : MachOConfig.RPathsToRemove) {
177 if (RPathsToRemove.count(RPath))
178 return createStringError(errc::invalid_argument,
179 "no LC_RPATH load command with path: %s",
180 RPath.str().c_str());
181 }
182
183 DenseSet<StringRef> RPaths;
184
185 // Get all existing RPaths.
186 for (LoadCommand &LC : Obj.LoadCommands) {
187 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
188 RPaths.insert(getPayloadString(LC));
189 }
190
191 // Throw errors for invalid RPaths.
192 for (const auto &OldNew : MachOConfig.RPathsToUpdate) {
193 StringRef Old = OldNew.getFirst();
194 StringRef New = OldNew.getSecond();
195 if (!RPaths.contains(Old))
196 return createStringError(errc::invalid_argument,
197 "no LC_RPATH load command with path: " + Old);
198 if (RPaths.contains(New))
199 return createStringError(errc::invalid_argument,
200 "rpath '" + New +
201 "' would create a duplicate load command");
202 }
203
204 // Update load commands.
205 for (LoadCommand &LC : Obj.LoadCommands) {
206 switch (LC.MachOLoadCommand.load_command_data.cmd) {
207 case MachO::LC_ID_DYLIB:
208 if (MachOConfig.SharedLibId)
209 updateLoadCommandPayloadString<MachO::dylib_command>(
210 LC, *MachOConfig.SharedLibId);
211 break;
212
213 case MachO::LC_RPATH: {
214 StringRef RPath = getPayloadString(LC);
215 StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath);
216 if (!NewRPath.empty())
217 updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
218 break;
219 }
220
221 // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
222 // here once llvm-objcopy supports them.
223 case MachO::LC_LOAD_DYLIB:
224 case MachO::LC_LOAD_WEAK_DYLIB:
225 StringRef InstallName = getPayloadString(LC);
226 StringRef NewInstallName =
227 MachOConfig.InstallNamesToUpdate.lookup(InstallName);
228 if (!NewInstallName.empty())
229 updateLoadCommandPayloadString<MachO::dylib_command>(LC,
230 NewInstallName);
231 break;
232 }
233 }
234
235 // Add new RPaths.
236 for (StringRef RPath : MachOConfig.RPathToAdd) {
237 if (RPaths.contains(RPath))
238 return createStringError(errc::invalid_argument,
239 "rpath '" + RPath +
240 "' would create a duplicate load command");
241 RPaths.insert(RPath);
242 Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
243 }
244
245 for (StringRef RPath : MachOConfig.RPathToPrepend) {
246 if (RPaths.contains(RPath))
247 return createStringError(errc::invalid_argument,
248 "rpath '" + RPath +
249 "' would create a duplicate load command");
250
251 RPaths.insert(RPath);
252 Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
253 buildRPathLoadCommand(RPath));
254 }
255
256 // Unlike appending rpaths, the indexes of subsequent load commands must
257 // be recalculated after prepending one.
258 if (!MachOConfig.RPathToPrepend.empty())
259 Obj.updateLoadCommandIndexes();
260
261 // Remove any empty segments if required.
262 if (!MachOConfig.EmptySegmentsToRemove.empty()) {
263 auto RemovePred = [&MachOConfig](const LoadCommand &LC) {
264 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 ||
265 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) {
266 return LC.Sections.empty() &&
267 MachOConfig.EmptySegmentsToRemove.contains(*LC.getSegmentName());
268 }
269 return false;
270 };
271 if (Error E = Obj.removeLoadCommands(RemovePred))
272 return E;
273 }
274
275 return Error::success();
276 }
277
dumpSectionToFile(StringRef SecName,StringRef Filename,Object & Obj)278 static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
279 Object &Obj) {
280 for (LoadCommand &LC : Obj.LoadCommands)
281 for (const std::unique_ptr<Section> &Sec : LC.Sections) {
282 if (Sec->CanonicalName == SecName) {
283 Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
284 FileOutputBuffer::create(Filename, Sec->Content.size());
285 if (!BufferOrErr)
286 return BufferOrErr.takeError();
287 std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
288 llvm::copy(Sec->Content, Buf->getBufferStart());
289
290 if (Error E = Buf->commit())
291 return E;
292 return Error::success();
293 }
294 }
295
296 return createStringError(object_error::parse_failed, "section '%s' not found",
297 SecName.str().c_str());
298 }
299
addSection(const NewSectionInfo & NewSection,Object & Obj)300 static Error addSection(const NewSectionInfo &NewSection, Object &Obj) {
301 std::pair<StringRef, StringRef> Pair = NewSection.SectionName.split(',');
302 StringRef TargetSegName = Pair.first;
303 Section Sec(TargetSegName, Pair.second);
304 Sec.Content =
305 Obj.NewSectionsContents.save(NewSection.SectionData->getBuffer());
306 Sec.Size = Sec.Content.size();
307
308 // Add the a section into an existing segment.
309 for (LoadCommand &LC : Obj.LoadCommands) {
310 std::optional<StringRef> SegName = LC.getSegmentName();
311 if (SegName && SegName == TargetSegName) {
312 uint64_t Addr = *LC.getSegmentVMAddr();
313 for (const std::unique_ptr<Section> &S : LC.Sections)
314 Addr = std::max(Addr, S->Addr + S->Size);
315 LC.Sections.push_back(std::make_unique<Section>(Sec));
316 LC.Sections.back()->Addr = Addr;
317 return Error::success();
318 }
319 }
320
321 // There's no segment named TargetSegName. Create a new load command and
322 // Insert a new section into it.
323 LoadCommand &NewSegment =
324 Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
325 NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
326 NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
327 return Error::success();
328 }
329
findSection(StringRef SecName,Object & O)330 static Expected<Section &> findSection(StringRef SecName, Object &O) {
331 StringRef SegName;
332 std::tie(SegName, SecName) = SecName.split(",");
333 auto FoundSeg =
334 llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) {
335 return LC.getSegmentName() == SegName;
336 });
337 if (FoundSeg == O.LoadCommands.end())
338 return createStringError(errc::invalid_argument,
339 "could not find segment with name '%s'",
340 SegName.str().c_str());
341 auto FoundSec = llvm::find_if(FoundSeg->Sections,
342 [SecName](const std::unique_ptr<Section> &Sec) {
343 return Sec->Sectname == SecName;
344 });
345 if (FoundSec == FoundSeg->Sections.end())
346 return createStringError(errc::invalid_argument,
347 "could not find section with name '%s'",
348 SecName.str().c_str());
349
350 assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str());
351 return **FoundSec;
352 }
353
updateSection(const NewSectionInfo & NewSection,Object & O)354 static Error updateSection(const NewSectionInfo &NewSection, Object &O) {
355 Expected<Section &> SecToUpdateOrErr = findSection(NewSection.SectionName, O);
356
357 if (!SecToUpdateOrErr)
358 return SecToUpdateOrErr.takeError();
359 Section &Sec = *SecToUpdateOrErr;
360
361 if (NewSection.SectionData->getBufferSize() > Sec.Size)
362 return createStringError(
363 errc::invalid_argument,
364 "new section cannot be larger than previous section");
365 Sec.Content = O.NewSectionsContents.save(NewSection.SectionData->getBuffer());
366 Sec.Size = Sec.Content.size();
367 return Error::success();
368 }
369
370 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name
371 // ("<segment>,<section>") and lengths of both segment and section names are
372 // valid.
isValidMachOCannonicalName(StringRef Name)373 static Error isValidMachOCannonicalName(StringRef Name) {
374 if (Name.count(',') != 1)
375 return createStringError(errc::invalid_argument,
376 "invalid section name '%s' (should be formatted "
377 "as '<segment name>,<section name>')",
378 Name.str().c_str());
379
380 std::pair<StringRef, StringRef> Pair = Name.split(',');
381 if (Pair.first.size() > 16)
382 return createStringError(errc::invalid_argument,
383 "too long segment name: '%s'",
384 Pair.first.str().c_str());
385 if (Pair.second.size() > 16)
386 return createStringError(errc::invalid_argument,
387 "too long section name: '%s'",
388 Pair.second.str().c_str());
389 return Error::success();
390 }
391
handleArgs(const CommonConfig & Config,const MachOConfig & MachOConfig,Object & Obj)392 static Error handleArgs(const CommonConfig &Config,
393 const MachOConfig &MachOConfig, Object &Obj) {
394 // Dump sections before add/remove for compatibility with GNU objcopy.
395 for (StringRef Flag : Config.DumpSection) {
396 StringRef SectionName;
397 StringRef FileName;
398 std::tie(SectionName, FileName) = Flag.split('=');
399 if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
400 return E;
401 }
402
403 if (Error E = removeSections(Config, Obj))
404 return E;
405
406 // Mark symbols to determine which symbols are still needed.
407 if (Config.StripAll)
408 markSymbols(Config, Obj);
409
410 updateAndRemoveSymbols(Config, MachOConfig, Obj);
411
412 if (Config.StripAll)
413 for (LoadCommand &LC : Obj.LoadCommands)
414 for (std::unique_ptr<Section> &Sec : LC.Sections)
415 Sec->Relocations.clear();
416
417 for (const NewSectionInfo &NewSection : Config.AddSection) {
418 if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
419 return E;
420 if (Error E = addSection(NewSection, Obj))
421 return E;
422 }
423
424 for (const NewSectionInfo &NewSection : Config.UpdateSection) {
425 if (Error E = isValidMachOCannonicalName(NewSection.SectionName))
426 return E;
427 if (Error E = updateSection(NewSection, Obj))
428 return E;
429 }
430
431 if (Error E = processLoadCommands(MachOConfig, Obj))
432 return E;
433
434 return Error::success();
435 }
436
executeObjcopyOnBinary(const CommonConfig & Config,const MachOConfig & MachOConfig,object::MachOObjectFile & In,raw_ostream & Out)437 Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
438 const MachOConfig &MachOConfig,
439 object::MachOObjectFile &In,
440 raw_ostream &Out) {
441 MachOReader Reader(In);
442 Expected<std::unique_ptr<Object>> O = Reader.create();
443 if (!O)
444 return createFileError(Config.InputFilename, O.takeError());
445
446 if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD)
447 return createStringError(std::errc::not_supported,
448 "%s: MH_PRELOAD files are not supported",
449 Config.InputFilename.str().c_str());
450
451 if (Error E = handleArgs(Config, MachOConfig, **O))
452 return createFileError(Config.InputFilename, std::move(E));
453
454 // Page size used for alignment of segment sizes in Mach-O executables and
455 // dynamic libraries.
456 uint64_t PageSize;
457 switch (In.getArch()) {
458 case Triple::ArchType::arm:
459 case Triple::ArchType::aarch64:
460 case Triple::ArchType::aarch64_32:
461 PageSize = 16384;
462 break;
463 default:
464 PageSize = 4096;
465 }
466
467 MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(),
468 sys::path::filename(Config.OutputFilename), PageSize, Out);
469 if (auto E = Writer.finalize())
470 return E;
471 return Writer.write();
472 }
473
executeObjcopyOnMachOUniversalBinary(const MultiFormatConfig & Config,const MachOUniversalBinary & In,raw_ostream & Out)474 Error objcopy::macho::executeObjcopyOnMachOUniversalBinary(
475 const MultiFormatConfig &Config, const MachOUniversalBinary &In,
476 raw_ostream &Out) {
477 SmallVector<OwningBinary<Binary>, 2> Binaries;
478 SmallVector<Slice, 2> Slices;
479 for (const auto &O : In.objects()) {
480 Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
481 if (ArOrErr) {
482 Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
483 createNewArchiveMembers(Config, **ArOrErr);
484 if (!NewArchiveMembersOrErr)
485 return NewArchiveMembersOrErr.takeError();
486 auto Kind = (*ArOrErr)->kind();
487 if (Kind == object::Archive::K_BSD)
488 Kind = object::Archive::K_DARWIN;
489 Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
490 writeArchiveToBuffer(*NewArchiveMembersOrErr,
491 (*ArOrErr)->hasSymbolTable(), Kind,
492 Config.getCommonConfig().DeterministicArchives,
493 (*ArOrErr)->isThin());
494 if (!OutputBufferOrErr)
495 return OutputBufferOrErr.takeError();
496 Expected<std::unique_ptr<Binary>> BinaryOrErr =
497 object::createBinary(**OutputBufferOrErr);
498 if (!BinaryOrErr)
499 return BinaryOrErr.takeError();
500 Binaries.emplace_back(std::move(*BinaryOrErr),
501 std::move(*OutputBufferOrErr));
502 Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
503 O.getCPUType(), O.getCPUSubType(),
504 O.getArchFlagName(), O.getAlign());
505 continue;
506 }
507 // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
508 // ObjectForArch return an Error in case of the type mismatch. We need to
509 // check each in turn to see what kind of slice this is, so ignore errors
510 // produced along the way.
511 consumeError(ArOrErr.takeError());
512
513 Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
514 if (!ObjOrErr) {
515 consumeError(ObjOrErr.takeError());
516 return createStringError(
517 std::errc::invalid_argument,
518 "slice for '%s' of the universal Mach-O binary "
519 "'%s' is not a Mach-O object or an archive",
520 O.getArchFlagName().c_str(),
521 Config.getCommonConfig().InputFilename.str().c_str());
522 }
523 std::string ArchFlagName = O.getArchFlagName();
524
525 SmallVector<char, 0> Buffer;
526 raw_svector_ostream MemStream(Buffer);
527
528 Expected<const MachOConfig &> MachO = Config.getMachOConfig();
529 if (!MachO)
530 return MachO.takeError();
531
532 if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO,
533 **ObjOrErr, MemStream))
534 return E;
535
536 auto MB = std::make_unique<SmallVectorMemoryBuffer>(
537 std::move(Buffer), ArchFlagName, /*RequiresNullTerminator=*/false);
538 Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB);
539 if (!BinaryOrErr)
540 return BinaryOrErr.takeError();
541 Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB));
542 Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
543 O.getAlign());
544 }
545
546 if (Error Err = writeUniversalBinaryToStream(Slices, Out))
547 return Err;
548
549 return Error::success();
550 }
551