xref: /netbsd-src/external/apache2/llvm/dist/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOObjcopy.h"
10 #include "../llvm-objcopy.h"
11 #include "CommonConfig.h"
12 #include "MachOReader.h"
13 #include "MachOWriter.h"
14 #include "MultiFormatConfig.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/Object/ArchiveWriter.h"
17 #include "llvm/Object/MachOUniversal.h"
18 #include "llvm/Object/MachOUniversalWriter.h"
19 #include "llvm/Support/Errc.h"
20 #include "llvm/Support/Error.h"
21 #include "llvm/Support/FileOutputBuffer.h"
22 #include "llvm/Support/SmallVectorMemoryBuffer.h"
23 
24 namespace llvm {
25 namespace objcopy {
26 namespace macho {
27 
28 using namespace object;
29 using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
30 using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
31 
32 #ifndef NDEBUG
isLoadCommandWithPayloadString(const LoadCommand & LC)33 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
34   // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
35   // LC_LAZY_LOAD_DYLIB
36   return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
37          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
38          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
39          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
40 }
41 #endif
42 
getPayloadString(const LoadCommand & LC)43 static StringRef getPayloadString(const LoadCommand &LC) {
44   assert(isLoadCommandWithPayloadString(LC) &&
45          "unsupported load command encountered");
46 
47   return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
48                    LC.Payload.size())
49       .rtrim('\0');
50 }
51 
removeSections(const CommonConfig & Config,Object & Obj)52 static Error removeSections(const CommonConfig &Config, Object &Obj) {
53   SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
54     return false;
55   };
56 
57   if (!Config.ToRemove.empty()) {
58     RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
59       return Config.ToRemove.matches(Sec->CanonicalName);
60     };
61   }
62 
63   if (Config.StripAll || Config.StripDebug) {
64     // Remove all debug sections.
65     RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
66       if (Sec->Segname == "__DWARF")
67         return true;
68 
69       return RemovePred(Sec);
70     };
71   }
72 
73   if (!Config.OnlySection.empty()) {
74     // Overwrite RemovePred because --only-section takes priority.
75     RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
76       return !Config.OnlySection.matches(Sec->CanonicalName);
77     };
78   }
79 
80   return Obj.removeSections(RemovePred);
81 }
82 
markSymbols(const CommonConfig &,Object & Obj)83 static void markSymbols(const CommonConfig &, Object &Obj) {
84   // Symbols referenced from the indirect symbol table must not be removed.
85   for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
86     if (ISE.Symbol)
87       (*ISE.Symbol)->Referenced = true;
88 }
89 
updateAndRemoveSymbols(const CommonConfig & Config,Object & Obj)90 static void updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
91   for (SymbolEntry &Sym : Obj.SymTable) {
92     auto I = Config.SymbolsToRename.find(Sym.Name);
93     if (I != Config.SymbolsToRename.end())
94       Sym.Name = std::string(I->getValue());
95   }
96 
97   auto RemovePred = [Config, &Obj](const std::unique_ptr<SymbolEntry> &N) {
98     if (N->Referenced)
99       return false;
100     if (Config.KeepUndefined && N->isUndefinedSymbol())
101       return false;
102     if (Config.StripAll)
103       return true;
104     if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
105       return true;
106     // This behavior is consistent with cctools' strip.
107     if (Config.StripSwiftSymbols && (Obj.Header.Flags & MachO::MH_DYLDLINK) &&
108         Obj.SwiftVersion && *Obj.SwiftVersion && N->isSwiftSymbol())
109       return true;
110     return false;
111   };
112 
113   Obj.SymTable.removeSymbols(RemovePred);
114 }
115 
116 template <typename LCType>
updateLoadCommandPayloadString(LoadCommand & LC,StringRef S)117 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
118   assert(isLoadCommandWithPayloadString(LC) &&
119          "unsupported load command encountered");
120 
121   uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
122 
123   LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
124   LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
125   std::copy(S.begin(), S.end(), LC.Payload.begin());
126 }
127 
buildRPathLoadCommand(StringRef Path)128 static LoadCommand buildRPathLoadCommand(StringRef Path) {
129   LoadCommand LC;
130   MachO::rpath_command RPathLC;
131   RPathLC.cmd = MachO::LC_RPATH;
132   RPathLC.path = sizeof(MachO::rpath_command);
133   RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
134   LC.MachOLoadCommand.rpath_command_data = RPathLC;
135   LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
136   std::copy(Path.begin(), Path.end(), LC.Payload.begin());
137   return LC;
138 }
139 
processLoadCommands(const CommonConfig & Config,Object & Obj)140 static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
141   // Remove RPaths.
142   DenseSet<StringRef> RPathsToRemove(Config.RPathsToRemove.begin(),
143                                      Config.RPathsToRemove.end());
144 
145   LoadCommandPred RemovePred = [&RPathsToRemove,
146                                 &Config](const LoadCommand &LC) {
147     if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
148       // When removing all RPaths we don't need to care
149       // about what it contains
150       if (Config.RemoveAllRpaths)
151         return true;
152 
153       StringRef RPath = getPayloadString(LC);
154       if (RPathsToRemove.count(RPath)) {
155         RPathsToRemove.erase(RPath);
156         return true;
157       }
158     }
159     return false;
160   };
161 
162   if (Error E = Obj.removeLoadCommands(RemovePred))
163     return E;
164 
165   // Emit an error if the Mach-O binary does not contain an rpath path name
166   // specified in -delete_rpath.
167   for (StringRef RPath : Config.RPathsToRemove) {
168     if (RPathsToRemove.count(RPath))
169       return createStringError(errc::invalid_argument,
170                                "no LC_RPATH load command with path: %s",
171                                RPath.str().c_str());
172   }
173 
174   DenseSet<StringRef> RPaths;
175 
176   // Get all existing RPaths.
177   for (LoadCommand &LC : Obj.LoadCommands) {
178     if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
179       RPaths.insert(getPayloadString(LC));
180   }
181 
182   // Throw errors for invalid RPaths.
183   for (const auto &OldNew : Config.RPathsToUpdate) {
184     StringRef Old = OldNew.getFirst();
185     StringRef New = OldNew.getSecond();
186     if (!RPaths.contains(Old))
187       return createStringError(errc::invalid_argument,
188                                "no LC_RPATH load command with path: " + Old);
189     if (RPaths.contains(New))
190       return createStringError(errc::invalid_argument,
191                                "rpath '" + New +
192                                    "' would create a duplicate load command");
193   }
194 
195   // Update load commands.
196   for (LoadCommand &LC : Obj.LoadCommands) {
197     switch (LC.MachOLoadCommand.load_command_data.cmd) {
198     case MachO::LC_ID_DYLIB:
199       if (Config.SharedLibId)
200         updateLoadCommandPayloadString<MachO::dylib_command>(
201             LC, *Config.SharedLibId);
202       break;
203 
204     case MachO::LC_RPATH: {
205       StringRef RPath = getPayloadString(LC);
206       StringRef NewRPath = Config.RPathsToUpdate.lookup(RPath);
207       if (!NewRPath.empty())
208         updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
209       break;
210     }
211 
212     // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
213     // here once llvm-objcopy supports them.
214     case MachO::LC_LOAD_DYLIB:
215     case MachO::LC_LOAD_WEAK_DYLIB:
216       StringRef InstallName = getPayloadString(LC);
217       StringRef NewInstallName =
218           Config.InstallNamesToUpdate.lookup(InstallName);
219       if (!NewInstallName.empty())
220         updateLoadCommandPayloadString<MachO::dylib_command>(LC,
221                                                              NewInstallName);
222       break;
223     }
224   }
225 
226   // Add new RPaths.
227   for (StringRef RPath : Config.RPathToAdd) {
228     if (RPaths.contains(RPath))
229       return createStringError(errc::invalid_argument,
230                                "rpath '" + RPath +
231                                    "' would create a duplicate load command");
232     RPaths.insert(RPath);
233     Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
234   }
235 
236   for (StringRef RPath : Config.RPathToPrepend) {
237     if (RPaths.contains(RPath))
238       return createStringError(errc::invalid_argument,
239                                "rpath '" + RPath +
240                                    "' would create a duplicate load command");
241 
242     RPaths.insert(RPath);
243     Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
244                             buildRPathLoadCommand(RPath));
245   }
246 
247   // Unlike appending rpaths, the indexes of subsequent load commands must
248   // be recalculated after prepending one.
249   if (!Config.RPathToPrepend.empty())
250     Obj.updateLoadCommandIndexes();
251 
252   return Error::success();
253 }
254 
dumpSectionToFile(StringRef SecName,StringRef Filename,Object & Obj)255 static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
256                                Object &Obj) {
257   for (LoadCommand &LC : Obj.LoadCommands)
258     for (const std::unique_ptr<Section> &Sec : LC.Sections) {
259       if (Sec->CanonicalName == SecName) {
260         Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
261             FileOutputBuffer::create(Filename, Sec->Content.size());
262         if (!BufferOrErr)
263           return BufferOrErr.takeError();
264         std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
265         llvm::copy(Sec->Content, Buf->getBufferStart());
266 
267         if (Error E = Buf->commit())
268           return E;
269         return Error::success();
270       }
271     }
272 
273   return createStringError(object_error::parse_failed, "section '%s' not found",
274                            SecName.str().c_str());
275 }
276 
addSection(StringRef SecName,StringRef Filename,Object & Obj)277 static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) {
278   ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
279       MemoryBuffer::getFile(Filename);
280   if (!BufOrErr)
281     return createFileError(Filename, errorCodeToError(BufOrErr.getError()));
282   std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
283 
284   std::pair<StringRef, StringRef> Pair = SecName.split(',');
285   StringRef TargetSegName = Pair.first;
286   Section Sec(TargetSegName, Pair.second);
287   Sec.Content = Obj.NewSectionsContents.save(Buf->getBuffer());
288   Sec.Size = Sec.Content.size();
289 
290   // Add the a section into an existing segment.
291   for (LoadCommand &LC : Obj.LoadCommands) {
292     Optional<StringRef> SegName = LC.getSegmentName();
293     if (SegName && SegName == TargetSegName) {
294       uint64_t Addr = *LC.getSegmentVMAddr();
295       for (const std::unique_ptr<Section> &S : LC.Sections)
296         Addr = std::max(Addr, S->Addr + S->Size);
297       LC.Sections.push_back(std::make_unique<Section>(Sec));
298       LC.Sections.back()->Addr = Addr;
299       return Error::success();
300     }
301   }
302 
303   // There's no segment named TargetSegName. Create a new load command and
304   // Insert a new section into it.
305   LoadCommand &NewSegment =
306       Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
307   NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
308   NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
309   return Error::success();
310 }
311 
312 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name
313 // ("<segment>,<section>") and lengths of both segment and section names are
314 // valid.
isValidMachOCannonicalName(StringRef Name)315 static Error isValidMachOCannonicalName(StringRef Name) {
316   if (Name.count(',') != 1)
317     return createStringError(errc::invalid_argument,
318                              "invalid section name '%s' (should be formatted "
319                              "as '<segment name>,<section name>')",
320                              Name.str().c_str());
321 
322   std::pair<StringRef, StringRef> Pair = Name.split(',');
323   if (Pair.first.size() > 16)
324     return createStringError(errc::invalid_argument,
325                              "too long segment name: '%s'",
326                              Pair.first.str().c_str());
327   if (Pair.second.size() > 16)
328     return createStringError(errc::invalid_argument,
329                              "too long section name: '%s'",
330                              Pair.second.str().c_str());
331   return Error::success();
332 }
333 
handleArgs(const CommonConfig & Config,Object & Obj)334 static Error handleArgs(const CommonConfig &Config, Object &Obj) {
335   // Dump sections before add/remove for compatibility with GNU objcopy.
336   for (StringRef Flag : Config.DumpSection) {
337     StringRef SectionName;
338     StringRef FileName;
339     std::tie(SectionName, FileName) = Flag.split('=');
340     if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
341       return E;
342   }
343 
344   if (Error E = removeSections(Config, Obj))
345     return E;
346 
347   // Mark symbols to determine which symbols are still needed.
348   if (Config.StripAll)
349     markSymbols(Config, Obj);
350 
351   updateAndRemoveSymbols(Config, Obj);
352 
353   if (Config.StripAll)
354     for (LoadCommand &LC : Obj.LoadCommands)
355       for (std::unique_ptr<Section> &Sec : LC.Sections)
356         Sec->Relocations.clear();
357 
358   for (const auto &Flag : Config.AddSection) {
359     std::pair<StringRef, StringRef> SecPair = Flag.split("=");
360     StringRef SecName = SecPair.first;
361     StringRef File = SecPair.second;
362     if (Error E = isValidMachOCannonicalName(SecName))
363       return E;
364     if (Error E = addSection(SecName, File, Obj))
365       return E;
366   }
367 
368   if (Error E = processLoadCommands(Config, Obj))
369     return E;
370 
371   return Error::success();
372 }
373 
executeObjcopyOnBinary(const CommonConfig & Config,const MachOConfig &,object::MachOObjectFile & In,raw_ostream & Out)374 Error executeObjcopyOnBinary(const CommonConfig &Config, const MachOConfig &,
375                              object::MachOObjectFile &In, raw_ostream &Out) {
376   MachOReader Reader(In);
377   Expected<std::unique_ptr<Object>> O = Reader.create();
378   if (!O)
379     return createFileError(Config.InputFilename, O.takeError());
380 
381   if (Error E = handleArgs(Config, **O))
382     return createFileError(Config.InputFilename, std::move(E));
383 
384   // Page size used for alignment of segment sizes in Mach-O executables and
385   // dynamic libraries.
386   uint64_t PageSize;
387   switch (In.getArch()) {
388   case Triple::ArchType::arm:
389   case Triple::ArchType::aarch64:
390   case Triple::ArchType::aarch64_32:
391     PageSize = 16384;
392     break;
393   default:
394     PageSize = 4096;
395   }
396 
397   MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
398   if (auto E = Writer.finalize())
399     return E;
400   return Writer.write();
401 }
402 
executeObjcopyOnMachOUniversalBinary(const MultiFormatConfig & Config,const MachOUniversalBinary & In,raw_ostream & Out)403 Error executeObjcopyOnMachOUniversalBinary(const MultiFormatConfig &Config,
404                                            const MachOUniversalBinary &In,
405                                            raw_ostream &Out) {
406   SmallVector<OwningBinary<Binary>, 2> Binaries;
407   SmallVector<Slice, 2> Slices;
408   for (const auto &O : In.objects()) {
409     Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
410     if (ArOrErr) {
411       Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
412           createNewArchiveMembers(Config, **ArOrErr);
413       if (!NewArchiveMembersOrErr)
414         return NewArchiveMembersOrErr.takeError();
415       Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
416           writeArchiveToBuffer(*NewArchiveMembersOrErr,
417                                (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(),
418                                Config.getCommonConfig().DeterministicArchives,
419                                (*ArOrErr)->isThin());
420       if (!OutputBufferOrErr)
421         return OutputBufferOrErr.takeError();
422       Expected<std::unique_ptr<Binary>> BinaryOrErr =
423           object::createBinary(**OutputBufferOrErr);
424       if (!BinaryOrErr)
425         return BinaryOrErr.takeError();
426       Binaries.emplace_back(std::move(*BinaryOrErr),
427                             std::move(*OutputBufferOrErr));
428       Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
429                           O.getCPUType(), O.getCPUSubType(),
430                           O.getArchFlagName(), O.getAlign());
431       continue;
432     }
433     // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
434     // ObjectForArch return an Error in case of the type mismatch. We need to
435     // check each in turn to see what kind of slice this is, so ignore errors
436     // produced along the way.
437     consumeError(ArOrErr.takeError());
438 
439     Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
440     if (!ObjOrErr) {
441       consumeError(ObjOrErr.takeError());
442       return createStringError(
443           std::errc::invalid_argument,
444           "slice for '%s' of the universal Mach-O binary "
445           "'%s' is not a Mach-O object or an archive",
446           O.getArchFlagName().c_str(),
447           Config.getCommonConfig().InputFilename.str().c_str());
448     }
449     std::string ArchFlagName = O.getArchFlagName();
450 
451     SmallVector<char, 0> Buffer;
452     raw_svector_ostream MemStream(Buffer);
453 
454     Expected<const MachOConfig &> MachO = Config.getMachOConfig();
455     if (!MachO)
456       return MachO.takeError();
457 
458     if (Error E = executeObjcopyOnBinary(Config.getCommonConfig(), *MachO,
459                                          **ObjOrErr, MemStream))
460       return E;
461 
462     std::unique_ptr<MemoryBuffer> MB =
463         std::make_unique<SmallVectorMemoryBuffer>(std::move(Buffer),
464                                                   ArchFlagName);
465     Expected<std::unique_ptr<Binary>> BinaryOrErr = object::createBinary(*MB);
466     if (!BinaryOrErr)
467       return BinaryOrErr.takeError();
468     Binaries.emplace_back(std::move(*BinaryOrErr), std::move(MB));
469     Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
470                         O.getAlign());
471   }
472 
473   if (Error Err = writeUniversalBinaryToStream(Slices, Out))
474     return Err;
475 
476   return Error::success();
477 }
478 
479 } // end namespace macho
480 } // end namespace objcopy
481 } // end namespace llvm
482