xref: /llvm-project/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp (revision 0d352b2ea767e043b47d78bfdbd6820356628314)
1 //===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 //
9 // This tool works as a wrapper over a linking job. This tool is used to create
10 // linked device images for offloading. It scans the linker's input for embedded
11 // device offloading data stored in sections `.llvm.offloading` and extracts it
12 // as a temporary file. The extracted device files will then be passed to a
13 // device linking job to create a final device image.
14 //
15 //===---------------------------------------------------------------------===//
16 
17 #include "clang/Basic/TargetID.h"
18 #include "clang/Basic/Version.h"
19 #include "llvm/ADT/MapVector.h"
20 #include "llvm/BinaryFormat/Magic.h"
21 #include "llvm/Bitcode/BitcodeWriter.h"
22 #include "llvm/CodeGen/CommandFlags.h"
23 #include "llvm/Frontend/Offloading/OffloadWrapper.h"
24 #include "llvm/Frontend/Offloading/Utility.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/DiagnosticPrinter.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/IRReader/IRReader.h"
29 #include "llvm/LTO/LTO.h"
30 #include "llvm/MC/TargetRegistry.h"
31 #include "llvm/Object/Archive.h"
32 #include "llvm/Object/ArchiveWriter.h"
33 #include "llvm/Object/Binary.h"
34 #include "llvm/Object/ELFObjectFile.h"
35 #include "llvm/Object/IRObjectFile.h"
36 #include "llvm/Object/ObjectFile.h"
37 #include "llvm/Object/OffloadBinary.h"
38 #include "llvm/Option/ArgList.h"
39 #include "llvm/Option/OptTable.h"
40 #include "llvm/Option/Option.h"
41 #include "llvm/Passes/PassPlugin.h"
42 #include "llvm/Remarks/HotnessThresholdParser.h"
43 #include "llvm/Support/CommandLine.h"
44 #include "llvm/Support/Errc.h"
45 #include "llvm/Support/FileOutputBuffer.h"
46 #include "llvm/Support/FileSystem.h"
47 #include "llvm/Support/InitLLVM.h"
48 #include "llvm/Support/MemoryBuffer.h"
49 #include "llvm/Support/Parallel.h"
50 #include "llvm/Support/Path.h"
51 #include "llvm/Support/Program.h"
52 #include "llvm/Support/Signals.h"
53 #include "llvm/Support/SourceMgr.h"
54 #include "llvm/Support/StringSaver.h"
55 #include "llvm/Support/TargetSelect.h"
56 #include "llvm/Support/TimeProfiler.h"
57 #include "llvm/Support/WithColor.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include "llvm/Target/TargetMachine.h"
60 #include "llvm/TargetParser/Host.h"
61 #include <atomic>
62 #include <optional>
63 
64 using namespace llvm;
65 using namespace llvm::opt;
66 using namespace llvm::object;
67 
68 // Various tools (e.g., llc and opt) duplicate this series of declarations for
69 // options related to passes and remarks.
70 
// Hidden boolean flag `-pass-remarks-with-hotness`.
71 static cl::opt<bool> RemarksWithHotness(
72     "pass-remarks-with-hotness",
73     cl::desc("With PGO, include profile count in optimization remarks"),
74     cl::Hidden);
75 
// Hidden option `-pass-remarks-hotness-threshold`; parsed by
// remarks::HotnessThresholdParser and initialized to 0.
76 static cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser>
77     RemarksHotnessThreshold(
78         "pass-remarks-hotness-threshold",
79         cl::desc("Minimum profile count required for "
80                  "an optimization remark to be output. "
81                  "Use 'auto' to apply the threshold from profile summary."),
82         cl::value_desc("N or 'auto'"), cl::init(0), cl::Hidden);
83 
// `-pass-remarks-output=<filename>`.
84 static cl::opt<std::string>
85     RemarksFilename("pass-remarks-output",
86                     cl::desc("Output filename for pass remarks"),
87                     cl::value_desc("filename"));
88 
// `-pass-remarks-filter=<regex>`.
89 static cl::opt<std::string>
90     RemarksPasses("pass-remarks-filter",
91                   cl::desc("Only record optimization remarks from passes whose "
92                            "names match the given regular expression"),
93                   cl::value_desc("regex"));
94 
// `-pass-remarks-format=<format>`; initialized to "yaml".
95 static cl::opt<std::string> RemarksFormat(
96     "pass-remarks-format",
97     cl::desc("The format used for serializing remarks (default: YAML)"),
98     cl::value_desc("format"), cl::init("yaml"));
99 
// `-load-pass-plugin`; a list option, so it may be given multiple times.
100 static cl::list<std::string>
101     PassPlugins("load-pass-plugin",
102                 cl::desc("Load passes from plugin library"));
103 
// `-passes=<pipeline>` and its short alias `-p`.
104 static cl::opt<std::string> PassPipeline(
105     "passes",
106     cl::desc(
107         "A textual description of the pass pipeline. To have analysis passes "
108         "available before a certain pass, add 'require<foo-analysis>'. "
109         "'-passes' overrides the pass pipeline (but not all effects) from "
110         "specifying '--opt-level=O?' (O2 is the default) to "
111         "clang-linker-wrapper.  Be sure to include the corresponding "
112         "'default<O?>' in '-passes'."));
113 static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline),
114                                cl::desc("Alias for -passes"));
115 
116 /// Path of the current binary.
/// Passed to WithColor::error() as the message prefix in reportError().
117 static const char *LinkerExecutable;
118 
119 /// Save intermediary results.
/// When set, createOutputFile() builds "<prefix>.<extension>" names instead of
/// asking the system for a temporary file.
120 static bool SaveTemps = false;
121 
122 /// Print arguments without executing.
123 static bool DryRun = false;
124 
125 /// Print verbose output.
126 static bool Verbose = false;
127 
128 /// Filename of the executable being created.
129 static StringRef ExecutableName;
130 
131 /// Binary path for the CUDA installation.
132 static std::string CudaBinaryPath;
133 
134 /// Mutex lock to protect writes to shared TempFiles in parallel.
135 static std::mutex TempFilesMutex;
136 
137 /// Temporary files created by the linker wrapper.
/// Kept in a std::list so that the StringRef returned by createOutputFile()
/// keeps pointing at valid storage when further entries are appended.
138 static std::list<SmallString<128>> TempFiles;
139 
140 /// Codegen flags for LTO backend.
141 static codegen::RegisterCodeGenFlags CodeGenFlags;
142 
/// Shorthand for the image description type declared in OffloadBinary.
143 using OffloadingImage = OffloadBinary::OffloadingImage;
144 
145 namespace llvm {
146 // Provide DenseMapInfo so that OffloadKind can be used in a DenseMap.
147 template <> struct DenseMapInfo<OffloadKind> {
148   static inline OffloadKind getEmptyKey() { return OFK_LAST; }
149   static inline OffloadKind getTombstoneKey() {
150     return static_cast<OffloadKind>(OFK_LAST + 1);
151   }
152   static unsigned getHashValue(const OffloadKind &Val) { return Val; }
153 
154   static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) {
155     return LHS == RHS;
156   }
157 };
158 } // namespace llvm
159 
160 namespace {
161 using std::error_code;
162 
/// Option flags specific to the linker wrapper.
/// Must not overlap with llvm::opt::DriverFlag.
enum WrapperFlags {
  /// Options only used by the linker wrapper.
  WrapperOnlyOption = 1 << 4,
  /// Options only used for device linking.
  DeviceOnlyOption = 1 << 5,
};
168 
// Option identifiers. Every OPTION(...) entry in LinkerWrapperOpts.inc is
// expanded through LLVM_MAKE_OPT_ID into one enumerator; LastOption follows
// the generated entries.
169 enum ID {
170   OPT_INVALID = 0, // This is not an option ID.
171 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
172 #include "LinkerWrapperOpts.inc"
173   LastOption
174 #undef OPTION
175 };
176 
// Emit the string-table section of the generated include.
// NOTE(review): presumably this defines OptionStrTable, which WrapperOptTable
// passes to its base class -- confirm against the generated file.
177 #define OPTTABLE_STR_TABLE_CODE
178 #include "LinkerWrapperOpts.inc"
179 #undef OPTTABLE_STR_TABLE_CODE
180 
// Emit the prefixes-table section of the generated include.
// NOTE(review): presumably this defines OptionPrefixesTable -- confirm.
181 #define OPTTABLE_PREFIXES_TABLE_CODE
182 #include "LinkerWrapperOpts.inc"
183 #undef OPTTABLE_PREFIXES_TABLE_CODE
184 
// One OptTable::Info record per OPTION(...) entry of the generated include.
185 static constexpr OptTable::Info InfoTable[] = {
186 #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
187 #include "LinkerWrapperOpts.inc"
188 #undef OPTION
189 };
190 
191 class WrapperOptTable : public opt::GenericOptTable {
192 public:
193   WrapperOptTable()
194       : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
195 };
196 
197 const OptTable &getOptTable() {
198   static const WrapperOptTable *Table = []() {
199     auto Result = std::make_unique<WrapperOptTable>();
200     return Result.release();
201   }();
202   return *Table;
203 }
204 
205 void printCommands(ArrayRef<StringRef> CmdArgs) {
206   if (CmdArgs.empty())
207     return;
208 
209   llvm::errs() << " \"" << CmdArgs.front() << "\" ";
210   for (auto IC = std::next(CmdArgs.begin()), IE = CmdArgs.end(); IC != IE; ++IC)
211     llvm::errs() << *IC << (std::next(IC) != IE ? " " : "\n");
212 }
213 
214 [[noreturn]] void reportError(Error E) {
215   outs().flush();
216   logAllUnhandledErrors(std::move(E),
217                         WithColor::error(errs(), LinkerExecutable));
218   exit(EXIT_FAILURE);
219 }
220 
221 std::string getMainExecutable(const char *Name) {
222   void *Ptr = (void *)(intptr_t)&getMainExecutable;
223   auto COWPath = sys::fs::getMainExecutable(Name, Ptr);
224   return sys::path::parent_path(COWPath).str();
225 }
226 
227 /// Get a temporary filename suitable for output.
228 Expected<StringRef> createOutputFile(const Twine &Prefix, StringRef Extension) {
229   std::scoped_lock<decltype(TempFilesMutex)> Lock(TempFilesMutex);
230   SmallString<128> OutputFile;
231   if (SaveTemps) {
232     (Prefix + "." + Extension).toNullTerminatedStringRef(OutputFile);
233   } else {
234     if (std::error_code EC =
235             sys::fs::createTemporaryFile(Prefix, Extension, OutputFile))
236       return createFileError(OutputFile, EC);
237   }
238 
239   TempFiles.emplace_back(std::move(OutputFile));
240   return TempFiles.back();
241 }
242 
243 /// Execute the command \p ExecutablePath with the arguments \p Args.
244 Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
245   if (Verbose || DryRun)
246     printCommands(Args);
247 
248   if (!DryRun)
249     if (sys::ExecuteAndWait(ExecutablePath, Args))
250       return createStringError(
251           "'%s' failed", sys::path::filename(ExecutablePath).str().c_str());
252   return Error::success();
253 }
254 
255 Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
256 
257   ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths);
258   if (!Path)
259     Path = sys::findProgramByName(Name);
260   if (!Path && DryRun)
261     return Name.str();
262   if (!Path)
263     return createStringError(Path.getError(),
264                              "Unable to find '" + Name + "' in path");
265   return *Path;
266 }
267 
268 bool linkerSupportsLTO(const ArgList &Args) {
269   llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
270   return Triple.isNVPTX() || Triple.isAMDGPU() ||
271          Args.getLastArgValue(OPT_linker_path_EQ).ends_with("lld");
272 }
273 
274 /// Returns the hashed value for a constant string.
275 std::string getHash(StringRef Str) {
276   llvm::MD5 Hasher;
277   llvm::MD5::MD5Result Hash;
278   Hasher.update(Str);
279   Hasher.final(Hash);
280   return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
281 }
282 
283 /// Renames offloading entry sections in a relocatable link so they do not
284 /// conflict with a later link job.
285 Error relocateOffloadSection(const ArgList &Args, StringRef Output) {
286   llvm::Triple Triple(
287       Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
288   if (Triple.isOSWindows())
289     return createStringError(
290         "Relocatable linking is not supported on COFF targets");
291 
292   Expected<std::string> ObjcopyPath =
293       findProgram("llvm-objcopy", {getMainExecutable("llvm-objcopy")});
294   if (!ObjcopyPath)
295     return ObjcopyPath.takeError();
296 
297   // Use the linker output file to get a unique hash. This creates a unique
298   // identifier to rename the sections to that is deterministic to the contents.
299   auto BufferOrErr = DryRun ? MemoryBuffer::getMemBuffer("")
300                             : MemoryBuffer::getFileOrSTDIN(Output);
301   if (!BufferOrErr)
302     return createStringError("Failed to open %s", Output.str().c_str());
303   std::string Suffix = "_" + getHash((*BufferOrErr)->getBuffer());
304 
305   SmallVector<StringRef> ObjcopyArgs = {
306       *ObjcopyPath,
307       Output,
308   };
309 
310   // Remove the old .llvm.offloading section to prevent further linking.
311   ObjcopyArgs.emplace_back("--remove-section");
312   ObjcopyArgs.emplace_back(".llvm.offloading");
313   for (StringRef Prefix : {"omp", "cuda", "hip"}) {
314     auto Section = (Prefix + "_offloading_entries").str();
315     // Rename the offloading entires to make them private to this link unit.
316     ObjcopyArgs.emplace_back("--rename-section");
317     ObjcopyArgs.emplace_back(
318         Args.MakeArgString(Section + "=" + Section + Suffix));
319 
320     // Rename the __start_ / __stop_ symbols appropriately to iterate over the
321     // newly renamed section containing the offloading entries.
322     ObjcopyArgs.emplace_back("--redefine-sym");
323     ObjcopyArgs.emplace_back(Args.MakeArgString("__start_" + Section + "=" +
324                                                 "__start_" + Section + Suffix));
325     ObjcopyArgs.emplace_back("--redefine-sym");
326     ObjcopyArgs.emplace_back(Args.MakeArgString("__stop_" + Section + "=" +
327                                                 "__stop_" + Section + Suffix));
328   }
329 
330   if (Error Err = executeCommands(*ObjcopyPath, ObjcopyArgs))
331     return Err;
332 
333   return Error::success();
334 }
335 
336 /// Runs the wrapped linker job with the newly created input.
337 Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
338   llvm::TimeTraceScope TimeScope("Execute host linker");
339 
340   // Render the linker arguments and add the newly created image. We add it
341   // after the output file to ensure it is linked with the correct libraries.
342   StringRef LinkerPath = Args.getLastArgValue(OPT_linker_path_EQ);
343   if (LinkerPath.empty())
344     return createStringError("linker path missing, must pass 'linker-path'");
345   ArgStringList NewLinkerArgs;
346   for (const opt::Arg *Arg : Args) {
347     // Do not forward arguments only intended for the linker wrapper.
348     if (Arg->getOption().hasFlag(WrapperOnlyOption))
349       continue;
350 
351     Arg->render(Args, NewLinkerArgs);
352     if (Arg->getOption().matches(OPT_o) || Arg->getOption().matches(OPT_out))
353       llvm::transform(Files, std::back_inserter(NewLinkerArgs),
354                       [&](StringRef Arg) { return Args.MakeArgString(Arg); });
355   }
356 
357   SmallVector<StringRef> LinkerArgs({LinkerPath});
358   for (StringRef Arg : NewLinkerArgs)
359     LinkerArgs.push_back(Arg);
360   if (Error Err = executeCommands(LinkerPath, LinkerArgs))
361     return Err;
362 
363   if (Args.hasArg(OPT_relocatable))
364     return relocateOffloadSection(Args, ExecutableName);
365 
366   return Error::success();
367 }
368 
369 void printVersion(raw_ostream &OS) {
370   OS << clang::getClangToolFullVersion("clang-linker-wrapper") << '\n';
371 }
372 
373 namespace nvptx {
374 Expected<StringRef>
375 fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
376           const ArgList &Args) {
377   llvm::TimeTraceScope TimeScope("NVPTX fatbinary");
378   // NVPTX uses the fatbinary program to bundle the linked images.
379   Expected<std::string> FatBinaryPath =
380       findProgram("fatbinary", {CudaBinaryPath + "/bin"});
381   if (!FatBinaryPath)
382     return FatBinaryPath.takeError();
383 
384   llvm::Triple Triple(
385       Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
386 
387   // Create a new file to write the linked device image to.
388   auto TempFileOrErr =
389       createOutputFile(sys::path::filename(ExecutableName), "fatbin");
390   if (!TempFileOrErr)
391     return TempFileOrErr.takeError();
392 
393   SmallVector<StringRef, 16> CmdArgs;
394   CmdArgs.push_back(*FatBinaryPath);
395   CmdArgs.push_back(Triple.isArch64Bit() ? "-64" : "-32");
396   CmdArgs.push_back("--create");
397   CmdArgs.push_back(*TempFileOrErr);
398   for (const auto &[File, Arch] : InputFiles)
399     CmdArgs.push_back(
400         Args.MakeArgString("--image=profile=" + Arch + ",file=" + File));
401 
402   if (Error Err = executeCommands(*FatBinaryPath, CmdArgs))
403     return std::move(Err);
404 
405   return *TempFileOrErr;
406 }
407 } // namespace nvptx
408 
409 namespace amdgcn {
410 Expected<StringRef>
411 fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
412           const ArgList &Args) {
413   llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary");
414 
415   // AMDGPU uses the clang-offload-bundler to bundle the linked images.
416   Expected<std::string> OffloadBundlerPath = findProgram(
417       "clang-offload-bundler", {getMainExecutable("clang-offload-bundler")});
418   if (!OffloadBundlerPath)
419     return OffloadBundlerPath.takeError();
420 
421   llvm::Triple Triple(
422       Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
423 
424   // Create a new file to write the linked device image to.
425   auto TempFileOrErr =
426       createOutputFile(sys::path::filename(ExecutableName), "hipfb");
427   if (!TempFileOrErr)
428     return TempFileOrErr.takeError();
429 
430   BumpPtrAllocator Alloc;
431   StringSaver Saver(Alloc);
432 
433   SmallVector<StringRef, 16> CmdArgs;
434   CmdArgs.push_back(*OffloadBundlerPath);
435   CmdArgs.push_back("-type=o");
436   CmdArgs.push_back("-bundle-align=4096");
437 
438   if (Args.hasArg(OPT_compress))
439     CmdArgs.push_back("-compress");
440   if (auto *Arg = Args.getLastArg(OPT_compression_level_eq))
441     CmdArgs.push_back(
442         Args.MakeArgString(Twine("-compression-level=") + Arg->getValue()));
443 
444   SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux-gnu"};
445   for (const auto &[File, Arch] : InputFiles)
446     Targets.push_back(Saver.save("hip-amdgcn-amd-amdhsa--" + Arch));
447   CmdArgs.push_back(Saver.save(llvm::join(Targets, ",")));
448 
449 #ifdef _WIN32
450   CmdArgs.push_back("-input=NUL");
451 #else
452   CmdArgs.push_back("-input=/dev/null");
453 #endif
454   for (const auto &[File, Arch] : InputFiles)
455     CmdArgs.push_back(Saver.save("-input=" + File));
456 
457   CmdArgs.push_back(Saver.save("-output=" + *TempFileOrErr));
458 
459   if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs))
460     return std::move(Err);
461 
462   return *TempFileOrErr;
463 }
464 } // namespace amdgcn
465 
466 namespace generic {
467 Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
468   llvm::TimeTraceScope TimeScope("Clang");
469   // Use `clang` to invoke the appropriate device tools.
470   Expected<std::string> ClangPath =
471       findProgram("clang", {getMainExecutable("clang")});
472   if (!ClangPath)
473     return ClangPath.takeError();
474 
475   const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
476   StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
477   if (Arch.empty())
478     Arch = "native";
479   // Create a new file to write the linked device image to. Assume that the
480   // input filename already has the device and architecture.
481   auto TempFileOrErr =
482       createOutputFile(sys::path::filename(ExecutableName) + "." +
483                            Triple.getArchName() + "." + Arch,
484                        "img");
485   if (!TempFileOrErr)
486     return TempFileOrErr.takeError();
487 
488   StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2");
489   SmallVector<StringRef, 16> CmdArgs{
490       *ClangPath,
491       "--no-default-config",
492       "-o",
493       *TempFileOrErr,
494       Args.MakeArgString("--target=" + Triple.getTriple()),
495       Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch)
496                         : Args.MakeArgString("-march=" + Arch),
497       Args.MakeArgString("-" + OptLevel),
498   };
499 
500   // Forward all of the `--offload-opt` and similar options to the device.
501   CmdArgs.push_back("-flto");
502   for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
503     CmdArgs.append(
504         {"-Xlinker",
505          Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))});
506 
507   if (!Triple.isNVPTX() && !Triple.isSPIRV())
508     CmdArgs.push_back("-Wl,--no-undefined");
509 
510   for (StringRef InputFile : InputFiles)
511     CmdArgs.push_back(InputFile);
512 
513   // If this is CPU offloading we copy the input libraries.
514   if (!Triple.isAMDGPU() && !Triple.isNVPTX() && !Triple.isSPIRV()) {
515     CmdArgs.push_back("-Wl,-Bsymbolic");
516     CmdArgs.push_back("-shared");
517     ArgStringList LinkerArgs;
518     for (const opt::Arg *Arg :
519          Args.filtered(OPT_INPUT, OPT_library, OPT_library_path, OPT_rpath,
520                        OPT_whole_archive, OPT_no_whole_archive)) {
521       // Sometimes needed libraries are passed by name, such as when using
522       // sanitizers. We need to check the file magic for any libraries.
523       if (Arg->getOption().matches(OPT_INPUT)) {
524         if (!sys::fs::exists(Arg->getValue()) ||
525             sys::fs::is_directory(Arg->getValue()))
526           continue;
527 
528         file_magic Magic;
529         if (auto EC = identify_magic(Arg->getValue(), Magic))
530           return createStringError("Failed to open %s", Arg->getValue());
531         if (Magic != file_magic::archive &&
532             Magic != file_magic::elf_shared_object)
533           continue;
534       }
535       if (Arg->getOption().matches(OPT_whole_archive))
536         LinkerArgs.push_back(Args.MakeArgString("-Wl,--whole-archive"));
537       else if (Arg->getOption().matches(OPT_no_whole_archive))
538         LinkerArgs.push_back(Args.MakeArgString("-Wl,--no-whole-archive"));
539       else
540         Arg->render(Args, LinkerArgs);
541     }
542     llvm::copy(LinkerArgs, std::back_inserter(CmdArgs));
543   }
544 
545   // Pass on -mllvm options to the linker invocation.
546   for (const opt::Arg *Arg : Args.filtered(OPT_mllvm))
547     CmdArgs.append({"-Xlinker", Args.MakeArgString(
548                                     "-mllvm=" + StringRef(Arg->getValue()))});
549 
550   if (Args.hasArg(OPT_debug))
551     CmdArgs.push_back("-g");
552 
553   if (SaveTemps)
554     CmdArgs.push_back("-save-temps");
555 
556   if (SaveTemps && linkerSupportsLTO(Args))
557     CmdArgs.push_back("-Wl,--save-temps");
558 
559   if (Args.hasArg(OPT_embed_bitcode))
560     CmdArgs.push_back("-Wl,--lto-emit-llvm");
561 
562   if (Verbose)
563     CmdArgs.push_back("-v");
564 
565   if (!CudaBinaryPath.empty())
566     CmdArgs.push_back(Args.MakeArgString("--cuda-path=" + CudaBinaryPath));
567 
568   for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg))
569     llvm::copy(
570         SmallVector<StringRef>({"-Xcuda-ptxas", Args.MakeArgString(Arg)}),
571         std::back_inserter(CmdArgs));
572 
573   for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ))
574     CmdArgs.append({"-Xlinker", Args.MakeArgString(Arg)});
575   for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ))
576     CmdArgs.push_back(Args.MakeArgString(Arg));
577 
578   if (Error Err = executeCommands(*ClangPath, CmdArgs))
579     return std::move(Err);
580 
581   return *TempFileOrErr;
582 }
583 } // namespace generic
584 
585 Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
586                                const ArgList &Args) {
587   const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
588   switch (Triple.getArch()) {
589   case Triple::nvptx:
590   case Triple::nvptx64:
591   case Triple::amdgcn:
592   case Triple::x86:
593   case Triple::x86_64:
594   case Triple::aarch64:
595   case Triple::aarch64_be:
596   case Triple::ppc64:
597   case Triple::ppc64le:
598   case Triple::spirv64:
599   case Triple::systemz:
600   case Triple::loongarch64:
601     return generic::clang(InputFiles, Args);
602   default:
603     return createStringError(Triple.getArchName() +
604                              " linking is not supported");
605   }
606 }
607 
608 Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
609   const OffloadBinary &Binary = *File.getBinary();
610 
611   StringRef Prefix =
612       sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier());
613 
614   auto TempFileOrErr = createOutputFile(
615       Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), "o");
616   if (!TempFileOrErr)
617     return TempFileOrErr.takeError();
618 
619   Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
620       FileOutputBuffer::create(*TempFileOrErr, Binary.getImage().size());
621   if (!OutputOrErr)
622     return OutputOrErr.takeError();
623   std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
624   llvm::copy(Binary.getImage(), Output->getBufferStart());
625   if (Error E = Output->commit())
626     return std::move(E);
627 
628   return *TempFileOrErr;
629 }
630 
631 // Compile the module to an object file using the appropriate target machine for
632 // the host triple.
633 Expected<StringRef> compileModule(Module &M, OffloadKind Kind) {
634   llvm::TimeTraceScope TimeScope("Compile module");
635   std::string Msg;
636   const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
637   if (!T)
638     return createStringError(Msg);
639 
640   auto Options =
641       codegen::InitTargetOptionsFromCodeGenFlags(Triple(M.getTargetTriple()));
642   StringRef CPU = "";
643   StringRef Features = "";
644   std::unique_ptr<TargetMachine> TM(
645       T->createTargetMachine(M.getTargetTriple(), CPU, Features, Options,
646                              Reloc::PIC_, M.getCodeModel()));
647 
648   if (M.getDataLayout().isDefault())
649     M.setDataLayout(TM->createDataLayout());
650 
651   int FD = -1;
652   auto TempFileOrErr =
653       createOutputFile(sys::path::filename(ExecutableName) + "." +
654                            getOffloadKindName(Kind) + ".image.wrapper",
655                        "o");
656   if (!TempFileOrErr)
657     return TempFileOrErr.takeError();
658   if (std::error_code EC = sys::fs::openFileForWrite(*TempFileOrErr, FD))
659     return errorCodeToError(EC);
660 
661   auto OS = std::make_unique<llvm::raw_fd_ostream>(FD, true);
662 
663   legacy::PassManager CodeGenPasses;
664   TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
665   CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII));
666   if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr,
667                               CodeGenFileType::ObjectFile))
668     return createStringError("Failed to execute host backend");
669   CodeGenPasses.run(M);
670 
671   return *TempFileOrErr;
672 }
673 
674 /// Creates the object file containing the device image and runtime
675 /// registration code from the device images stored in \p Images.
676 Expected<StringRef>
677 wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
678                  const ArgList &Args, OffloadKind Kind) {
679   llvm::TimeTraceScope TimeScope("Wrap bundled images");
680 
681   SmallVector<ArrayRef<char>, 4> BuffersToWrap;
682   for (const auto &Buffer : Buffers)
683     BuffersToWrap.emplace_back(
684         ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
685 
686   LLVMContext Context;
687   Module M("offload.wrapper.module", Context);
688   M.setTargetTriple(
689       Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
690 
691   switch (Kind) {
692   case OFK_OpenMP:
693     if (Error Err = offloading::wrapOpenMPBinaries(
694             M, BuffersToWrap,
695             offloading::getOffloadEntryArray(M, "omp_offloading_entries"),
696             /*Suffix=*/"", /*Relocatable=*/Args.hasArg(OPT_relocatable)))
697       return std::move(Err);
698     break;
699   case OFK_Cuda:
700     if (Error Err = offloading::wrapCudaBinary(
701             M, BuffersToWrap.front(),
702             offloading::getOffloadEntryArray(M, "cuda_offloading_entries"),
703             /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
704       return std::move(Err);
705     break;
706   case OFK_HIP:
707     if (Error Err = offloading::wrapHIPBinary(
708             M, BuffersToWrap.front(),
709             offloading::getOffloadEntryArray(M, "hip_offloading_entries")))
710       return std::move(Err);
711     break;
712   default:
713     return createStringError(getOffloadKindName(Kind) +
714                              " wrapping is not supported");
715   }
716 
717   if (Args.hasArg(OPT_print_wrapped_module))
718     errs() << M;
719   if (Args.hasArg(OPT_save_temps)) {
720     int FD = -1;
721     auto TempFileOrErr =
722         createOutputFile(sys::path::filename(ExecutableName) + "." +
723                              getOffloadKindName(Kind) + ".image.wrapper",
724                          "bc");
725     if (!TempFileOrErr)
726       return TempFileOrErr.takeError();
727     if (std::error_code EC = sys::fs::openFileForWrite(*TempFileOrErr, FD))
728       return errorCodeToError(EC);
729     llvm::raw_fd_ostream OS(FD, true);
730     WriteBitcodeToFile(M, OS);
731   }
732 
733   auto FileOrErr = compileModule(M, Kind);
734   if (!FileOrErr)
735     return FileOrErr.takeError();
736   return *FileOrErr;
737 }
738 
739 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
740 bundleOpenMP(ArrayRef<OffloadingImage> Images) {
741   SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
742   for (const OffloadingImage &Image : Images)
743     Buffers.emplace_back(
744         MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image)));
745 
746   return std::move(Buffers);
747 }
748 
749 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
750 bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
751   SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
752   for (const OffloadingImage &Image : Images)
753     InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(),
754                                            Image.StringData.lookup("arch")));
755 
756   Triple TheTriple = Triple(Images.front().StringData.lookup("triple"));
757   auto FileOrErr = nvptx::fatbinary(InputFiles, Args);
758   if (!FileOrErr)
759     return FileOrErr.takeError();
760 
761   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
762       llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
763 
764   SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
765   if (std::error_code EC = ImageOrError.getError())
766     return createFileError(*FileOrErr, EC);
767   Buffers.emplace_back(std::move(*ImageOrError));
768 
769   return std::move(Buffers);
770 }
771 
772 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
773 bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
774   SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
775   for (const OffloadingImage &Image : Images)
776     InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(),
777                                            Image.StringData.lookup("arch")));
778 
779   Triple TheTriple = Triple(Images.front().StringData.lookup("triple"));
780   auto FileOrErr = amdgcn::fatbinary(InputFiles, Args);
781   if (!FileOrErr)
782     return FileOrErr.takeError();
783 
784   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
785       llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
786 
787   SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
788   if (std::error_code EC = ImageOrError.getError())
789     return createFileError(*FileOrErr, EC);
790   Buffers.emplace_back(std::move(*ImageOrError));
791 
792   return std::move(Buffers);
793 }
794 
795 /// Transforms the input \p Images into the binary format the runtime expects
796 /// for the given \p Kind.
797 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
798 bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args,
799                    OffloadKind Kind) {
800   llvm::TimeTraceScope TimeScope("Bundle linked output");
801   switch (Kind) {
802   case OFK_OpenMP:
803     return bundleOpenMP(Images);
804   case OFK_Cuda:
805     return bundleCuda(Images, Args);
806   case OFK_HIP:
807     return bundleHIP(Images, Args);
808   default:
809     return createStringError(getOffloadKindName(Kind) +
810                              " bundling is not supported");
811   }
812 }
813 
814 /// Returns a new ArgList containg arguments used for the device linking phase.
815 DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
816                              const InputArgList &Args) {
817   DerivedArgList DAL = DerivedArgList(DerivedArgList(Args));
818   for (Arg *A : Args)
819     DAL.append(A);
820 
821   // Set the subarchitecture and target triple for this compilation.
822   const OptTable &Tbl = getOptTable();
823   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ),
824                    Args.MakeArgString(Input.front().getBinary()->getArch()));
825   DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ),
826                    Args.MakeArgString(Input.front().getBinary()->getTriple()));
827 
828   // If every input file is bitcode we have whole program visibility as we
829   // do only support static linking with bitcode.
830   auto ContainsBitcode = [](const OffloadFile &F) {
831     return identify_magic(F.getBinary()->getImage()) == file_magic::bitcode;
832   };
833   if (llvm::all_of(Input, ContainsBitcode))
834     DAL.AddFlagArg(nullptr, Tbl.getOption(OPT_whole_program));
835 
836   // Forward '-Xoffload-linker' options to the appropriate backend.
837   for (StringRef Arg : Args.getAllArgValues(OPT_device_linker_args_EQ)) {
838     auto [Triple, Value] = Arg.split('=');
839     llvm::Triple TT(Triple);
840     // If this isn't a recognized triple then it's an `arg=value` option.
841     if (TT.getArch() == Triple::ArchType::UnknownArch)
842       DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ),
843                        Args.MakeArgString(Arg));
844     else if (Value.empty())
845       DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ),
846                        Args.MakeArgString(Triple));
847     else if (Triple == DAL.getLastArgValue(OPT_triple_EQ))
848       DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ),
849                        Args.MakeArgString(Value));
850   }
851 
852   // Forward '-Xoffload-compiler' options to the appropriate backend.
853   for (StringRef Arg : Args.getAllArgValues(OPT_device_compiler_args_EQ)) {
854     auto [Triple, Value] = Arg.split('=');
855     llvm::Triple TT(Triple);
856     // If this isn't a recognized triple then it's an `arg=value` option.
857     if (TT.getArch() == Triple::ArchType::UnknownArch)
858       DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_compiler_arg_EQ),
859                        Args.MakeArgString(Arg));
860     else if (Value.empty())
861       DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_compiler_arg_EQ),
862                        Args.MakeArgString(Triple));
863     else if (Triple == DAL.getLastArgValue(OPT_triple_EQ))
864       DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_compiler_arg_EQ),
865                        Args.MakeArgString(Value));
866   }
867 
868   return DAL;
869 }
870 
871 Error handleOverrideImages(
872     const InputArgList &Args,
873     MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> &Images) {
874   for (StringRef Arg : Args.getAllArgValues(OPT_override_image)) {
875     OffloadKind Kind = getOffloadKind(Arg.split("=").first);
876     StringRef Filename = Arg.split("=").second;
877 
878     ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
879         MemoryBuffer::getFileOrSTDIN(Filename);
880     if (std::error_code EC = BufferOrErr.getError())
881       return createFileError(Filename, EC);
882 
883     Expected<std::unique_ptr<ObjectFile>> ElfOrErr =
884         ObjectFile::createELFObjectFile(**BufferOrErr,
885                                         /*InitContent=*/false);
886     if (!ElfOrErr)
887       return ElfOrErr.takeError();
888     ObjectFile &Elf = **ElfOrErr;
889 
890     OffloadingImage TheImage{};
891     TheImage.TheImageKind = IMG_Object;
892     TheImage.TheOffloadKind = Kind;
893     TheImage.StringData["triple"] =
894         Args.MakeArgString(Elf.makeTriple().getTriple());
895     if (std::optional<StringRef> CPU = Elf.tryGetCPUName())
896       TheImage.StringData["arch"] = Args.MakeArgString(*CPU);
897     TheImage.Image = std::move(*BufferOrErr);
898 
899     Images[Kind].emplace_back(std::move(TheImage));
900   }
901   return Error::success();
902 }
903 
904 /// Transforms all the extracted offloading input files into an image that can
905 /// be registered by the runtime.
906 Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
907     SmallVectorImpl<SmallVector<OffloadFile>> &LinkerInputFiles,
908     const InputArgList &Args, char **Argv, int Argc) {
909   llvm::TimeTraceScope TimeScope("Handle all device input");
910 
911   std::mutex ImageMtx;
912   MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> Images;
913 
914   // Initialize the images with any overriding inputs.
915   if (Args.hasArg(OPT_override_image))
916     if (Error Err = handleOverrideImages(Args, Images))
917       return std::move(Err);
918 
919   auto Err = parallelForEachError(LinkerInputFiles, [&](auto &Input) -> Error {
920     llvm::TimeTraceScope TimeScope("Link device input");
921 
922     // Each thread needs its own copy of the base arguments to maintain
923     // per-device argument storage of synthetic strings.
924     const OptTable &Tbl = getOptTable();
925     BumpPtrAllocator Alloc;
926     StringSaver Saver(Alloc);
927     auto BaseArgs =
928         Tbl.parseArgs(Argc, Argv, OPT_INVALID, Saver, [](StringRef Err) {
929           reportError(createStringError(Err));
930         });
931     auto LinkerArgs = getLinkerArgs(Input, BaseArgs);
932 
933     DenseSet<OffloadKind> ActiveOffloadKinds;
934     for (const auto &File : Input)
935       if (File.getBinary()->getOffloadKind() != OFK_None)
936         ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind());
937 
938     // Write any remaining device inputs to an output file.
939     SmallVector<StringRef> InputFiles;
940     for (const OffloadFile &File : Input) {
941       auto FileNameOrErr = writeOffloadFile(File);
942       if (!FileNameOrErr)
943         return FileNameOrErr.takeError();
944       InputFiles.emplace_back(*FileNameOrErr);
945     }
946 
947     // Link the remaining device files using the device linker.
948     auto OutputOrErr = linkDevice(InputFiles, LinkerArgs);
949     if (!OutputOrErr)
950       return OutputOrErr.takeError();
951 
952     // Store the offloading image for each linked output file.
953     for (OffloadKind Kind : ActiveOffloadKinds) {
954       llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
955           llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr);
956       if (std::error_code EC = FileOrErr.getError()) {
957         if (DryRun)
958           FileOrErr = MemoryBuffer::getMemBuffer("");
959         else
960           return createFileError(*OutputOrErr, EC);
961       }
962 
963       std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx);
964       OffloadingImage TheImage{};
965       TheImage.TheImageKind =
966           Args.hasArg(OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object;
967       TheImage.TheOffloadKind = Kind;
968       TheImage.StringData["triple"] =
969           Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_triple_EQ));
970       TheImage.StringData["arch"] =
971           Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_arch_EQ));
972       TheImage.Image = std::move(*FileOrErr);
973 
974       Images[Kind].emplace_back(std::move(TheImage));
975     }
976     return Error::success();
977   });
978   if (Err)
979     return std::move(Err);
980 
981   // Create a binary image of each offloading image and embed it into a new
982   // object file.
983   SmallVector<StringRef> WrappedOutput;
984   for (auto &[Kind, Input] : Images) {
985     // We sort the entries before bundling so they appear in a deterministic
986     // order in the final binary.
987     llvm::sort(Input, [](OffloadingImage &A, OffloadingImage &B) {
988       return A.StringData["triple"] > B.StringData["triple"] ||
989              A.StringData["arch"] > B.StringData["arch"] ||
990              A.TheOffloadKind < B.TheOffloadKind;
991     });
992     auto BundledImagesOrErr = bundleLinkedOutput(Input, Args, Kind);
993     if (!BundledImagesOrErr)
994       return BundledImagesOrErr.takeError();
995     auto OutputOrErr = wrapDeviceImages(*BundledImagesOrErr, Args, Kind);
996     if (!OutputOrErr)
997       return OutputOrErr.takeError();
998     WrappedOutput.push_back(*OutputOrErr);
999   }
1000 
1001   return WrappedOutput;
1002 }
1003 
1004 std::optional<std::string> findFile(StringRef Dir, StringRef Root,
1005                                     const Twine &Name) {
1006   SmallString<128> Path;
1007   if (Dir.starts_with("="))
1008     sys::path::append(Path, Root, Dir.substr(1), Name);
1009   else
1010     sys::path::append(Path, Dir, Name);
1011 
1012   if (sys::fs::exists(Path))
1013     return static_cast<std::string>(Path);
1014   return std::nullopt;
1015 }
1016 
1017 std::optional<std::string>
1018 findFromSearchPaths(StringRef Name, StringRef Root,
1019                     ArrayRef<StringRef> SearchPaths) {
1020   for (StringRef Dir : SearchPaths)
1021     if (std::optional<std::string> File = findFile(Dir, Root, Name))
1022       return File;
1023   return std::nullopt;
1024 }
1025 
1026 std::optional<std::string>
1027 searchLibraryBaseName(StringRef Name, StringRef Root,
1028                       ArrayRef<StringRef> SearchPaths) {
1029   for (StringRef Dir : SearchPaths) {
1030     if (std::optional<std::string> File =
1031             findFile(Dir, Root, "lib" + Name + ".so"))
1032       return File;
1033     if (std::optional<std::string> File =
1034             findFile(Dir, Root, "lib" + Name + ".a"))
1035       return File;
1036   }
1037   return std::nullopt;
1038 }
1039 
1040 /// Search for static libraries in the linker's library path given input like
1041 /// `-lfoo` or `-l:libfoo.a`.
1042 std::optional<std::string> searchLibrary(StringRef Input, StringRef Root,
1043                                          ArrayRef<StringRef> SearchPaths) {
1044   if (Input.starts_with(":") || Input.ends_with(".lib"))
1045     return findFromSearchPaths(Input.drop_front(), Root, SearchPaths);
1046   return searchLibraryBaseName(Input, Root, SearchPaths);
1047 }
1048 
/// Flag bits recorded per symbol while resolving symbols between inputs.
enum Symbol : uint32_t {
  Sym_None = 0x0,
  Sym_Undefined = 0x2,
  Sym_Weak = 0x4,
};
1055 
1056 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
1057 /// extract any symbols from it.
1058 Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
1059                                      bool IsArchive, StringSaver &Saver,
1060                                      DenseMap<StringRef, Symbol> &Syms) {
1061   Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer);
1062   if (!IRSymtabOrErr)
1063     return IRSymtabOrErr.takeError();
1064 
1065   bool ShouldExtract = !IsArchive;
1066   DenseMap<StringRef, Symbol> TmpSyms;
1067   for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
1068     for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
1069       if (Sym.isFormatSpecific() || !Sym.isGlobal())
1070         continue;
1071 
1072       bool NewSymbol = Syms.count(Sym.getName()) == 0;
1073       auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()];
1074 
1075       // We will extract if it defines a currenlty undefined non-weak
1076       // symbol.
1077       bool ResolvesStrongReference =
1078           ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
1079            !Sym.isUndefined());
1080       // We will extract if it defines a new global symbol visible to the
1081       // host. This is only necessary for code targeting an offloading
1082       // language.
1083       bool NewGlobalSymbol =
1084           ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() &&
1085            !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None &&
1086            (Sym.getVisibility() != GlobalValue::HiddenVisibility));
1087       ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
1088 
1089       // Update this symbol in the "table" with the new information.
1090       if (OldSym & Sym_Undefined && !Sym.isUndefined())
1091         TmpSyms[Saver.save(Sym.getName())] =
1092             static_cast<Symbol>(OldSym & ~Sym_Undefined);
1093       if (Sym.isUndefined() && NewSymbol)
1094         TmpSyms[Saver.save(Sym.getName())] =
1095             static_cast<Symbol>(OldSym | Sym_Undefined);
1096       if (Sym.isWeak())
1097         TmpSyms[Saver.save(Sym.getName())] =
1098             static_cast<Symbol>(OldSym | Sym_Weak);
1099     }
1100   }
1101 
1102   // If the file gets extracted we update the table with the new symbols.
1103   if (ShouldExtract)
1104     Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));
1105 
1106   return ShouldExtract;
1107 }
1108 
1109 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
1110 /// any symbols from it.
1111 Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
1112                                     bool IsArchive, StringSaver &Saver,
1113                                     DenseMap<StringRef, Symbol> &Syms) {
1114   bool ShouldExtract = !IsArchive;
1115   DenseMap<StringRef, Symbol> TmpSyms;
1116   for (SymbolRef Sym : Obj.symbols()) {
1117     auto FlagsOrErr = Sym.getFlags();
1118     if (!FlagsOrErr)
1119       return FlagsOrErr.takeError();
1120 
1121     if (!(*FlagsOrErr & SymbolRef::SF_Global) ||
1122         (*FlagsOrErr & SymbolRef::SF_FormatSpecific))
1123       continue;
1124 
1125     auto NameOrErr = Sym.getName();
1126     if (!NameOrErr)
1127       return NameOrErr.takeError();
1128 
1129     bool NewSymbol = Syms.count(*NameOrErr) == 0;
1130     auto OldSym = NewSymbol ? Sym_None : Syms[*NameOrErr];
1131 
1132     // We will extract if it defines a currenlty undefined non-weak symbol.
1133     bool ResolvesStrongReference = (OldSym & Sym_Undefined) &&
1134                                    !(OldSym & Sym_Weak) &&
1135                                    !(*FlagsOrErr & SymbolRef::SF_Undefined);
1136 
1137     // We will extract if it defines a new global symbol visible to the
1138     // host. This is only necessary for code targeting an offloading
1139     // language.
1140     bool NewGlobalSymbol =
1141         ((NewSymbol || (OldSym & Sym_Undefined)) &&
1142          !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
1143          !(*FlagsOrErr & SymbolRef::SF_Hidden));
1144     ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
1145 
1146     // Update this symbol in the "table" with the new information.
1147     if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined))
1148       TmpSyms[Saver.save(*NameOrErr)] =
1149           static_cast<Symbol>(OldSym & ~Sym_Undefined);
1150     if (*FlagsOrErr & SymbolRef::SF_Undefined && NewSymbol)
1151       TmpSyms[Saver.save(*NameOrErr)] =
1152           static_cast<Symbol>(OldSym | Sym_Undefined);
1153     if (*FlagsOrErr & SymbolRef::SF_Weak)
1154       TmpSyms[Saver.save(*NameOrErr)] = static_cast<Symbol>(OldSym | Sym_Weak);
1155   }
1156 
1157   // If the file gets extracted we update the table with the new symbols.
1158   if (ShouldExtract)
1159     Syms.insert(std::begin(TmpSyms), std::end(TmpSyms));
1160 
1161   return ShouldExtract;
1162 }
1163 
1164 /// Attempt to 'resolve' symbols found in input files. We use this to
1165 /// determine if an archive member needs to be extracted. An archive member
1166 /// will be extracted if any of the following is true.
1167 ///   1) It defines an undefined symbol in a regular object filie.
1168 ///   2) It defines a global symbol without hidden visibility that has not
1169 ///      yet been defined.
1170 Expected<bool> getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive,
1171                           StringSaver &Saver,
1172                           DenseMap<StringRef, Symbol> &Syms) {
1173   MemoryBufferRef Buffer = MemoryBufferRef(Image, "");
1174   switch (identify_magic(Image)) {
1175   case file_magic::bitcode:
1176     return getSymbolsFromBitcode(Buffer, Kind, IsArchive, Saver, Syms);
1177   case file_magic::elf_relocatable: {
1178     Expected<std::unique_ptr<ObjectFile>> ObjFile =
1179         ObjectFile::createObjectFile(Buffer);
1180     if (!ObjFile)
1181       return ObjFile.takeError();
1182     return getSymbolsFromObject(**ObjFile, Kind, IsArchive, Saver, Syms);
1183   }
1184   default:
1185     return false;
1186   }
1187 }
1188 
1189 /// Search the input files and libraries for embedded device offloading code
1190 /// and add it to the list of files to be linked. Files coming from static
1191 /// libraries are only added to the input if they are used by an existing
1192 /// input file. Returns a list of input files intended for a single linking job.
1193 Expected<SmallVector<SmallVector<OffloadFile>>>
1194 getDeviceInput(const ArgList &Args) {
1195   llvm::TimeTraceScope TimeScope("ExtractDeviceCode");
1196 
1197   // Skip all the input if the user is overriding the output.
1198   if (Args.hasArg(OPT_override_image))
1199     return SmallVector<SmallVector<OffloadFile>>();
1200 
1201   StringRef Root = Args.getLastArgValue(OPT_sysroot_EQ);
1202   SmallVector<StringRef> LibraryPaths;
1203   for (const opt::Arg *Arg : Args.filtered(OPT_library_path, OPT_libpath))
1204     LibraryPaths.push_back(Arg->getValue());
1205 
1206   BumpPtrAllocator Alloc;
1207   StringSaver Saver(Alloc);
1208 
1209   // Try to extract device code from the linker input files.
1210   bool WholeArchive = Args.hasArg(OPT_wholearchive_flag) ? true : false;
1211   SmallVector<OffloadFile> ObjectFilesToExtract;
1212   SmallVector<OffloadFile> ArchiveFilesToExtract;
1213   for (const opt::Arg *Arg : Args.filtered(
1214            OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) {
1215     if (Arg->getOption().matches(OPT_whole_archive) ||
1216         Arg->getOption().matches(OPT_no_whole_archive)) {
1217       WholeArchive = Arg->getOption().matches(OPT_whole_archive);
1218       continue;
1219     }
1220 
1221     std::optional<std::string> Filename =
1222         Arg->getOption().matches(OPT_library)
1223             ? searchLibrary(Arg->getValue(), Root, LibraryPaths)
1224             : std::string(Arg->getValue());
1225 
1226     if (!Filename && Arg->getOption().matches(OPT_library))
1227       reportError(
1228           createStringError("unable to find library -l%s", Arg->getValue()));
1229 
1230     if (!Filename || !sys::fs::exists(*Filename) ||
1231         sys::fs::is_directory(*Filename))
1232       continue;
1233 
1234     ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1235         MemoryBuffer::getFileOrSTDIN(*Filename);
1236     if (std::error_code EC = BufferOrErr.getError())
1237       return createFileError(*Filename, EC);
1238 
1239     MemoryBufferRef Buffer = **BufferOrErr;
1240     if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object)
1241       continue;
1242 
1243     SmallVector<OffloadFile> Binaries;
1244     if (Error Err = extractOffloadBinaries(Buffer, Binaries))
1245       return std::move(Err);
1246 
1247     for (auto &OffloadFile : Binaries) {
1248       if (identify_magic(Buffer.getBuffer()) == file_magic::archive &&
1249           !WholeArchive)
1250         ArchiveFilesToExtract.emplace_back(std::move(OffloadFile));
1251       else
1252         ObjectFilesToExtract.emplace_back(std::move(OffloadFile));
1253     }
1254   }
1255 
1256   // Link all standard input files and update the list of symbols.
1257   MapVector<OffloadFile::TargetID, SmallVector<OffloadFile, 0>> InputFiles;
1258   DenseMap<OffloadFile::TargetID, DenseMap<StringRef, Symbol>> Syms;
1259   for (OffloadFile &Binary : ObjectFilesToExtract) {
1260     if (!Binary.getBinary())
1261       continue;
1262 
1263     SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1264     for (const auto &[ID, Input] : InputFiles)
1265       if (object::areTargetsCompatible(Binary, ID))
1266         CompatibleTargets.emplace_back(ID);
1267 
1268     for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) {
1269       Expected<bool> ExtractOrErr = getSymbols(
1270           Binary.getBinary()->getImage(), Binary.getBinary()->getOffloadKind(),
1271           /*IsArchive=*/false, Saver, Syms[ID]);
1272       if (!ExtractOrErr)
1273         return ExtractOrErr.takeError();
1274 
1275       // If another target needs this binary it must be copied instead.
1276       if (Index == CompatibleTargets.size() - 1)
1277         InputFiles[ID].emplace_back(std::move(Binary));
1278       else
1279         InputFiles[ID].emplace_back(Binary.copy());
1280     }
1281   }
1282 
1283   // Archive members only extract if they define needed symbols. We do this
1284   // after every regular input file so that libraries may be included out of
1285   // order. This follows 'ld.lld' semantics which are more lenient.
1286   bool Extracted = true;
1287   while (Extracted) {
1288     Extracted = false;
1289     for (OffloadFile &Binary : ArchiveFilesToExtract) {
1290       // If the binary was previously extracted it will be set to null.
1291       if (!Binary.getBinary())
1292         continue;
1293 
1294       SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1295       for (const auto &[ID, Input] : InputFiles)
1296         if (object::areTargetsCompatible(Binary, ID))
1297           CompatibleTargets.emplace_back(ID);
1298 
1299       for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) {
1300         // Only extract an if we have an an object matching this target.
1301         if (!InputFiles.count(ID))
1302           continue;
1303 
1304         Expected<bool> ExtractOrErr =
1305             getSymbols(Binary.getBinary()->getImage(),
1306                        Binary.getBinary()->getOffloadKind(),
1307                        /*IsArchive=*/true, Saver, Syms[ID]);
1308         if (!ExtractOrErr)
1309           return ExtractOrErr.takeError();
1310 
1311         Extracted = *ExtractOrErr;
1312 
1313         // Skip including the file if it is an archive that does not resolve
1314         // any symbols.
1315         if (!Extracted)
1316           continue;
1317 
1318         // If another target needs this binary it must be copied instead.
1319         if (Index == CompatibleTargets.size() - 1)
1320           InputFiles[ID].emplace_back(std::move(Binary));
1321         else
1322           InputFiles[ID].emplace_back(Binary.copy());
1323       }
1324 
1325       // If we extracted any files we need to check all the symbols again.
1326       if (Extracted)
1327         break;
1328     }
1329   }
1330 
1331   SmallVector<SmallVector<OffloadFile>> InputsForTarget;
1332   for (auto &[ID, Input] : InputFiles)
1333     InputsForTarget.emplace_back(std::move(Input));
1334 
1335   return std::move(InputsForTarget);
1336 }
1337 
1338 } // namespace
1339 
1340 int main(int Argc, char **Argv) {
1341   InitLLVM X(Argc, Argv);
1342   InitializeAllTargetInfos();
1343   InitializeAllTargets();
1344   InitializeAllTargetMCs();
1345   InitializeAllAsmParsers();
1346   InitializeAllAsmPrinters();
1347 
1348   LinkerExecutable = Argv[0];
1349   sys::PrintStackTraceOnErrorSignal(Argv[0]);
1350 
1351   const OptTable &Tbl = getOptTable();
1352   BumpPtrAllocator Alloc;
1353   StringSaver Saver(Alloc);
1354   auto Args = Tbl.parseArgs(Argc, Argv, OPT_INVALID, Saver, [&](StringRef Err) {
1355     reportError(createStringError(Err));
1356   });
1357 
1358   if (Args.hasArg(OPT_help) || Args.hasArg(OPT_help_hidden)) {
1359     Tbl.printHelp(
1360         outs(),
1361         "clang-linker-wrapper [options] -- <options to passed to the linker>",
1362         "\nA wrapper utility over the host linker. It scans the input files\n"
1363         "for sections that require additional processing prior to linking.\n"
1364         "The will then transparently pass all arguments and input to the\n"
1365         "specified host linker to create the final binary.\n",
1366         Args.hasArg(OPT_help_hidden), Args.hasArg(OPT_help_hidden));
1367     return EXIT_SUCCESS;
1368   }
1369   if (Args.hasArg(OPT_v)) {
1370     printVersion(outs());
1371     return EXIT_SUCCESS;
1372   }
1373 
1374   // This forwards '-mllvm' arguments to LLVM if present.
1375   SmallVector<const char *> NewArgv = {Argv[0]};
1376   for (const opt::Arg *Arg : Args.filtered(OPT_mllvm))
1377     NewArgv.push_back(Arg->getValue());
1378   for (const opt::Arg *Arg : Args.filtered(OPT_offload_opt_eq_minus))
1379     NewArgv.push_back(Arg->getValue());
1380   SmallVector<PassPlugin, 1> PluginList;
1381   PassPlugins.setCallback([&](const std::string &PluginPath) {
1382     auto Plugin = PassPlugin::Load(PluginPath);
1383     if (!Plugin)
1384       report_fatal_error(Plugin.takeError(), /*gen_crash_diag=*/false);
1385     PluginList.emplace_back(Plugin.get());
1386   });
1387   cl::ParseCommandLineOptions(NewArgv.size(), &NewArgv[0]);
1388 
1389   Verbose = Args.hasArg(OPT_verbose);
1390   DryRun = Args.hasArg(OPT_dry_run);
1391   SaveTemps = Args.hasArg(OPT_save_temps);
1392   CudaBinaryPath = Args.getLastArgValue(OPT_cuda_path_EQ).str();
1393 
1394   llvm::Triple Triple(
1395       Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
1396   if (Args.hasArg(OPT_o))
1397     ExecutableName = Args.getLastArgValue(OPT_o, "a.out");
1398   else if (Args.hasArg(OPT_out))
1399     ExecutableName = Args.getLastArgValue(OPT_out, "a.exe");
1400   else
1401     ExecutableName = Triple.isOSWindows() ? "a.exe" : "a.out";
1402 
1403   parallel::strategy = hardware_concurrency(1);
1404   if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) {
1405     unsigned Threads = 0;
1406     if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0)
1407       reportError(createStringError("%s: expected a positive integer, got '%s'",
1408                                     Arg->getSpelling().data(),
1409                                     Arg->getValue()));
1410     parallel::strategy = hardware_concurrency(Threads);
1411   }
1412 
1413   if (Args.hasArg(OPT_wrapper_time_trace_eq)) {
1414     unsigned Granularity;
1415     Args.getLastArgValue(OPT_wrapper_time_trace_granularity, "500")
1416         .getAsInteger(10, Granularity);
1417     timeTraceProfilerInitialize(Granularity, Argv[0]);
1418   }
1419 
1420   {
1421     llvm::TimeTraceScope TimeScope("Execute linker wrapper");
1422 
1423     // Extract the device input files stored in the host fat binary.
1424     auto DeviceInputFiles = getDeviceInput(Args);
1425     if (!DeviceInputFiles)
1426       reportError(DeviceInputFiles.takeError());
1427 
1428     // Link and wrap the device images extracted from the linker input.
1429     auto FilesOrErr =
1430         linkAndWrapDeviceFiles(*DeviceInputFiles, Args, Argv, Argc);
1431     if (!FilesOrErr)
1432       reportError(FilesOrErr.takeError());
1433 
1434     // Run the host linking job with the rendered arguments.
1435     if (Error Err = runLinker(*FilesOrErr, Args))
1436       reportError(std::move(Err));
1437   }
1438 
1439   if (const opt::Arg *Arg = Args.getLastArg(OPT_wrapper_time_trace_eq)) {
1440     if (Error Err = timeTraceProfilerWrite(Arg->getValue(), ExecutableName))
1441       reportError(std::move(Err));
1442     timeTraceProfilerCleanup();
1443   }
1444 
1445   // Remove the temporary files created.
1446   if (!SaveTemps)
1447     for (const auto &TempFile : TempFiles)
1448       if (std::error_code EC = sys::fs::remove(TempFile))
1449         reportError(createFileError(TempFile, EC));
1450 
1451   return EXIT_SUCCESS;
1452 }
1453