1 //===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===---------------------------------------------------------------------===// 8 // 9 // This tool works as a wrapper over a linking job. This tool is used to create 10 // linked device images for offloading. It scans the linker's input for embedded 11 // device offloading data stored in sections `.llvm.offloading` and extracts it 12 // as a temporary file. The extracted device files will then be passed to a 13 // device linking job to create a final device image. 14 // 15 //===---------------------------------------------------------------------===// 16 17 #include "clang/Basic/TargetID.h" 18 #include "clang/Basic/Version.h" 19 #include "llvm/ADT/MapVector.h" 20 #include "llvm/BinaryFormat/Magic.h" 21 #include "llvm/Bitcode/BitcodeWriter.h" 22 #include "llvm/CodeGen/CommandFlags.h" 23 #include "llvm/Frontend/Offloading/OffloadWrapper.h" 24 #include "llvm/Frontend/Offloading/Utility.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/DiagnosticPrinter.h" 27 #include "llvm/IR/Module.h" 28 #include "llvm/IRReader/IRReader.h" 29 #include "llvm/LTO/LTO.h" 30 #include "llvm/MC/TargetRegistry.h" 31 #include "llvm/Object/Archive.h" 32 #include "llvm/Object/ArchiveWriter.h" 33 #include "llvm/Object/Binary.h" 34 #include "llvm/Object/ELFObjectFile.h" 35 #include "llvm/Object/IRObjectFile.h" 36 #include "llvm/Object/ObjectFile.h" 37 #include "llvm/Object/OffloadBinary.h" 38 #include "llvm/Option/ArgList.h" 39 #include "llvm/Option/OptTable.h" 40 #include "llvm/Option/Option.h" 41 #include "llvm/Passes/PassPlugin.h" 42 #include "llvm/Remarks/HotnessThresholdParser.h" 43 #include "llvm/Support/CommandLine.h" 44 #include "llvm/Support/Errc.h" 45 #include 
"llvm/Support/FileOutputBuffer.h" 46 #include "llvm/Support/FileSystem.h" 47 #include "llvm/Support/InitLLVM.h" 48 #include "llvm/Support/MemoryBuffer.h" 49 #include "llvm/Support/Parallel.h" 50 #include "llvm/Support/Path.h" 51 #include "llvm/Support/Program.h" 52 #include "llvm/Support/Signals.h" 53 #include "llvm/Support/SourceMgr.h" 54 #include "llvm/Support/StringSaver.h" 55 #include "llvm/Support/TargetSelect.h" 56 #include "llvm/Support/TimeProfiler.h" 57 #include "llvm/Support/WithColor.h" 58 #include "llvm/Support/raw_ostream.h" 59 #include "llvm/Target/TargetMachine.h" 60 #include "llvm/TargetParser/Host.h" 61 #include <atomic> 62 #include <optional> 63 64 using namespace llvm; 65 using namespace llvm::opt; 66 using namespace llvm::object; 67 68 // Various tools (e.g., llc and opt) duplicate this series of declarations for 69 // options related to passes and remarks. 70 71 static cl::opt<bool> RemarksWithHotness( 72 "pass-remarks-with-hotness", 73 cl::desc("With PGO, include profile count in optimization remarks"), 74 cl::Hidden); 75 76 static cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser> 77 RemarksHotnessThreshold( 78 "pass-remarks-hotness-threshold", 79 cl::desc("Minimum profile count required for " 80 "an optimization remark to be output. 
" 81 "Use 'auto' to apply the threshold from profile summary."), 82 cl::value_desc("N or 'auto'"), cl::init(0), cl::Hidden); 83 84 static cl::opt<std::string> 85 RemarksFilename("pass-remarks-output", 86 cl::desc("Output filename for pass remarks"), 87 cl::value_desc("filename")); 88 89 static cl::opt<std::string> 90 RemarksPasses("pass-remarks-filter", 91 cl::desc("Only record optimization remarks from passes whose " 92 "names match the given regular expression"), 93 cl::value_desc("regex")); 94 95 static cl::opt<std::string> RemarksFormat( 96 "pass-remarks-format", 97 cl::desc("The format used for serializing remarks (default: YAML)"), 98 cl::value_desc("format"), cl::init("yaml")); 99 100 static cl::list<std::string> 101 PassPlugins("load-pass-plugin", 102 cl::desc("Load passes from plugin library")); 103 104 static cl::opt<std::string> PassPipeline( 105 "passes", 106 cl::desc( 107 "A textual description of the pass pipeline. To have analysis passes " 108 "available before a certain pass, add 'require<foo-analysis>'. " 109 "'-passes' overrides the pass pipeline (but not all effects) from " 110 "specifying '--opt-level=O?' (O2 is the default) to " 111 "clang-linker-wrapper. Be sure to include the corresponding " 112 "'default<O?>' in '-passes'.")); 113 static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline), 114 cl::desc("Alias for -passes")); 115 116 /// Path of the current binary. 117 static const char *LinkerExecutable; 118 119 /// Save intermediary results. 120 static bool SaveTemps = false; 121 122 /// Print arguments without executing. 123 static bool DryRun = false; 124 125 /// Print verbose output. 126 static bool Verbose = false; 127 128 /// Filename of the executable being created. 129 static StringRef ExecutableName; 130 131 /// Binary path for the CUDA installation. 132 static std::string CudaBinaryPath; 133 134 /// Mutex lock to protect writes to shared TempFiles in parallel. 
135 static std::mutex TempFilesMutex; 136 137 /// Temporary files created by the linker wrapper. 138 static std::list<SmallString<128>> TempFiles; 139 140 /// Codegen flags for LTO backend. 141 static codegen::RegisterCodeGenFlags CodeGenFlags; 142 143 using OffloadingImage = OffloadBinary::OffloadingImage; 144 145 namespace llvm { 146 // Provide DenseMapInfo so that OffloadKind can be used in a DenseMap. 147 template <> struct DenseMapInfo<OffloadKind> { 148 static inline OffloadKind getEmptyKey() { return OFK_LAST; } 149 static inline OffloadKind getTombstoneKey() { 150 return static_cast<OffloadKind>(OFK_LAST + 1); 151 } 152 static unsigned getHashValue(const OffloadKind &Val) { return Val; } 153 154 static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) { 155 return LHS == RHS; 156 } 157 }; 158 } // namespace llvm 159 160 namespace { 161 using std::error_code; 162 163 /// Must not overlap with llvm::opt::DriverFlag. 164 enum WrapperFlags { 165 WrapperOnlyOption = (1 << 4), // Options only used by the linker wrapper. 166 DeviceOnlyOption = (1 << 5), // Options only used for device linking. 167 }; 168 169 enum ID { 170 OPT_INVALID = 0, // This is not an option ID. 171 #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), 172 #include "LinkerWrapperOpts.inc" 173 LastOption 174 #undef OPTION 175 }; 176 177 #define OPTTABLE_STR_TABLE_CODE 178 #include "LinkerWrapperOpts.inc" 179 #undef OPTTABLE_STR_TABLE_CODE 180 181 #define OPTTABLE_PREFIXES_TABLE_CODE 182 #include "LinkerWrapperOpts.inc" 183 #undef OPTTABLE_PREFIXES_TABLE_CODE 184 185 static constexpr OptTable::Info InfoTable[] = { 186 #define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), 187 #include "LinkerWrapperOpts.inc" 188 #undef OPTION 189 }; 190 191 class WrapperOptTable : public opt::GenericOptTable { 192 public: 193 WrapperOptTable() 194 : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {} 195 }; 196 197 const OptTable &getOptTable() { 198 static const WrapperOptTable *Table = []() { 199 auto Result = std::make_unique<WrapperOptTable>(); 200 return Result.release(); 201 }(); 202 return *Table; 203 } 204 205 void printCommands(ArrayRef<StringRef> CmdArgs) { 206 if (CmdArgs.empty()) 207 return; 208 209 llvm::errs() << " \"" << CmdArgs.front() << "\" "; 210 for (auto IC = std::next(CmdArgs.begin()), IE = CmdArgs.end(); IC != IE; ++IC) 211 llvm::errs() << *IC << (std::next(IC) != IE ? " " : "\n"); 212 } 213 214 [[noreturn]] void reportError(Error E) { 215 outs().flush(); 216 logAllUnhandledErrors(std::move(E), 217 WithColor::error(errs(), LinkerExecutable)); 218 exit(EXIT_FAILURE); 219 } 220 221 std::string getMainExecutable(const char *Name) { 222 void *Ptr = (void *)(intptr_t)&getMainExecutable; 223 auto COWPath = sys::fs::getMainExecutable(Name, Ptr); 224 return sys::path::parent_path(COWPath).str(); 225 } 226 227 /// Get a temporary filename suitable for output. 228 Expected<StringRef> createOutputFile(const Twine &Prefix, StringRef Extension) { 229 std::scoped_lock<decltype(TempFilesMutex)> Lock(TempFilesMutex); 230 SmallString<128> OutputFile; 231 if (SaveTemps) { 232 (Prefix + "." + Extension).toNullTerminatedStringRef(OutputFile); 233 } else { 234 if (std::error_code EC = 235 sys::fs::createTemporaryFile(Prefix, Extension, OutputFile)) 236 return createFileError(OutputFile, EC); 237 } 238 239 TempFiles.emplace_back(std::move(OutputFile)); 240 return TempFiles.back(); 241 } 242 243 /// Execute the command \p ExecutablePath with the arguments \p Args. 
244 Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) { 245 if (Verbose || DryRun) 246 printCommands(Args); 247 248 if (!DryRun) 249 if (sys::ExecuteAndWait(ExecutablePath, Args)) 250 return createStringError( 251 "'%s' failed", sys::path::filename(ExecutablePath).str().c_str()); 252 return Error::success(); 253 } 254 255 Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) { 256 257 ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths); 258 if (!Path) 259 Path = sys::findProgramByName(Name); 260 if (!Path && DryRun) 261 return Name.str(); 262 if (!Path) 263 return createStringError(Path.getError(), 264 "Unable to find '" + Name + "' in path"); 265 return *Path; 266 } 267 268 bool linkerSupportsLTO(const ArgList &Args) { 269 llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); 270 return Triple.isNVPTX() || Triple.isAMDGPU() || 271 Args.getLastArgValue(OPT_linker_path_EQ).ends_with("lld"); 272 } 273 274 /// Returns the hashed value for a constant string. 275 std::string getHash(StringRef Str) { 276 llvm::MD5 Hasher; 277 llvm::MD5::MD5Result Hash; 278 Hasher.update(Str); 279 Hasher.final(Hash); 280 return llvm::utohexstr(Hash.low(), /*LowerCase=*/true); 281 } 282 283 /// Renames offloading entry sections in a relocatable link so they do not 284 /// conflict with a later link job. 285 Error relocateOffloadSection(const ArgList &Args, StringRef Output) { 286 llvm::Triple Triple( 287 Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); 288 if (Triple.isOSWindows()) 289 return createStringError( 290 "Relocatable linking is not supported on COFF targets"); 291 292 Expected<std::string> ObjcopyPath = 293 findProgram("llvm-objcopy", {getMainExecutable("llvm-objcopy")}); 294 if (!ObjcopyPath) 295 return ObjcopyPath.takeError(); 296 297 // Use the linker output file to get a unique hash. This creates a unique 298 // identifier to rename the sections to that is deterministic to the contents. 
299 auto BufferOrErr = DryRun ? MemoryBuffer::getMemBuffer("") 300 : MemoryBuffer::getFileOrSTDIN(Output); 301 if (!BufferOrErr) 302 return createStringError("Failed to open %s", Output.str().c_str()); 303 std::string Suffix = "_" + getHash((*BufferOrErr)->getBuffer()); 304 305 SmallVector<StringRef> ObjcopyArgs = { 306 *ObjcopyPath, 307 Output, 308 }; 309 310 // Remove the old .llvm.offloading section to prevent further linking. 311 ObjcopyArgs.emplace_back("--remove-section"); 312 ObjcopyArgs.emplace_back(".llvm.offloading"); 313 for (StringRef Prefix : {"omp", "cuda", "hip"}) { 314 auto Section = (Prefix + "_offloading_entries").str(); 315 // Rename the offloading entires to make them private to this link unit. 316 ObjcopyArgs.emplace_back("--rename-section"); 317 ObjcopyArgs.emplace_back( 318 Args.MakeArgString(Section + "=" + Section + Suffix)); 319 320 // Rename the __start_ / __stop_ symbols appropriately to iterate over the 321 // newly renamed section containing the offloading entries. 322 ObjcopyArgs.emplace_back("--redefine-sym"); 323 ObjcopyArgs.emplace_back(Args.MakeArgString("__start_" + Section + "=" + 324 "__start_" + Section + Suffix)); 325 ObjcopyArgs.emplace_back("--redefine-sym"); 326 ObjcopyArgs.emplace_back(Args.MakeArgString("__stop_" + Section + "=" + 327 "__stop_" + Section + Suffix)); 328 } 329 330 if (Error Err = executeCommands(*ObjcopyPath, ObjcopyArgs)) 331 return Err; 332 333 return Error::success(); 334 } 335 336 /// Runs the wrapped linker job with the newly created input. 337 Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) { 338 llvm::TimeTraceScope TimeScope("Execute host linker"); 339 340 // Render the linker arguments and add the newly created image. We add it 341 // after the output file to ensure it is linked with the correct libraries. 
342 StringRef LinkerPath = Args.getLastArgValue(OPT_linker_path_EQ); 343 if (LinkerPath.empty()) 344 return createStringError("linker path missing, must pass 'linker-path'"); 345 ArgStringList NewLinkerArgs; 346 for (const opt::Arg *Arg : Args) { 347 // Do not forward arguments only intended for the linker wrapper. 348 if (Arg->getOption().hasFlag(WrapperOnlyOption)) 349 continue; 350 351 Arg->render(Args, NewLinkerArgs); 352 if (Arg->getOption().matches(OPT_o) || Arg->getOption().matches(OPT_out)) 353 llvm::transform(Files, std::back_inserter(NewLinkerArgs), 354 [&](StringRef Arg) { return Args.MakeArgString(Arg); }); 355 } 356 357 SmallVector<StringRef> LinkerArgs({LinkerPath}); 358 for (StringRef Arg : NewLinkerArgs) 359 LinkerArgs.push_back(Arg); 360 if (Error Err = executeCommands(LinkerPath, LinkerArgs)) 361 return Err; 362 363 if (Args.hasArg(OPT_relocatable)) 364 return relocateOffloadSection(Args, ExecutableName); 365 366 return Error::success(); 367 } 368 369 void printVersion(raw_ostream &OS) { 370 OS << clang::getClangToolFullVersion("clang-linker-wrapper") << '\n'; 371 } 372 373 namespace nvptx { 374 Expected<StringRef> 375 fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles, 376 const ArgList &Args) { 377 llvm::TimeTraceScope TimeScope("NVPTX fatbinary"); 378 // NVPTX uses the fatbinary program to bundle the linked images. 379 Expected<std::string> FatBinaryPath = 380 findProgram("fatbinary", {CudaBinaryPath + "/bin"}); 381 if (!FatBinaryPath) 382 return FatBinaryPath.takeError(); 383 384 llvm::Triple Triple( 385 Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); 386 387 // Create a new file to write the linked device image to. 388 auto TempFileOrErr = 389 createOutputFile(sys::path::filename(ExecutableName), "fatbin"); 390 if (!TempFileOrErr) 391 return TempFileOrErr.takeError(); 392 393 SmallVector<StringRef, 16> CmdArgs; 394 CmdArgs.push_back(*FatBinaryPath); 395 CmdArgs.push_back(Triple.isArch64Bit() ? 
"-64" : "-32"); 396 CmdArgs.push_back("--create"); 397 CmdArgs.push_back(*TempFileOrErr); 398 for (const auto &[File, Arch] : InputFiles) 399 CmdArgs.push_back( 400 Args.MakeArgString("--image=profile=" + Arch + ",file=" + File)); 401 402 if (Error Err = executeCommands(*FatBinaryPath, CmdArgs)) 403 return std::move(Err); 404 405 return *TempFileOrErr; 406 } 407 } // namespace nvptx 408 409 namespace amdgcn { 410 Expected<StringRef> 411 fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles, 412 const ArgList &Args) { 413 llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary"); 414 415 // AMDGPU uses the clang-offload-bundler to bundle the linked images. 416 Expected<std::string> OffloadBundlerPath = findProgram( 417 "clang-offload-bundler", {getMainExecutable("clang-offload-bundler")}); 418 if (!OffloadBundlerPath) 419 return OffloadBundlerPath.takeError(); 420 421 llvm::Triple Triple( 422 Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); 423 424 // Create a new file to write the linked device image to. 
425 auto TempFileOrErr = 426 createOutputFile(sys::path::filename(ExecutableName), "hipfb"); 427 if (!TempFileOrErr) 428 return TempFileOrErr.takeError(); 429 430 BumpPtrAllocator Alloc; 431 StringSaver Saver(Alloc); 432 433 SmallVector<StringRef, 16> CmdArgs; 434 CmdArgs.push_back(*OffloadBundlerPath); 435 CmdArgs.push_back("-type=o"); 436 CmdArgs.push_back("-bundle-align=4096"); 437 438 if (Args.hasArg(OPT_compress)) 439 CmdArgs.push_back("-compress"); 440 if (auto *Arg = Args.getLastArg(OPT_compression_level_eq)) 441 CmdArgs.push_back( 442 Args.MakeArgString(Twine("-compression-level=") + Arg->getValue())); 443 444 SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux-gnu"}; 445 for (const auto &[File, Arch] : InputFiles) 446 Targets.push_back(Saver.save("hip-amdgcn-amd-amdhsa--" + Arch)); 447 CmdArgs.push_back(Saver.save(llvm::join(Targets, ","))); 448 449 #ifdef _WIN32 450 CmdArgs.push_back("-input=NUL"); 451 #else 452 CmdArgs.push_back("-input=/dev/null"); 453 #endif 454 for (const auto &[File, Arch] : InputFiles) 455 CmdArgs.push_back(Saver.save("-input=" + File)); 456 457 CmdArgs.push_back(Saver.save("-output=" + *TempFileOrErr)); 458 459 if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs)) 460 return std::move(Err); 461 462 return *TempFileOrErr; 463 } 464 } // namespace amdgcn 465 466 namespace generic { 467 Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { 468 llvm::TimeTraceScope TimeScope("Clang"); 469 // Use `clang` to invoke the appropriate device tools. 470 Expected<std::string> ClangPath = 471 findProgram("clang", {getMainExecutable("clang")}); 472 if (!ClangPath) 473 return ClangPath.takeError(); 474 475 const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); 476 StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); 477 if (Arch.empty()) 478 Arch = "native"; 479 // Create a new file to write the linked device image to. 
Assume that the 480 // input filename already has the device and architecture. 481 auto TempFileOrErr = 482 createOutputFile(sys::path::filename(ExecutableName) + "." + 483 Triple.getArchName() + "." + Arch, 484 "img"); 485 if (!TempFileOrErr) 486 return TempFileOrErr.takeError(); 487 488 StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); 489 SmallVector<StringRef, 16> CmdArgs{ 490 *ClangPath, 491 "--no-default-config", 492 "-o", 493 *TempFileOrErr, 494 Args.MakeArgString("--target=" + Triple.getTriple()), 495 Triple.isAMDGPU() ? Args.MakeArgString("-mcpu=" + Arch) 496 : Args.MakeArgString("-march=" + Arch), 497 Args.MakeArgString("-" + OptLevel), 498 }; 499 500 // Forward all of the `--offload-opt` and similar options to the device. 501 CmdArgs.push_back("-flto"); 502 for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) 503 CmdArgs.append( 504 {"-Xlinker", 505 Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); 506 507 if (!Triple.isNVPTX() && !Triple.isSPIRV()) 508 CmdArgs.push_back("-Wl,--no-undefined"); 509 510 for (StringRef InputFile : InputFiles) 511 CmdArgs.push_back(InputFile); 512 513 // If this is CPU offloading we copy the input libraries. 514 if (!Triple.isAMDGPU() && !Triple.isNVPTX() && !Triple.isSPIRV()) { 515 CmdArgs.push_back("-Wl,-Bsymbolic"); 516 CmdArgs.push_back("-shared"); 517 ArgStringList LinkerArgs; 518 for (const opt::Arg *Arg : 519 Args.filtered(OPT_INPUT, OPT_library, OPT_library_path, OPT_rpath, 520 OPT_whole_archive, OPT_no_whole_archive)) { 521 // Sometimes needed libraries are passed by name, such as when using 522 // sanitizers. We need to check the file magic for any libraries. 
523 if (Arg->getOption().matches(OPT_INPUT)) { 524 if (!sys::fs::exists(Arg->getValue()) || 525 sys::fs::is_directory(Arg->getValue())) 526 continue; 527 528 file_magic Magic; 529 if (auto EC = identify_magic(Arg->getValue(), Magic)) 530 return createStringError("Failed to open %s", Arg->getValue()); 531 if (Magic != file_magic::archive && 532 Magic != file_magic::elf_shared_object) 533 continue; 534 } 535 if (Arg->getOption().matches(OPT_whole_archive)) 536 LinkerArgs.push_back(Args.MakeArgString("-Wl,--whole-archive")); 537 else if (Arg->getOption().matches(OPT_no_whole_archive)) 538 LinkerArgs.push_back(Args.MakeArgString("-Wl,--no-whole-archive")); 539 else 540 Arg->render(Args, LinkerArgs); 541 } 542 llvm::copy(LinkerArgs, std::back_inserter(CmdArgs)); 543 } 544 545 // Pass on -mllvm options to the linker invocation. 546 for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) 547 CmdArgs.append({"-Xlinker", Args.MakeArgString( 548 "-mllvm=" + StringRef(Arg->getValue()))}); 549 550 if (Args.hasArg(OPT_debug)) 551 CmdArgs.push_back("-g"); 552 553 if (SaveTemps) 554 CmdArgs.push_back("-save-temps"); 555 556 if (SaveTemps && linkerSupportsLTO(Args)) 557 CmdArgs.push_back("-Wl,--save-temps"); 558 559 if (Args.hasArg(OPT_embed_bitcode)) 560 CmdArgs.push_back("-Wl,--lto-emit-llvm"); 561 562 if (Verbose) 563 CmdArgs.push_back("-v"); 564 565 if (!CudaBinaryPath.empty()) 566 CmdArgs.push_back(Args.MakeArgString("--cuda-path=" + CudaBinaryPath)); 567 568 for (StringRef Arg : Args.getAllArgValues(OPT_ptxas_arg)) 569 llvm::copy( 570 SmallVector<StringRef>({"-Xcuda-ptxas", Args.MakeArgString(Arg)}), 571 std::back_inserter(CmdArgs)); 572 573 for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) 574 CmdArgs.append({"-Xlinker", Args.MakeArgString(Arg)}); 575 for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ)) 576 CmdArgs.push_back(Args.MakeArgString(Arg)); 577 578 if (Error Err = executeCommands(*ClangPath, CmdArgs)) 579 return std::move(Err); 580 581 return 
*TempFileOrErr; 582 } 583 } // namespace generic 584 585 Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles, 586 const ArgList &Args) { 587 const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); 588 switch (Triple.getArch()) { 589 case Triple::nvptx: 590 case Triple::nvptx64: 591 case Triple::amdgcn: 592 case Triple::x86: 593 case Triple::x86_64: 594 case Triple::aarch64: 595 case Triple::aarch64_be: 596 case Triple::ppc64: 597 case Triple::ppc64le: 598 case Triple::spirv64: 599 case Triple::systemz: 600 case Triple::loongarch64: 601 return generic::clang(InputFiles, Args); 602 default: 603 return createStringError(Triple.getArchName() + 604 " linking is not supported"); 605 } 606 } 607 608 Expected<StringRef> writeOffloadFile(const OffloadFile &File) { 609 const OffloadBinary &Binary = *File.getBinary(); 610 611 StringRef Prefix = 612 sys::path::stem(Binary.getMemoryBufferRef().getBufferIdentifier()); 613 614 auto TempFileOrErr = createOutputFile( 615 Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), "o"); 616 if (!TempFileOrErr) 617 return TempFileOrErr.takeError(); 618 619 Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr = 620 FileOutputBuffer::create(*TempFileOrErr, Binary.getImage().size()); 621 if (!OutputOrErr) 622 return OutputOrErr.takeError(); 623 std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr); 624 llvm::copy(Binary.getImage(), Output->getBufferStart()); 625 if (Error E = Output->commit()) 626 return std::move(E); 627 628 return *TempFileOrErr; 629 } 630 631 // Compile the module to an object file using the appropriate target machine for 632 // the host triple. 
633 Expected<StringRef> compileModule(Module &M, OffloadKind Kind) { 634 llvm::TimeTraceScope TimeScope("Compile module"); 635 std::string Msg; 636 const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg); 637 if (!T) 638 return createStringError(Msg); 639 640 auto Options = 641 codegen::InitTargetOptionsFromCodeGenFlags(Triple(M.getTargetTriple())); 642 StringRef CPU = ""; 643 StringRef Features = ""; 644 std::unique_ptr<TargetMachine> TM( 645 T->createTargetMachine(M.getTargetTriple(), CPU, Features, Options, 646 Reloc::PIC_, M.getCodeModel())); 647 648 if (M.getDataLayout().isDefault()) 649 M.setDataLayout(TM->createDataLayout()); 650 651 int FD = -1; 652 auto TempFileOrErr = 653 createOutputFile(sys::path::filename(ExecutableName) + "." + 654 getOffloadKindName(Kind) + ".image.wrapper", 655 "o"); 656 if (!TempFileOrErr) 657 return TempFileOrErr.takeError(); 658 if (std::error_code EC = sys::fs::openFileForWrite(*TempFileOrErr, FD)) 659 return errorCodeToError(EC); 660 661 auto OS = std::make_unique<llvm::raw_fd_ostream>(FD, true); 662 663 legacy::PassManager CodeGenPasses; 664 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple())); 665 CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII)); 666 if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr, 667 CodeGenFileType::ObjectFile)) 668 return createStringError("Failed to execute host backend"); 669 CodeGenPasses.run(M); 670 671 return *TempFileOrErr; 672 } 673 674 /// Creates the object file containing the device image and runtime 675 /// registration code from the device images stored in \p Images. 
676 Expected<StringRef> 677 wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers, 678 const ArgList &Args, OffloadKind Kind) { 679 llvm::TimeTraceScope TimeScope("Wrap bundled images"); 680 681 SmallVector<ArrayRef<char>, 4> BuffersToWrap; 682 for (const auto &Buffer : Buffers) 683 BuffersToWrap.emplace_back( 684 ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize())); 685 686 LLVMContext Context; 687 Module M("offload.wrapper.module", Context); 688 M.setTargetTriple( 689 Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); 690 691 switch (Kind) { 692 case OFK_OpenMP: 693 if (Error Err = offloading::wrapOpenMPBinaries( 694 M, BuffersToWrap, 695 offloading::getOffloadEntryArray(M, "omp_offloading_entries"), 696 /*Suffix=*/"", /*Relocatable=*/Args.hasArg(OPT_relocatable))) 697 return std::move(Err); 698 break; 699 case OFK_Cuda: 700 if (Error Err = offloading::wrapCudaBinary( 701 M, BuffersToWrap.front(), 702 offloading::getOffloadEntryArray(M, "cuda_offloading_entries"), 703 /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false)) 704 return std::move(Err); 705 break; 706 case OFK_HIP: 707 if (Error Err = offloading::wrapHIPBinary( 708 M, BuffersToWrap.front(), 709 offloading::getOffloadEntryArray(M, "hip_offloading_entries"))) 710 return std::move(Err); 711 break; 712 default: 713 return createStringError(getOffloadKindName(Kind) + 714 " wrapping is not supported"); 715 } 716 717 if (Args.hasArg(OPT_print_wrapped_module)) 718 errs() << M; 719 if (Args.hasArg(OPT_save_temps)) { 720 int FD = -1; 721 auto TempFileOrErr = 722 createOutputFile(sys::path::filename(ExecutableName) + "." 
+ 723 getOffloadKindName(Kind) + ".image.wrapper", 724 "bc"); 725 if (!TempFileOrErr) 726 return TempFileOrErr.takeError(); 727 if (std::error_code EC = sys::fs::openFileForWrite(*TempFileOrErr, FD)) 728 return errorCodeToError(EC); 729 llvm::raw_fd_ostream OS(FD, true); 730 WriteBitcodeToFile(M, OS); 731 } 732 733 auto FileOrErr = compileModule(M, Kind); 734 if (!FileOrErr) 735 return FileOrErr.takeError(); 736 return *FileOrErr; 737 } 738 739 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>> 740 bundleOpenMP(ArrayRef<OffloadingImage> Images) { 741 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers; 742 for (const OffloadingImage &Image : Images) 743 Buffers.emplace_back( 744 MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image))); 745 746 return std::move(Buffers); 747 } 748 749 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>> 750 bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) { 751 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles; 752 for (const OffloadingImage &Image : Images) 753 InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(), 754 Image.StringData.lookup("arch"))); 755 756 Triple TheTriple = Triple(Images.front().StringData.lookup("triple")); 757 auto FileOrErr = nvptx::fatbinary(InputFiles, Args); 758 if (!FileOrErr) 759 return FileOrErr.takeError(); 760 761 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError = 762 llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr); 763 764 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers; 765 if (std::error_code EC = ImageOrError.getError()) 766 return createFileError(*FileOrErr, EC); 767 Buffers.emplace_back(std::move(*ImageOrError)); 768 769 return std::move(Buffers); 770 } 771 772 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>> 773 bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList &Args) { 774 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles; 775 for (const OffloadingImage &Image : Images) 776 
InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(), 777 Image.StringData.lookup("arch"))); 778 779 Triple TheTriple = Triple(Images.front().StringData.lookup("triple")); 780 auto FileOrErr = amdgcn::fatbinary(InputFiles, Args); 781 if (!FileOrErr) 782 return FileOrErr.takeError(); 783 784 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError = 785 llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr); 786 787 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers; 788 if (std::error_code EC = ImageOrError.getError()) 789 return createFileError(*FileOrErr, EC); 790 Buffers.emplace_back(std::move(*ImageOrError)); 791 792 return std::move(Buffers); 793 } 794 795 /// Transforms the input \p Images into the binary format the runtime expects 796 /// for the given \p Kind. 797 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>> 798 bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args, 799 OffloadKind Kind) { 800 llvm::TimeTraceScope TimeScope("Bundle linked output"); 801 switch (Kind) { 802 case OFK_OpenMP: 803 return bundleOpenMP(Images); 804 case OFK_Cuda: 805 return bundleCuda(Images, Args); 806 case OFK_HIP: 807 return bundleHIP(Images, Args); 808 default: 809 return createStringError(getOffloadKindName(Kind) + 810 " bundling is not supported"); 811 } 812 } 813 814 /// Returns a new ArgList containg arguments used for the device linking phase. 815 DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input, 816 const InputArgList &Args) { 817 DerivedArgList DAL = DerivedArgList(DerivedArgList(Args)); 818 for (Arg *A : Args) 819 DAL.append(A); 820 821 // Set the subarchitecture and target triple for this compilation. 
822 const OptTable &Tbl = getOptTable(); 823 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_arch_EQ), 824 Args.MakeArgString(Input.front().getBinary()->getArch())); 825 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_triple_EQ), 826 Args.MakeArgString(Input.front().getBinary()->getTriple())); 827 828 // If every input file is bitcode we have whole program visibility as we 829 // do only support static linking with bitcode. 830 auto ContainsBitcode = [](const OffloadFile &F) { 831 return identify_magic(F.getBinary()->getImage()) == file_magic::bitcode; 832 }; 833 if (llvm::all_of(Input, ContainsBitcode)) 834 DAL.AddFlagArg(nullptr, Tbl.getOption(OPT_whole_program)); 835 836 // Forward '-Xoffload-linker' options to the appropriate backend. 837 for (StringRef Arg : Args.getAllArgValues(OPT_device_linker_args_EQ)) { 838 auto [Triple, Value] = Arg.split('='); 839 llvm::Triple TT(Triple); 840 // If this isn't a recognized triple then it's an `arg=value` option. 841 if (TT.getArch() == Triple::ArchType::UnknownArch) 842 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ), 843 Args.MakeArgString(Arg)); 844 else if (Value.empty()) 845 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ), 846 Args.MakeArgString(Triple)); 847 else if (Triple == DAL.getLastArgValue(OPT_triple_EQ)) 848 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_linker_arg_EQ), 849 Args.MakeArgString(Value)); 850 } 851 852 // Forward '-Xoffload-compiler' options to the appropriate backend. 853 for (StringRef Arg : Args.getAllArgValues(OPT_device_compiler_args_EQ)) { 854 auto [Triple, Value] = Arg.split('='); 855 llvm::Triple TT(Triple); 856 // If this isn't a recognized triple then it's an `arg=value` option. 
857 if (TT.getArch() == Triple::ArchType::UnknownArch) 858 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_compiler_arg_EQ), 859 Args.MakeArgString(Arg)); 860 else if (Value.empty()) 861 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_compiler_arg_EQ), 862 Args.MakeArgString(Triple)); 863 else if (Triple == DAL.getLastArgValue(OPT_triple_EQ)) 864 DAL.AddJoinedArg(nullptr, Tbl.getOption(OPT_compiler_arg_EQ), 865 Args.MakeArgString(Value)); 866 } 867 868 return DAL; 869 } 870 871 Error handleOverrideImages( 872 const InputArgList &Args, 873 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> &Images) { 874 for (StringRef Arg : Args.getAllArgValues(OPT_override_image)) { 875 OffloadKind Kind = getOffloadKind(Arg.split("=").first); 876 StringRef Filename = Arg.split("=").second; 877 878 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 879 MemoryBuffer::getFileOrSTDIN(Filename); 880 if (std::error_code EC = BufferOrErr.getError()) 881 return createFileError(Filename, EC); 882 883 Expected<std::unique_ptr<ObjectFile>> ElfOrErr = 884 ObjectFile::createELFObjectFile(**BufferOrErr, 885 /*InitContent=*/false); 886 if (!ElfOrErr) 887 return ElfOrErr.takeError(); 888 ObjectFile &Elf = **ElfOrErr; 889 890 OffloadingImage TheImage{}; 891 TheImage.TheImageKind = IMG_Object; 892 TheImage.TheOffloadKind = Kind; 893 TheImage.StringData["triple"] = 894 Args.MakeArgString(Elf.makeTriple().getTriple()); 895 if (std::optional<StringRef> CPU = Elf.tryGetCPUName()) 896 TheImage.StringData["arch"] = Args.MakeArgString(*CPU); 897 TheImage.Image = std::move(*BufferOrErr); 898 899 Images[Kind].emplace_back(std::move(TheImage)); 900 } 901 return Error::success(); 902 } 903 904 /// Transforms all the extracted offloading input files into an image that can 905 /// be registered by the runtime. 
906 Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles( 907 SmallVectorImpl<SmallVector<OffloadFile>> &LinkerInputFiles, 908 const InputArgList &Args, char **Argv, int Argc) { 909 llvm::TimeTraceScope TimeScope("Handle all device input"); 910 911 std::mutex ImageMtx; 912 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> Images; 913 914 // Initialize the images with any overriding inputs. 915 if (Args.hasArg(OPT_override_image)) 916 if (Error Err = handleOverrideImages(Args, Images)) 917 return std::move(Err); 918 919 auto Err = parallelForEachError(LinkerInputFiles, [&](auto &Input) -> Error { 920 llvm::TimeTraceScope TimeScope("Link device input"); 921 922 // Each thread needs its own copy of the base arguments to maintain 923 // per-device argument storage of synthetic strings. 924 const OptTable &Tbl = getOptTable(); 925 BumpPtrAllocator Alloc; 926 StringSaver Saver(Alloc); 927 auto BaseArgs = 928 Tbl.parseArgs(Argc, Argv, OPT_INVALID, Saver, [](StringRef Err) { 929 reportError(createStringError(Err)); 930 }); 931 auto LinkerArgs = getLinkerArgs(Input, BaseArgs); 932 933 DenseSet<OffloadKind> ActiveOffloadKinds; 934 for (const auto &File : Input) 935 if (File.getBinary()->getOffloadKind() != OFK_None) 936 ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind()); 937 938 // Write any remaining device inputs to an output file. 939 SmallVector<StringRef> InputFiles; 940 for (const OffloadFile &File : Input) { 941 auto FileNameOrErr = writeOffloadFile(File); 942 if (!FileNameOrErr) 943 return FileNameOrErr.takeError(); 944 InputFiles.emplace_back(*FileNameOrErr); 945 } 946 947 // Link the remaining device files using the device linker. 948 auto OutputOrErr = linkDevice(InputFiles, LinkerArgs); 949 if (!OutputOrErr) 950 return OutputOrErr.takeError(); 951 952 // Store the offloading image for each linked output file. 
953 for (OffloadKind Kind : ActiveOffloadKinds) { 954 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr = 955 llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr); 956 if (std::error_code EC = FileOrErr.getError()) { 957 if (DryRun) 958 FileOrErr = MemoryBuffer::getMemBuffer(""); 959 else 960 return createFileError(*OutputOrErr, EC); 961 } 962 963 std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx); 964 OffloadingImage TheImage{}; 965 TheImage.TheImageKind = 966 Args.hasArg(OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object; 967 TheImage.TheOffloadKind = Kind; 968 TheImage.StringData["triple"] = 969 Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_triple_EQ)); 970 TheImage.StringData["arch"] = 971 Args.MakeArgString(LinkerArgs.getLastArgValue(OPT_arch_EQ)); 972 TheImage.Image = std::move(*FileOrErr); 973 974 Images[Kind].emplace_back(std::move(TheImage)); 975 } 976 return Error::success(); 977 }); 978 if (Err) 979 return std::move(Err); 980 981 // Create a binary image of each offloading image and embed it into a new 982 // object file. 983 SmallVector<StringRef> WrappedOutput; 984 for (auto &[Kind, Input] : Images) { 985 // We sort the entries before bundling so they appear in a deterministic 986 // order in the final binary. 
987 llvm::sort(Input, [](OffloadingImage &A, OffloadingImage &B) { 988 return A.StringData["triple"] > B.StringData["triple"] || 989 A.StringData["arch"] > B.StringData["arch"] || 990 A.TheOffloadKind < B.TheOffloadKind; 991 }); 992 auto BundledImagesOrErr = bundleLinkedOutput(Input, Args, Kind); 993 if (!BundledImagesOrErr) 994 return BundledImagesOrErr.takeError(); 995 auto OutputOrErr = wrapDeviceImages(*BundledImagesOrErr, Args, Kind); 996 if (!OutputOrErr) 997 return OutputOrErr.takeError(); 998 WrappedOutput.push_back(*OutputOrErr); 999 } 1000 1001 return WrappedOutput; 1002 } 1003 1004 std::optional<std::string> findFile(StringRef Dir, StringRef Root, 1005 const Twine &Name) { 1006 SmallString<128> Path; 1007 if (Dir.starts_with("=")) 1008 sys::path::append(Path, Root, Dir.substr(1), Name); 1009 else 1010 sys::path::append(Path, Dir, Name); 1011 1012 if (sys::fs::exists(Path)) 1013 return static_cast<std::string>(Path); 1014 return std::nullopt; 1015 } 1016 1017 std::optional<std::string> 1018 findFromSearchPaths(StringRef Name, StringRef Root, 1019 ArrayRef<StringRef> SearchPaths) { 1020 for (StringRef Dir : SearchPaths) 1021 if (std::optional<std::string> File = findFile(Dir, Root, Name)) 1022 return File; 1023 return std::nullopt; 1024 } 1025 1026 std::optional<std::string> 1027 searchLibraryBaseName(StringRef Name, StringRef Root, 1028 ArrayRef<StringRef> SearchPaths) { 1029 for (StringRef Dir : SearchPaths) { 1030 if (std::optional<std::string> File = 1031 findFile(Dir, Root, "lib" + Name + ".so")) 1032 return File; 1033 if (std::optional<std::string> File = 1034 findFile(Dir, Root, "lib" + Name + ".a")) 1035 return File; 1036 } 1037 return std::nullopt; 1038 } 1039 1040 /// Search for static libraries in the linker's library path given input like 1041 /// `-lfoo` or `-l:libfoo.a`. 
1042 std::optional<std::string> searchLibrary(StringRef Input, StringRef Root, 1043 ArrayRef<StringRef> SearchPaths) { 1044 if (Input.starts_with(":") || Input.ends_with(".lib")) 1045 return findFromSearchPaths(Input.drop_front(), Root, SearchPaths); 1046 return searchLibraryBaseName(Input, Root, SearchPaths); 1047 } 1048 1049 /// Common redeclaration of needed symbol flags. 1050 enum Symbol : uint32_t { 1051 Sym_None = 0, 1052 Sym_Undefined = 1U << 1, 1053 Sym_Weak = 1U << 2, 1054 }; 1055 1056 /// Scan the symbols from a BitcodeFile \p Buffer and record if we need to 1057 /// extract any symbols from it. 1058 Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind, 1059 bool IsArchive, StringSaver &Saver, 1060 DenseMap<StringRef, Symbol> &Syms) { 1061 Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer); 1062 if (!IRSymtabOrErr) 1063 return IRSymtabOrErr.takeError(); 1064 1065 bool ShouldExtract = !IsArchive; 1066 DenseMap<StringRef, Symbol> TmpSyms; 1067 for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { 1068 for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) { 1069 if (Sym.isFormatSpecific() || !Sym.isGlobal()) 1070 continue; 1071 1072 bool NewSymbol = Syms.count(Sym.getName()) == 0; 1073 auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()]; 1074 1075 // We will extract if it defines a currenlty undefined non-weak 1076 // symbol. 1077 bool ResolvesStrongReference = 1078 ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) && 1079 !Sym.isUndefined()); 1080 // We will extract if it defines a new global symbol visible to the 1081 // host. This is only necessary for code targeting an offloading 1082 // language. 
1083 bool NewGlobalSymbol = 1084 ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() && 1085 !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None && 1086 (Sym.getVisibility() != GlobalValue::HiddenVisibility)); 1087 ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol; 1088 1089 // Update this symbol in the "table" with the new information. 1090 if (OldSym & Sym_Undefined && !Sym.isUndefined()) 1091 TmpSyms[Saver.save(Sym.getName())] = 1092 static_cast<Symbol>(OldSym & ~Sym_Undefined); 1093 if (Sym.isUndefined() && NewSymbol) 1094 TmpSyms[Saver.save(Sym.getName())] = 1095 static_cast<Symbol>(OldSym | Sym_Undefined); 1096 if (Sym.isWeak()) 1097 TmpSyms[Saver.save(Sym.getName())] = 1098 static_cast<Symbol>(OldSym | Sym_Weak); 1099 } 1100 } 1101 1102 // If the file gets extracted we update the table with the new symbols. 1103 if (ShouldExtract) 1104 Syms.insert(std::begin(TmpSyms), std::end(TmpSyms)); 1105 1106 return ShouldExtract; 1107 } 1108 1109 /// Scan the symbols from an ObjectFile \p Obj and record if we need to extract 1110 /// any symbols from it. 1111 Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind, 1112 bool IsArchive, StringSaver &Saver, 1113 DenseMap<StringRef, Symbol> &Syms) { 1114 bool ShouldExtract = !IsArchive; 1115 DenseMap<StringRef, Symbol> TmpSyms; 1116 for (SymbolRef Sym : Obj.symbols()) { 1117 auto FlagsOrErr = Sym.getFlags(); 1118 if (!FlagsOrErr) 1119 return FlagsOrErr.takeError(); 1120 1121 if (!(*FlagsOrErr & SymbolRef::SF_Global) || 1122 (*FlagsOrErr & SymbolRef::SF_FormatSpecific)) 1123 continue; 1124 1125 auto NameOrErr = Sym.getName(); 1126 if (!NameOrErr) 1127 return NameOrErr.takeError(); 1128 1129 bool NewSymbol = Syms.count(*NameOrErr) == 0; 1130 auto OldSym = NewSymbol ? Sym_None : Syms[*NameOrErr]; 1131 1132 // We will extract if it defines a currenlty undefined non-weak symbol. 
1133 bool ResolvesStrongReference = (OldSym & Sym_Undefined) && 1134 !(OldSym & Sym_Weak) && 1135 !(*FlagsOrErr & SymbolRef::SF_Undefined); 1136 1137 // We will extract if it defines a new global symbol visible to the 1138 // host. This is only necessary for code targeting an offloading 1139 // language. 1140 bool NewGlobalSymbol = 1141 ((NewSymbol || (OldSym & Sym_Undefined)) && 1142 !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None && 1143 !(*FlagsOrErr & SymbolRef::SF_Hidden)); 1144 ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol; 1145 1146 // Update this symbol in the "table" with the new information. 1147 if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined)) 1148 TmpSyms[Saver.save(*NameOrErr)] = 1149 static_cast<Symbol>(OldSym & ~Sym_Undefined); 1150 if (*FlagsOrErr & SymbolRef::SF_Undefined && NewSymbol) 1151 TmpSyms[Saver.save(*NameOrErr)] = 1152 static_cast<Symbol>(OldSym | Sym_Undefined); 1153 if (*FlagsOrErr & SymbolRef::SF_Weak) 1154 TmpSyms[Saver.save(*NameOrErr)] = static_cast<Symbol>(OldSym | Sym_Weak); 1155 } 1156 1157 // If the file gets extracted we update the table with the new symbols. 1158 if (ShouldExtract) 1159 Syms.insert(std::begin(TmpSyms), std::end(TmpSyms)); 1160 1161 return ShouldExtract; 1162 } 1163 1164 /// Attempt to 'resolve' symbols found in input files. We use this to 1165 /// determine if an archive member needs to be extracted. An archive member 1166 /// will be extracted if any of the following is true. 1167 /// 1) It defines an undefined symbol in a regular object filie. 1168 /// 2) It defines a global symbol without hidden visibility that has not 1169 /// yet been defined. 
1170 Expected<bool> getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive, 1171 StringSaver &Saver, 1172 DenseMap<StringRef, Symbol> &Syms) { 1173 MemoryBufferRef Buffer = MemoryBufferRef(Image, ""); 1174 switch (identify_magic(Image)) { 1175 case file_magic::bitcode: 1176 return getSymbolsFromBitcode(Buffer, Kind, IsArchive, Saver, Syms); 1177 case file_magic::elf_relocatable: { 1178 Expected<std::unique_ptr<ObjectFile>> ObjFile = 1179 ObjectFile::createObjectFile(Buffer); 1180 if (!ObjFile) 1181 return ObjFile.takeError(); 1182 return getSymbolsFromObject(**ObjFile, Kind, IsArchive, Saver, Syms); 1183 } 1184 default: 1185 return false; 1186 } 1187 } 1188 1189 /// Search the input files and libraries for embedded device offloading code 1190 /// and add it to the list of files to be linked. Files coming from static 1191 /// libraries are only added to the input if they are used by an existing 1192 /// input file. Returns a list of input files intended for a single linking job. 1193 Expected<SmallVector<SmallVector<OffloadFile>>> 1194 getDeviceInput(const ArgList &Args) { 1195 llvm::TimeTraceScope TimeScope("ExtractDeviceCode"); 1196 1197 // Skip all the input if the user is overriding the output. 1198 if (Args.hasArg(OPT_override_image)) 1199 return SmallVector<SmallVector<OffloadFile>>(); 1200 1201 StringRef Root = Args.getLastArgValue(OPT_sysroot_EQ); 1202 SmallVector<StringRef> LibraryPaths; 1203 for (const opt::Arg *Arg : Args.filtered(OPT_library_path, OPT_libpath)) 1204 LibraryPaths.push_back(Arg->getValue()); 1205 1206 BumpPtrAllocator Alloc; 1207 StringSaver Saver(Alloc); 1208 1209 // Try to extract device code from the linker input files. 1210 bool WholeArchive = Args.hasArg(OPT_wholearchive_flag) ? 
true : false; 1211 SmallVector<OffloadFile> ObjectFilesToExtract; 1212 SmallVector<OffloadFile> ArchiveFilesToExtract; 1213 for (const opt::Arg *Arg : Args.filtered( 1214 OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) { 1215 if (Arg->getOption().matches(OPT_whole_archive) || 1216 Arg->getOption().matches(OPT_no_whole_archive)) { 1217 WholeArchive = Arg->getOption().matches(OPT_whole_archive); 1218 continue; 1219 } 1220 1221 std::optional<std::string> Filename = 1222 Arg->getOption().matches(OPT_library) 1223 ? searchLibrary(Arg->getValue(), Root, LibraryPaths) 1224 : std::string(Arg->getValue()); 1225 1226 if (!Filename && Arg->getOption().matches(OPT_library)) 1227 reportError( 1228 createStringError("unable to find library -l%s", Arg->getValue())); 1229 1230 if (!Filename || !sys::fs::exists(*Filename) || 1231 sys::fs::is_directory(*Filename)) 1232 continue; 1233 1234 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 1235 MemoryBuffer::getFileOrSTDIN(*Filename); 1236 if (std::error_code EC = BufferOrErr.getError()) 1237 return createFileError(*Filename, EC); 1238 1239 MemoryBufferRef Buffer = **BufferOrErr; 1240 if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object) 1241 continue; 1242 1243 SmallVector<OffloadFile> Binaries; 1244 if (Error Err = extractOffloadBinaries(Buffer, Binaries)) 1245 return std::move(Err); 1246 1247 for (auto &OffloadFile : Binaries) { 1248 if (identify_magic(Buffer.getBuffer()) == file_magic::archive && 1249 !WholeArchive) 1250 ArchiveFilesToExtract.emplace_back(std::move(OffloadFile)); 1251 else 1252 ObjectFilesToExtract.emplace_back(std::move(OffloadFile)); 1253 } 1254 } 1255 1256 // Link all standard input files and update the list of symbols. 
1257 MapVector<OffloadFile::TargetID, SmallVector<OffloadFile, 0>> InputFiles; 1258 DenseMap<OffloadFile::TargetID, DenseMap<StringRef, Symbol>> Syms; 1259 for (OffloadFile &Binary : ObjectFilesToExtract) { 1260 if (!Binary.getBinary()) 1261 continue; 1262 1263 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary}; 1264 for (const auto &[ID, Input] : InputFiles) 1265 if (object::areTargetsCompatible(Binary, ID)) 1266 CompatibleTargets.emplace_back(ID); 1267 1268 for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { 1269 Expected<bool> ExtractOrErr = getSymbols( 1270 Binary.getBinary()->getImage(), Binary.getBinary()->getOffloadKind(), 1271 /*IsArchive=*/false, Saver, Syms[ID]); 1272 if (!ExtractOrErr) 1273 return ExtractOrErr.takeError(); 1274 1275 // If another target needs this binary it must be copied instead. 1276 if (Index == CompatibleTargets.size() - 1) 1277 InputFiles[ID].emplace_back(std::move(Binary)); 1278 else 1279 InputFiles[ID].emplace_back(Binary.copy()); 1280 } 1281 } 1282 1283 // Archive members only extract if they define needed symbols. We do this 1284 // after every regular input file so that libraries may be included out of 1285 // order. This follows 'ld.lld' semantics which are more lenient. 1286 bool Extracted = true; 1287 while (Extracted) { 1288 Extracted = false; 1289 for (OffloadFile &Binary : ArchiveFilesToExtract) { 1290 // If the binary was previously extracted it will be set to null. 1291 if (!Binary.getBinary()) 1292 continue; 1293 1294 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary}; 1295 for (const auto &[ID, Input] : InputFiles) 1296 if (object::areTargetsCompatible(Binary, ID)) 1297 CompatibleTargets.emplace_back(ID); 1298 1299 for (const auto &[Index, ID] : llvm::enumerate(CompatibleTargets)) { 1300 // Only extract an if we have an an object matching this target. 
1301 if (!InputFiles.count(ID)) 1302 continue; 1303 1304 Expected<bool> ExtractOrErr = 1305 getSymbols(Binary.getBinary()->getImage(), 1306 Binary.getBinary()->getOffloadKind(), 1307 /*IsArchive=*/true, Saver, Syms[ID]); 1308 if (!ExtractOrErr) 1309 return ExtractOrErr.takeError(); 1310 1311 Extracted = *ExtractOrErr; 1312 1313 // Skip including the file if it is an archive that does not resolve 1314 // any symbols. 1315 if (!Extracted) 1316 continue; 1317 1318 // If another target needs this binary it must be copied instead. 1319 if (Index == CompatibleTargets.size() - 1) 1320 InputFiles[ID].emplace_back(std::move(Binary)); 1321 else 1322 InputFiles[ID].emplace_back(Binary.copy()); 1323 } 1324 1325 // If we extracted any files we need to check all the symbols again. 1326 if (Extracted) 1327 break; 1328 } 1329 } 1330 1331 SmallVector<SmallVector<OffloadFile>> InputsForTarget; 1332 for (auto &[ID, Input] : InputFiles) 1333 InputsForTarget.emplace_back(std::move(Input)); 1334 1335 return std::move(InputsForTarget); 1336 } 1337 1338 } // namespace 1339 1340 int main(int Argc, char **Argv) { 1341 InitLLVM X(Argc, Argv); 1342 InitializeAllTargetInfos(); 1343 InitializeAllTargets(); 1344 InitializeAllTargetMCs(); 1345 InitializeAllAsmParsers(); 1346 InitializeAllAsmPrinters(); 1347 1348 LinkerExecutable = Argv[0]; 1349 sys::PrintStackTraceOnErrorSignal(Argv[0]); 1350 1351 const OptTable &Tbl = getOptTable(); 1352 BumpPtrAllocator Alloc; 1353 StringSaver Saver(Alloc); 1354 auto Args = Tbl.parseArgs(Argc, Argv, OPT_INVALID, Saver, [&](StringRef Err) { 1355 reportError(createStringError(Err)); 1356 }); 1357 1358 if (Args.hasArg(OPT_help) || Args.hasArg(OPT_help_hidden)) { 1359 Tbl.printHelp( 1360 outs(), 1361 "clang-linker-wrapper [options] -- <options to passed to the linker>", 1362 "\nA wrapper utility over the host linker. 
It scans the input files\n" 1363 "for sections that require additional processing prior to linking.\n" 1364 "The will then transparently pass all arguments and input to the\n" 1365 "specified host linker to create the final binary.\n", 1366 Args.hasArg(OPT_help_hidden), Args.hasArg(OPT_help_hidden)); 1367 return EXIT_SUCCESS; 1368 } 1369 if (Args.hasArg(OPT_v)) { 1370 printVersion(outs()); 1371 return EXIT_SUCCESS; 1372 } 1373 1374 // This forwards '-mllvm' arguments to LLVM if present. 1375 SmallVector<const char *> NewArgv = {Argv[0]}; 1376 for (const opt::Arg *Arg : Args.filtered(OPT_mllvm)) 1377 NewArgv.push_back(Arg->getValue()); 1378 for (const opt::Arg *Arg : Args.filtered(OPT_offload_opt_eq_minus)) 1379 NewArgv.push_back(Arg->getValue()); 1380 SmallVector<PassPlugin, 1> PluginList; 1381 PassPlugins.setCallback([&](const std::string &PluginPath) { 1382 auto Plugin = PassPlugin::Load(PluginPath); 1383 if (!Plugin) 1384 report_fatal_error(Plugin.takeError(), /*gen_crash_diag=*/false); 1385 PluginList.emplace_back(Plugin.get()); 1386 }); 1387 cl::ParseCommandLineOptions(NewArgv.size(), &NewArgv[0]); 1388 1389 Verbose = Args.hasArg(OPT_verbose); 1390 DryRun = Args.hasArg(OPT_dry_run); 1391 SaveTemps = Args.hasArg(OPT_save_temps); 1392 CudaBinaryPath = Args.getLastArgValue(OPT_cuda_path_EQ).str(); 1393 1394 llvm::Triple Triple( 1395 Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); 1396 if (Args.hasArg(OPT_o)) 1397 ExecutableName = Args.getLastArgValue(OPT_o, "a.out"); 1398 else if (Args.hasArg(OPT_out)) 1399 ExecutableName = Args.getLastArgValue(OPT_out, "a.exe"); 1400 else 1401 ExecutableName = Triple.isOSWindows() ? 
"a.exe" : "a.out"; 1402 1403 parallel::strategy = hardware_concurrency(1); 1404 if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) { 1405 unsigned Threads = 0; 1406 if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0) 1407 reportError(createStringError("%s: expected a positive integer, got '%s'", 1408 Arg->getSpelling().data(), 1409 Arg->getValue())); 1410 parallel::strategy = hardware_concurrency(Threads); 1411 } 1412 1413 if (Args.hasArg(OPT_wrapper_time_trace_eq)) { 1414 unsigned Granularity; 1415 Args.getLastArgValue(OPT_wrapper_time_trace_granularity, "500") 1416 .getAsInteger(10, Granularity); 1417 timeTraceProfilerInitialize(Granularity, Argv[0]); 1418 } 1419 1420 { 1421 llvm::TimeTraceScope TimeScope("Execute linker wrapper"); 1422 1423 // Extract the device input files stored in the host fat binary. 1424 auto DeviceInputFiles = getDeviceInput(Args); 1425 if (!DeviceInputFiles) 1426 reportError(DeviceInputFiles.takeError()); 1427 1428 // Link and wrap the device images extracted from the linker input. 1429 auto FilesOrErr = 1430 linkAndWrapDeviceFiles(*DeviceInputFiles, Args, Argv, Argc); 1431 if (!FilesOrErr) 1432 reportError(FilesOrErr.takeError()); 1433 1434 // Run the host linking job with the rendered arguments. 1435 if (Error Err = runLinker(*FilesOrErr, Args)) 1436 reportError(std::move(Err)); 1437 } 1438 1439 if (const opt::Arg *Arg = Args.getLastArg(OPT_wrapper_time_trace_eq)) { 1440 if (Error Err = timeTraceProfilerWrite(Arg->getValue(), ExecutableName)) 1441 reportError(std::move(Err)); 1442 timeTraceProfilerCleanup(); 1443 } 1444 1445 // Remove the temporary files created. 1446 if (!SaveTemps) 1447 for (const auto &TempFile : TempFiles) 1448 if (std::error_code EC = sys::fs::remove(TempFile)) 1449 reportError(createFileError(TempFile, EC)); 1450 1451 return EXIT_SUCCESS; 1452 } 1453