xref: /netbsd-src/external/apache2/llvm/dist/clang/lib/Driver/ToolChains/HIP.cpp (revision e038c9c4676b0f19b1b7dd08a940c6ed64a6d5ae)
1 //===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HIP.h"
10 #include "AMDGPU.h"
11 #include "CommonArgs.h"
12 #include "InputInfo.h"
13 #include "clang/Basic/Cuda.h"
14 #include "clang/Basic/TargetID.h"
15 #include "clang/Driver/Compilation.h"
16 #include "clang/Driver/Driver.h"
17 #include "clang/Driver/DriverDiagnostic.h"
18 #include "clang/Driver/Options.h"
19 #include "llvm/Support/Alignment.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/TargetParser.h"
23 
24 using namespace clang::driver;
25 using namespace clang::driver::toolchains;
26 using namespace clang::driver::tools;
27 using namespace clang;
28 using namespace llvm::opt;
29 
30 #if defined(_WIN32) || defined(_WIN64)
31 #define NULL_FILE "nul"
32 #else
33 #define NULL_FILE "/dev/null"
34 #endif
35 
namespace {
// Alignment, in bytes, used both for the bundles written by
// clang-offload-bundler (-bundle-align) and for the embedded __hip_fatbin
// symbol (.p2align).
constexpr unsigned HIPCodeObjectAlign = 4096;
} // namespace
39 
constructLldCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const InputInfo & Output,const llvm::opt::ArgList & Args) const40 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
41                                           const InputInfoList &Inputs,
42                                           const InputInfo &Output,
43                                           const llvm::opt::ArgList &Args) const {
44   // Construct lld command.
45   // The output from ld.lld is an HSA code object file.
46   ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared",
47                         "-plugin-opt=-amdgpu-internalize-symbols"};
48 
49   auto &TC = getToolChain();
50   auto &D = TC.getDriver();
51   assert(!Inputs.empty() && "Must have at least one input.");
52   bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin;
53   addLTOOptions(TC, Args, LldArgs, Output, Inputs[0], IsThinLTO);
54 
55   // Extract all the -m options
56   std::vector<llvm::StringRef> Features;
57   amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features);
58 
59   // Add features to mattr such as cumode
60   std::string MAttrString = "-plugin-opt=-mattr=";
61   for (auto OneFeature : unifyTargetFeatures(Features)) {
62     MAttrString.append(Args.MakeArgString(OneFeature));
63     if (OneFeature != Features.back())
64       MAttrString.append(",");
65   }
66   if (!Features.empty())
67     LldArgs.push_back(Args.MakeArgString(MAttrString));
68 
69   // ToDo: Remove this option after AMDGPU backend supports ISA-level linking.
70   // Since AMDGPU backend currently does not support ISA-level linking, all
71   // called functions need to be imported.
72   if (IsThinLTO)
73     LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all"));
74 
75   for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
76     LldArgs.push_back(
77         Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0)));
78   }
79 
80   if (C.getDriver().isSaveTempsEnabled())
81     LldArgs.push_back("-save-temps");
82 
83   addLinkerCompressDebugSectionsOption(TC, Args, LldArgs);
84 
85   LldArgs.append({"-o", Output.getFilename()});
86   for (auto Input : Inputs)
87     LldArgs.push_back(Input.getFilename());
88 
89   if (Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize,
90                    false))
91     llvm::for_each(TC.getHIPDeviceLibs(Args), [&](StringRef BCFile) {
92       LldArgs.push_back(Args.MakeArgString(BCFile));
93     });
94 
95   const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
96   C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
97                                          Lld, LldArgs, Inputs, Output));
98 }
99 
100 // Construct a clang-offload-bundler command to bundle code objects for
101 // different GPU's into a HIP fat binary.
constructHIPFatbinCommand(Compilation & C,const JobAction & JA,StringRef OutputFileName,const InputInfoList & Inputs,const llvm::opt::ArgList & Args,const Tool & T)102 void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
103                   StringRef OutputFileName, const InputInfoList &Inputs,
104                   const llvm::opt::ArgList &Args, const Tool& T) {
105   // Construct clang-offload-bundler command to bundle object files for
106   // for different GPU archs.
107   ArgStringList BundlerArgs;
108   BundlerArgs.push_back(Args.MakeArgString("-type=o"));
109   BundlerArgs.push_back(
110       Args.MakeArgString("-bundle-align=" + Twine(HIPCodeObjectAlign)));
111 
112   // ToDo: Remove the dummy host binary entry which is required by
113   // clang-offload-bundler.
114   std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
115   std::string BundlerInputArg = "-inputs=" NULL_FILE;
116 
117   // For code object version 2 and 3, the offload kind in bundle ID is 'hip'
118   // for backward compatibility. For code object version 4 and greater, the
119   // offload kind in bundle ID is 'hipv4'.
120   std::string OffloadKind = "hip";
121   if (getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4)
122     OffloadKind = OffloadKind + "v4";
123   for (const auto &II : Inputs) {
124     const auto* A = II.getAction();
125     BundlerTargetArg = BundlerTargetArg + "," + OffloadKind +
126                        "-amdgcn-amd-amdhsa--" +
127                        StringRef(A->getOffloadingArch()).str();
128     BundlerInputArg = BundlerInputArg + "," + II.getFilename();
129   }
130   BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
131   BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
132 
133   std::string Output = std::string(OutputFileName);
134   auto BundlerOutputArg =
135       Args.MakeArgString(std::string("-outputs=").append(Output));
136   BundlerArgs.push_back(BundlerOutputArg);
137 
138   const char *Bundler = Args.MakeArgString(
139       T.getToolChain().GetProgramPath("clang-offload-bundler"));
140   C.addCommand(std::make_unique<Command>(
141       JA, T, ResponseFileSupport::None(), Bundler, BundlerArgs, Inputs,
142       InputInfo(&JA, Args.MakeArgString(Output))));
143 }
144 
145 /// Add Generated HIP Object File which has device images embedded into the
146 /// host to the argument list for linking. Using MC directives, embed the
147 /// device code and also define symbols required by the code generation so that
148 /// the image can be retrieved at runtime.
constructGenerateObjFileFromHIPFatBinary(Compilation & C,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const JobAction & JA) const149 void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
150     Compilation &C, const InputInfo &Output,
151     const InputInfoList &Inputs, const ArgList &Args,
152     const JobAction &JA) const {
153   const ToolChain &TC = getToolChain();
154   std::string Name =
155       std::string(llvm::sys::path::stem(Output.getFilename()));
156 
157   // Create Temp Object File Generator,
158   // Offload Bundled file and Bundled Object file.
159   // Keep them if save-temps is enabled.
160   const char *McinFile;
161   const char *BundleFile;
162   if (C.getDriver().isSaveTempsEnabled()) {
163     McinFile = C.getArgs().MakeArgString(Name + ".mcin");
164     BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
165   } else {
166     auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin");
167     McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin));
168     auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb");
169     BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb));
170   }
171   constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this);
172 
173   // Create a buffer to write the contents of the temp obj generator.
174   std::string ObjBuffer;
175   llvm::raw_string_ostream ObjStream(ObjBuffer);
176 
177   // Add MC directives to embed target binaries. We ensure that each
178   // section and image is 16-byte aligned. This is not mandatory, but
179   // increases the likelihood of data to be aligned with a cache block
180   // in several main host machines.
181   ObjStream << "#       HIP Object Generator\n";
182   ObjStream << "# *** Automatically generated by Clang ***\n";
183   ObjStream << "  .type __hip_fatbin,@object\n";
184   ObjStream << "  .section .hip_fatbin,\"a\",@progbits\n";
185   ObjStream << "  .globl __hip_fatbin\n";
186   ObjStream << "  .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign))
187             << "\n";
188   ObjStream << "__hip_fatbin:\n";
189   ObjStream << "  .incbin \"" << BundleFile << "\"\n";
190   ObjStream.flush();
191 
192   // Dump the contents of the temp object file gen if the user requested that.
193   // We support this option to enable testing of behavior with -###.
194   if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
195     llvm::errs() << ObjBuffer;
196 
197   // Open script file and write the contents.
198   std::error_code EC;
199   llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None);
200 
201   if (EC) {
202     C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
203     return;
204   }
205 
206   Objf << ObjBuffer;
207 
208   ArgStringList McArgs{"-o",      Output.getFilename(),
209                        McinFile,  "--filetype=obj"};
210   const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
211   C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
212                                          Mc, McArgs, Inputs, Output));
213 }
214 
215 // For amdgcn the inputs of the linker job are device bitcode and output is
216 // object file. It calls llvm-link, opt, llc, then lld steps.
ConstructJob(Compilation & C,const JobAction & JA,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const char * LinkingOutput) const217 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
218                                    const InputInfo &Output,
219                                    const InputInfoList &Inputs,
220                                    const ArgList &Args,
221                                    const char *LinkingOutput) const {
222   if (Inputs.size() > 0 &&
223       Inputs[0].getType() == types::TY_Image &&
224       JA.getType() == types::TY_Object)
225     return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA);
226 
227   if (JA.getType() == types::TY_HIP_FATBIN)
228     return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
229 
230   return constructLldCommand(C, JA, Inputs, Output, Args);
231 }
232 
HIPToolChain(const Driver & D,const llvm::Triple & Triple,const ToolChain & HostTC,const ArgList & Args)233 HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
234                              const ToolChain &HostTC, const ArgList &Args)
235     : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
236   // Lookup binaries into the driver directory, this is used to
237   // discover the clang-offload-bundler executable.
238   getProgramPaths().push_back(getDriver().Dir);
239 }
240 
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const241 void HIPToolChain::addClangTargetOptions(
242     const llvm::opt::ArgList &DriverArgs,
243     llvm::opt::ArgStringList &CC1Args,
244     Action::OffloadKind DeviceOffloadingKind) const {
245   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
246 
247   assert(DeviceOffloadingKind == Action::OFK_HIP &&
248          "Only HIP offloading kinds are supported for GPUs.");
249 
250   CC1Args.push_back("-fcuda-is-device");
251 
252   if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
253                          options::OPT_fno_cuda_approx_transcendentals, false))
254     CC1Args.push_back("-fcuda-approx-transcendentals");
255 
256   if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
257                           false))
258     CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
259 
260   StringRef MaxThreadsPerBlock =
261       DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
262   if (!MaxThreadsPerBlock.empty()) {
263     std::string ArgStr =
264         std::string("--gpu-max-threads-per-block=") + MaxThreadsPerBlock.str();
265     CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr));
266   }
267 
268   CC1Args.push_back("-fcuda-allow-variadic-functions");
269 
270   // Default to "hidden" visibility, as object level linking will not be
271   // supported for the foreseeable future.
272   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
273                          options::OPT_fvisibility_ms_compat)) {
274     CC1Args.append({"-fvisibility", "hidden"});
275     CC1Args.push_back("-fapply-global-visibility-to-externs");
276   }
277 
278   llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](StringRef BCFile) {
279     CC1Args.push_back("-mlink-builtin-bitcode");
280     CC1Args.push_back(DriverArgs.MakeArgString(BCFile));
281   });
282 }
283 
284 llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList & Args,StringRef BoundArch,Action::OffloadKind DeviceOffloadKind) const285 HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
286                              StringRef BoundArch,
287                              Action::OffloadKind DeviceOffloadKind) const {
288   DerivedArgList *DAL =
289       HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
290   if (!DAL)
291     DAL = new DerivedArgList(Args.getBaseArgs());
292 
293   const OptTable &Opts = getDriver().getOpts();
294 
295   for (Arg *A : Args) {
296     if (!shouldSkipArgument(A))
297       DAL->append(A);
298   }
299 
300   if (!BoundArch.empty()) {
301     DAL->eraseArg(options::OPT_mcpu_EQ);
302     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch);
303     checkTargetID(*DAL);
304   }
305 
306   return DAL;
307 }
308 
buildLinker() const309 Tool *HIPToolChain::buildLinker() const {
310   assert(getTriple().getArch() == llvm::Triple::amdgcn);
311   return new tools::AMDGCN::Linker(*this);
312 }
313 
addClangWarningOptions(ArgStringList & CC1Args) const314 void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
315   HostTC.addClangWarningOptions(CC1Args);
316 }
317 
318 ToolChain::CXXStdlibType
GetCXXStdlibType(const ArgList & Args) const319 HIPToolChain::GetCXXStdlibType(const ArgList &Args) const {
320   return HostTC.GetCXXStdlibType(Args);
321 }
322 
AddClangSystemIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const323 void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
324                                               ArgStringList &CC1Args) const {
325   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
326 }
327 
AddClangCXXStdlibIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const328 void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
329                                                  ArgStringList &CC1Args) const {
330   HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
331 }
332 
AddIAMCUIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const333 void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
334                                         ArgStringList &CC1Args) const {
335   HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
336 }
337 
AddHIPIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const338 void HIPToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
339                                      ArgStringList &CC1Args) const {
340   RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args);
341 }
342 
getSupportedSanitizers() const343 SanitizerMask HIPToolChain::getSupportedSanitizers() const {
344   // The HIPToolChain only supports sanitizers in the sense that it allows
345   // sanitizer arguments on the command line if they are supported by the host
346   // toolchain. The HIPToolChain will actually ignore any command line
347   // arguments for any of these "supported" sanitizers. That means that no
348   // sanitization of device code is actually supported at this time.
349   //
350   // This behavior is necessary because the host and device toolchains
351   // invocations often share the command line, so the device toolchain must
352   // tolerate flags meant only for the host toolchain.
353   return HostTC.getSupportedSanitizers();
354 }
355 
computeMSVCVersion(const Driver * D,const ArgList & Args) const356 VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
357                                                const ArgList &Args) const {
358   return HostTC.computeMSVCVersion(D, Args);
359 }
360 
361 llvm::SmallVector<std::string, 12>
getHIPDeviceLibs(const llvm::opt::ArgList & DriverArgs) const362 HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
363   llvm::SmallVector<std::string, 12> BCLibs;
364   if (DriverArgs.hasArg(options::OPT_nogpulib))
365     return {};
366   ArgStringList LibraryPaths;
367 
368   // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
369   for (auto Path : RocmInstallation.getRocmDeviceLibPathArg())
370     LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
371 
372   addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH");
373 
374   // Maintain compatability with --hip-device-lib.
375   auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ);
376   if (!BCLibArgs.empty()) {
377     llvm::for_each(BCLibArgs, [&](StringRef BCName) {
378       StringRef FullName;
379       for (std::string LibraryPath : LibraryPaths) {
380         SmallString<128> Path(LibraryPath);
381         llvm::sys::path::append(Path, BCName);
382         FullName = Path;
383         if (llvm::sys::fs::exists(FullName)) {
384           BCLibs.push_back(FullName.str());
385           return;
386         }
387       }
388       getDriver().Diag(diag::err_drv_no_such_file) << BCName;
389     });
390   } else {
391     if (!RocmInstallation.hasDeviceLibrary()) {
392       getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
393       return {};
394     }
395     StringRef GpuArch = getGPUArch(DriverArgs);
396     assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
397     (void)GpuArch;
398     auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
399     const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
400 
401     std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
402     if (LibDeviceFile.empty()) {
403       getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
404       return {};
405     }
406 
407     // If --hip-device-lib is not set, add the default bitcode libraries.
408     // TODO: There are way too many flags that change this. Do we need to check
409     // them all?
410     bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
411                                   options::OPT_fno_gpu_flush_denormals_to_zero,
412                                   getDefaultDenormsAreZeroForTarget(Kind));
413     bool FiniteOnly =
414         DriverArgs.hasFlag(options::OPT_ffinite_math_only,
415                            options::OPT_fno_finite_math_only, false);
416     bool UnsafeMathOpt =
417         DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
418                            options::OPT_fno_unsafe_math_optimizations, false);
419     bool FastRelaxedMath = DriverArgs.hasFlag(
420         options::OPT_ffast_math, options::OPT_fno_fast_math, false);
421     bool CorrectSqrt = DriverArgs.hasFlag(
422         options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
423         options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
424     bool Wave64 = isWave64(DriverArgs, Kind);
425 
426     if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
427                            options::OPT_fno_gpu_sanitize, false)) {
428       auto AsanRTL = RocmInstallation.getAsanRTLPath();
429       if (AsanRTL.empty()) {
430         unsigned DiagID = getDriver().getDiags().getCustomDiagID(
431             DiagnosticsEngine::Error,
432             "AMDGPU address sanitizer runtime library (asanrtl) is not found. "
433             "Please install ROCm device library which supports address "
434             "sanitizer");
435         getDriver().Diag(DiagID);
436         return {};
437       } else
438         BCLibs.push_back(AsanRTL.str());
439     }
440 
441     // Add the HIP specific bitcode library.
442     BCLibs.push_back(RocmInstallation.getHIPPath().str());
443 
444     // Add the generic set of libraries.
445     BCLibs.append(RocmInstallation.getCommonBitcodeLibs(
446         DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
447         FastRelaxedMath, CorrectSqrt));
448 
449     // Add instrument lib.
450     auto InstLib =
451         DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ);
452     if (InstLib.empty())
453       return BCLibs;
454     if (llvm::sys::fs::exists(InstLib))
455       BCLibs.push_back(InstLib.str());
456     else
457       getDriver().Diag(diag::err_drv_no_such_file) << InstLib;
458   }
459 
460   return BCLibs;
461 }
462