xref: /netbsd-src/external/apache2/llvm/dist/clang/lib/Driver/ToolChains/HIP.cpp (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1 //===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HIP.h"
10 #include "CommonArgs.h"
11 #include "InputInfo.h"
12 #include "clang/Basic/Cuda.h"
13 #include "clang/Driver/Compilation.h"
14 #include "clang/Driver/Driver.h"
15 #include "clang/Driver/DriverDiagnostic.h"
16 #include "clang/Driver/Options.h"
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/Support/Path.h"
19 
20 using namespace clang::driver;
21 using namespace clang::driver::toolchains;
22 using namespace clang::driver::tools;
23 using namespace clang;
24 using namespace llvm::opt;
25 
// Path of the platform's null device. It is spliced into the "-inputs="
// argument built for clang-offload-bundler further down in this file.
#if !defined(_WIN32) && !defined(_WIN64)
#define NULL_FILE "/dev/null"
#else
#define NULL_FILE "nul"
#endif
31 
32 namespace {
33 
34 static void addBCLib(const Driver &D, const ArgList &Args,
35                      ArgStringList &CmdArgs, ArgStringList LibraryPaths,
36                      StringRef BCName) {
37   StringRef FullName;
38   for (std::string LibraryPath : LibraryPaths) {
39     SmallString<128> Path(LibraryPath);
40     llvm::sys::path::append(Path, BCName);
41     FullName = Path;
42     if (llvm::sys::fs::exists(FullName)) {
43       CmdArgs.push_back("-mlink-builtin-bitcode");
44       CmdArgs.push_back(Args.MakeArgString(FullName));
45       return;
46     }
47   }
48   D.Diag(diag::err_drv_no_such_file) << BCName;
49 }
50 
51 static const char *getOutputFileName(Compilation &C, StringRef Base,
52                                      const char *Postfix,
53                                      const char *Extension) {
54   const char *OutputFileName;
55   if (C.getDriver().isSaveTempsEnabled()) {
56     OutputFileName =
57         C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
58   } else {
59     std::string TmpName =
60         C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
61     OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
62   }
63   return OutputFileName;
64 }
65 } // namespace
66 
67 const char *AMDGCN::Linker::constructLLVMLinkCommand(
68     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
69     const ArgList &Args, StringRef SubArchName,
70     StringRef OutputFilePrefix) const {
71   ArgStringList CmdArgs;
72   // Add the input bc's created by compile step.
73   for (const auto &II : Inputs)
74     CmdArgs.push_back(II.getFilename());
75 
76   // Add an intermediate output file.
77   CmdArgs.push_back("-o");
78   auto OutputFileName = getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
79   CmdArgs.push_back(OutputFileName);
80   SmallString<128> ExecPath(C.getDriver().Dir);
81   llvm::sys::path::append(ExecPath, "llvm-link");
82   const char *Exec = Args.MakeArgString(ExecPath);
83   C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
84   return OutputFileName;
85 }
86 
87 const char *AMDGCN::Linker::constructOptCommand(
88     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
89     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
90     llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
91   // Construct opt command.
92   ArgStringList OptArgs;
93   // The input to opt is the output from llvm-link.
94   OptArgs.push_back(InputFileName);
95   // Pass optimization arg to opt.
96   if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
97     StringRef OOpt = "3";
98     if (A->getOption().matches(options::OPT_O4) ||
99         A->getOption().matches(options::OPT_Ofast))
100       OOpt = "3";
101     else if (A->getOption().matches(options::OPT_O0))
102       OOpt = "0";
103     else if (A->getOption().matches(options::OPT_O)) {
104       // -Os, -Oz, and -O(anything else) map to -O2
105       OOpt = llvm::StringSwitch<const char *>(A->getValue())
106                  .Case("1", "1")
107                  .Case("2", "2")
108                  .Case("3", "3")
109                  .Case("s", "2")
110                  .Case("z", "2")
111                  .Default("2");
112     }
113     OptArgs.push_back(Args.MakeArgString("-O" + OOpt));
114   }
115   OptArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
116   OptArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
117 
118   for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
119     OptArgs.push_back(A->getValue(0));
120   }
121 
122   OptArgs.push_back("-o");
123   auto OutputFileName =
124       getOutputFileName(C, OutputFilePrefix, "-optimized", "bc");
125   OptArgs.push_back(OutputFileName);
126   SmallString<128> OptPath(C.getDriver().Dir);
127   llvm::sys::path::append(OptPath, "opt");
128   const char *OptExec = Args.MakeArgString(OptPath);
129   C.addCommand(std::make_unique<Command>(JA, *this, OptExec, OptArgs, Inputs));
130   return OutputFileName;
131 }
132 
133 const char *AMDGCN::Linker::constructLlcCommand(
134     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
135     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
136     llvm::StringRef OutputFilePrefix, const char *InputFileName,
137     bool OutputIsAsm) const {
138   // Construct llc command.
139   ArgStringList LlcArgs{
140       InputFileName, "-mtriple=amdgcn-amd-amdhsa",
141       Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")),
142       Args.MakeArgString("-mcpu=" + SubArchName)};
143 
144   // Extract all the -m options
145   std::vector<llvm::StringRef> Features;
146   handleTargetFeaturesGroup(
147     Args, Features, options::OPT_m_amdgpu_Features_Group);
148 
149   // Add features to mattr such as xnack
150   std::string MAttrString = "-mattr=";
151   for(auto OneFeature : Features) {
152     MAttrString.append(Args.MakeArgString(OneFeature));
153     if (OneFeature != Features.back())
154       MAttrString.append(",");
155   }
156   if(!Features.empty())
157     LlcArgs.push_back(Args.MakeArgString(MAttrString));
158 
159   for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
160     LlcArgs.push_back(A->getValue(0));
161   }
162 
163   // Add output filename
164   LlcArgs.push_back("-o");
165   auto LlcOutputFile =
166       getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
167   LlcArgs.push_back(LlcOutputFile);
168   SmallString<128> LlcPath(C.getDriver().Dir);
169   llvm::sys::path::append(LlcPath, "llc");
170   const char *Llc = Args.MakeArgString(LlcPath);
171   C.addCommand(std::make_unique<Command>(JA, *this, Llc, LlcArgs, Inputs));
172   return LlcOutputFile;
173 }
174 
175 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
176                                           const InputInfoList &Inputs,
177                                           const InputInfo &Output,
178                                           const llvm::opt::ArgList &Args,
179                                           const char *InputFileName) const {
180   // Construct lld command.
181   // The output from ld.lld is an HSA code object file.
182   ArgStringList LldArgs{
183       "-flavor", "gnu", "-shared", "-o", Output.getFilename(), InputFileName};
184   SmallString<128> LldPath(C.getDriver().Dir);
185   llvm::sys::path::append(LldPath, "lld");
186   const char *Lld = Args.MakeArgString(LldPath);
187   C.addCommand(std::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs));
188 }
189 
190 // Construct a clang-offload-bundler command to bundle code objects for
191 // different GPU's into a HIP fat binary.
192 void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
193                   StringRef OutputFileName, const InputInfoList &Inputs,
194                   const llvm::opt::ArgList &Args, const Tool& T) {
195   // Construct clang-offload-bundler command to bundle object files for
196   // for different GPU archs.
197   ArgStringList BundlerArgs;
198   BundlerArgs.push_back(Args.MakeArgString("-type=o"));
199 
200   // ToDo: Remove the dummy host binary entry which is required by
201   // clang-offload-bundler.
202   std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
203   std::string BundlerInputArg = "-inputs=" NULL_FILE;
204 
205   for (const auto &II : Inputs) {
206     const auto* A = II.getAction();
207     BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" +
208                        StringRef(A->getOffloadingArch()).str();
209     BundlerInputArg = BundlerInputArg + "," + II.getFilename();
210   }
211   BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
212   BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
213 
214   auto BundlerOutputArg =
215       Args.MakeArgString(std::string("-outputs=").append(OutputFileName));
216   BundlerArgs.push_back(BundlerOutputArg);
217 
218   SmallString<128> BundlerPath(C.getDriver().Dir);
219   llvm::sys::path::append(BundlerPath, "clang-offload-bundler");
220   const char *Bundler = Args.MakeArgString(BundlerPath);
221   C.addCommand(std::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
222 }
223 
224 // For amdgcn the inputs of the linker job are device bitcode and output is
225 // object file. It calls llvm-link, opt, llc, then lld steps.
226 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
227                                    const InputInfo &Output,
228                                    const InputInfoList &Inputs,
229                                    const ArgList &Args,
230                                    const char *LinkingOutput) const {
231 
232   if (JA.getType() == types::TY_HIP_FATBIN)
233     return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
234 
235   assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn &&
236          "Unsupported target");
237 
238   std::string SubArchName = JA.getOffloadingArch();
239   assert(StringRef(SubArchName).startswith("gfx") && "Unsupported sub arch");
240 
241   // Prefix for temporary file name.
242   std::string Prefix = llvm::sys::path::stem(Inputs[0].getFilename()).str();
243   if (!C.getDriver().isSaveTempsEnabled())
244     Prefix += "-" + SubArchName;
245 
246   // Each command outputs different files.
247   const char *LLVMLinkCommand =
248       constructLLVMLinkCommand(C, JA, Inputs, Args, SubArchName, Prefix);
249   const char *OptCommand = constructOptCommand(C, JA, Inputs, Args, SubArchName,
250                                                Prefix, LLVMLinkCommand);
251   if (C.getDriver().isSaveTempsEnabled())
252     constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand,
253                         /*OutputIsAsm=*/true);
254   const char *LlcCommand =
255       constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand);
256   constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
257 }
258 
259 HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
260                              const ToolChain &HostTC, const ArgList &Args)
261     : ToolChain(D, Triple, Args), HostTC(HostTC) {
262   // Lookup binaries into the driver directory, this is used to
263   // discover the clang-offload-bundler executable.
264   getProgramPaths().push_back(getDriver().Dir);
265 }
266 
267 void HIPToolChain::addClangTargetOptions(
268     const llvm::opt::ArgList &DriverArgs,
269     llvm::opt::ArgStringList &CC1Args,
270     Action::OffloadKind DeviceOffloadingKind) const {
271   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
272 
273   StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
274   assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
275   (void) GpuArch;
276   assert(DeviceOffloadingKind == Action::OFK_HIP &&
277          "Only HIP offloading kinds are supported for GPUs.");
278 
279   CC1Args.push_back("-target-cpu");
280   CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
281   CC1Args.push_back("-fcuda-is-device");
282 
283   if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
284                          options::OPT_fno_cuda_flush_denormals_to_zero, false))
285     CC1Args.push_back("-fcuda-flush-denormals-to-zero");
286 
287   if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
288                          options::OPT_fno_cuda_approx_transcendentals, false))
289     CC1Args.push_back("-fcuda-approx-transcendentals");
290 
291   if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
292                          false))
293     CC1Args.push_back("-fgpu-rdc");
294 
295   if (DriverArgs.hasFlag(options::OPT_fgpu_allow_device_init,
296                          options::OPT_fno_gpu_allow_device_init, false))
297     CC1Args.push_back("-fgpu-allow-device-init");
298 
299   CC1Args.push_back("-fcuda-allow-variadic-functions");
300 
301   // Default to "hidden" visibility, as object level linking will not be
302   // supported for the foreseeable future.
303   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
304                          options::OPT_fvisibility_ms_compat)) {
305     CC1Args.append({"-fvisibility", "hidden"});
306     CC1Args.push_back("-fapply-global-visibility-to-externs");
307   }
308 
309   if (DriverArgs.hasArg(options::OPT_nogpulib))
310     return;
311   ArgStringList LibraryPaths;
312 
313   // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
314   for (auto Path :
315        DriverArgs.getAllArgValues(options::OPT_hip_device_lib_path_EQ))
316     LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
317 
318   addDirectoryList(DriverArgs, LibraryPaths, "-L", "HIP_DEVICE_LIB_PATH");
319 
320   llvm::SmallVector<std::string, 10> BCLibs;
321 
322   // Add bitcode library in --hip-device-lib.
323   for (auto Lib : DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ)) {
324     BCLibs.push_back(DriverArgs.MakeArgString(Lib));
325   }
326 
327   // If --hip-device-lib is not set, add the default bitcode libraries.
328   if (BCLibs.empty()) {
329     // Get the bc lib file name for ISA version. For example,
330     // gfx803 => oclc_isa_version_803.amdgcn.bc.
331     std::string GFXVersion = GpuArch.drop_front(3).str();
332     std::string ISAVerBC = "oclc_isa_version_" + GFXVersion + ".amdgcn.bc";
333 
334     llvm::StringRef FlushDenormalControlBC;
335     if (DriverArgs.hasArg(options::OPT_fcuda_flush_denormals_to_zero))
336       FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc";
337     else
338       FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc";
339 
340     llvm::StringRef WaveFrontSizeBC;
341     if (stoi(GFXVersion) < 1000)
342       WaveFrontSizeBC = "oclc_wavefrontsize64_on.amdgcn.bc";
343     else
344       WaveFrontSizeBC = "oclc_wavefrontsize64_off.amdgcn.bc";
345 
346     BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc",
347                    "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc",
348                    FlushDenormalControlBC,
349                    "oclc_correctly_rounded_sqrt_on.amdgcn.bc",
350                    "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC,
351                    WaveFrontSizeBC});
352   }
353   for (auto Lib : BCLibs)
354     addBCLib(getDriver(), DriverArgs, CC1Args, LibraryPaths, Lib);
355 }
356 
357 llvm::opt::DerivedArgList *
358 HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
359                              StringRef BoundArch,
360                              Action::OffloadKind DeviceOffloadKind) const {
361   DerivedArgList *DAL =
362       HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
363   if (!DAL)
364     DAL = new DerivedArgList(Args.getBaseArgs());
365 
366   const OptTable &Opts = getDriver().getOpts();
367 
368   for (Arg *A : Args) {
369     if (A->getOption().matches(options::OPT_Xarch__)) {
370       // Skip this argument unless the architecture matches BoundArch.
371       if (BoundArch.empty() || A->getValue(0) != BoundArch)
372         continue;
373 
374       unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
375       unsigned Prev = Index;
376       std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
377 
378       // If the argument parsing failed or more than one argument was
379       // consumed, the -Xarch_ argument's parameter tried to consume
380       // extra arguments. Emit an error and ignore.
381       //
382       // We also want to disallow any options which would alter the
383       // driver behavior; that isn't going to work in our model. We
384       // use isDriverOption() as an approximation, although things
385       // like -O4 are going to slip through.
386       if (!XarchArg || Index > Prev + 1) {
387         getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
388             << A->getAsString(Args);
389         continue;
390       } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
391         getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
392             << A->getAsString(Args);
393         continue;
394       }
395       XarchArg->setBaseArg(A);
396       A = XarchArg.release();
397       DAL->AddSynthesizedArg(A);
398     }
399     DAL->append(A);
400   }
401 
402   if (!BoundArch.empty()) {
403     DAL->eraseArg(options::OPT_march_EQ);
404     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
405   }
406 
407   return DAL;
408 }
409 
410 Tool *HIPToolChain::buildLinker() const {
411   assert(getTriple().getArch() == llvm::Triple::amdgcn);
412   return new tools::AMDGCN::Linker(*this);
413 }
414 
415 void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
416   HostTC.addClangWarningOptions(CC1Args);
417 }
418 
419 ToolChain::CXXStdlibType
420 HIPToolChain::GetCXXStdlibType(const ArgList &Args) const {
421   return HostTC.GetCXXStdlibType(Args);
422 }
423 
424 void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
425                                               ArgStringList &CC1Args) const {
426   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
427 }
428 
429 void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
430                                                  ArgStringList &CC1Args) const {
431   HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
432 }
433 
434 void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
435                                         ArgStringList &CC1Args) const {
436   HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
437 }
438 
439 SanitizerMask HIPToolChain::getSupportedSanitizers() const {
440   // The HIPToolChain only supports sanitizers in the sense that it allows
441   // sanitizer arguments on the command line if they are supported by the host
442   // toolchain. The HIPToolChain will actually ignore any command line
443   // arguments for any of these "supported" sanitizers. That means that no
444   // sanitization of device code is actually supported at this time.
445   //
446   // This behavior is necessary because the host and device toolchains
447   // invocations often share the command line, so the device toolchain must
448   // tolerate flags meant only for the host toolchain.
449   return HostTC.getSupportedSanitizers();
450 }
451 
452 VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
453                                                const ArgList &Args) const {
454   return HostTC.computeMSVCVersion(D, Args);
455 }
456