xref: /freebsd-src/contrib/llvm-project/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUOpenMP.h"
10 #include "AMDGPU.h"
11 #include "CommonArgs.h"
12 #include "ToolChains/ROCm.h"
13 #include "clang/Basic/DiagnosticDriver.h"
14 #include "clang/Driver/Compilation.h"
15 #include "clang/Driver/Driver.h"
16 #include "clang/Driver/DriverDiagnostic.h"
17 #include "clang/Driver/InputInfo.h"
18 #include "clang/Driver/Options.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/FormatAdapters.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/Path.h"
24 
25 using namespace clang::driver;
26 using namespace clang::driver::toolchains;
27 using namespace clang::driver::tools;
28 using namespace clang;
29 using namespace llvm::opt;
30 
31 namespace {
32 
33 static const char *getOutputFileName(Compilation &C, StringRef Base,
34                                      const char *Postfix,
35                                      const char *Extension) {
36   const char *OutputFileName;
37   if (C.getDriver().isSaveTempsEnabled()) {
38     OutputFileName =
39         C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
40   } else {
41     std::string TmpName =
42         C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
43     OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
44   }
45   return OutputFileName;
46 }
47 
48 static void addLLCOptArg(const llvm::opt::ArgList &Args,
49                          llvm::opt::ArgStringList &CmdArgs) {
50   if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
51     StringRef OOpt = "0";
52     if (A->getOption().matches(options::OPT_O4) ||
53         A->getOption().matches(options::OPT_Ofast))
54       OOpt = "3";
55     else if (A->getOption().matches(options::OPT_O0))
56       OOpt = "0";
57     else if (A->getOption().matches(options::OPT_O)) {
58       // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
59       // so we map -Os/-Oz to -O2.
60       // Only clang supports -Og, and maps it to -O1.
61       // We map anything else to -O2.
62       OOpt = llvm::StringSwitch<const char *>(A->getValue())
63                  .Case("1", "1")
64                  .Case("2", "2")
65                  .Case("3", "3")
66                  .Case("s", "2")
67                  .Case("z", "2")
68                  .Case("g", "1")
69                  .Default("0");
70     }
71     CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
72   }
73 }
74 
75 static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
76                                  std::string &GPUArch) {
77   if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
78     std::string ErrMsg =
79         llvm::formatv("{0}", llvm::fmt_consume(std::move(Err)));
80     TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg;
81     return false;
82   }
83 
84   return true;
85 }
86 } // namespace
87 
88 const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
89     const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C,
90     const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args,
91     StringRef SubArchName, StringRef OutputFilePrefix) const {
92   ArgStringList CmdArgs;
93 
94   for (const auto &II : Inputs)
95     if (II.isFilename())
96       CmdArgs.push_back(II.getFilename());
97 
98   if (Args.hasArg(options::OPT_l)) {
99     auto Lm = Args.getAllArgValues(options::OPT_l);
100     bool HasLibm = false;
101     for (auto &Lib : Lm) {
102       if (Lib == "m") {
103         HasLibm = true;
104         break;
105       }
106     }
107 
108     if (HasLibm) {
109       // This is not certain to work. The device libs added here, and passed to
110       // llvm-link, are missing attributes that they expect to be inserted when
111       // passed to mlink-builtin-bitcode. The amdgpu backend does not generate
112       // conservatively correct code when attributes are missing, so this may
113       // be the root cause of miscompilations. Passing via mlink-builtin-bitcode
114       // ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes
115       // on each function, see D28538 for context.
116       // Potential workarounds:
117       //  - unconditionally link all of the device libs to every translation
118       //    unit in clang via mlink-builtin-bitcode
119       //  - build a libm bitcode file as part of the DeviceRTL and explictly
120       //    mlink-builtin-bitcode the rocm device libs components at build time
121       //  - drop this llvm-link fork in favour or some calls into LLVM, chosen
122       //    to do basically the same work as llvm-link but with that call first
123       //  - write an opt pass that sets that on every function it sees and pipe
124       //    the device-libs bitcode through that on the way to this llvm-link
125       SmallVector<std::string, 12> BCLibs =
126           AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str());
127       llvm::for_each(BCLibs, [&](StringRef BCFile) {
128         CmdArgs.push_back(Args.MakeArgString(BCFile));
129       });
130     }
131   }
132 
133   AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
134                       SubArchName,
135                       /* bitcode SDL?*/ true,
136                       /* PostClang Link? */ false);
137   // Add an intermediate output file.
138   CmdArgs.push_back("-o");
139   const char *OutputFileName =
140       getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
141   CmdArgs.push_back(OutputFileName);
142   const char *Exec =
143       Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
144   C.addCommand(std::make_unique<Command>(
145       JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs,
146       InputInfo(&JA, Args.MakeArgString(OutputFileName))));
147   return OutputFileName;
148 }
149 
150 const char *AMDGCN::OpenMPLinker::constructLlcCommand(
151     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
152     const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
153     llvm::StringRef OutputFilePrefix, const char *InputFileName,
154     bool OutputIsAsm) const {
155   // Construct llc command.
156   ArgStringList LlcArgs;
157   // The input to llc is the output from opt.
158   LlcArgs.push_back(InputFileName);
159   // Pass optimization arg to llc.
160   addLLCOptArg(Args, LlcArgs);
161   LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
162   LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
163   LlcArgs.push_back(
164       Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
165 
166   for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
167     LlcArgs.push_back(A->getValue(0));
168   }
169 
170   // Add output filename
171   LlcArgs.push_back("-o");
172   const char *LlcOutputFile =
173       getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
174   LlcArgs.push_back(LlcOutputFile);
175   const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
176   C.addCommand(std::make_unique<Command>(
177       JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs,
178       InputInfo(&JA, Args.MakeArgString(LlcOutputFile))));
179   return LlcOutputFile;
180 }
181 
182 void AMDGCN::OpenMPLinker::constructLldCommand(
183     Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
184     const InputInfo &Output, const llvm::opt::ArgList &Args,
185     const char *InputFileName) const {
186   // Construct lld command.
187   // The output from ld.lld is an HSA code object file.
188   ArgStringList LldArgs{"-flavor",    "gnu", "--no-undefined",
189                         "-shared",    "-o",  Output.getFilename(),
190                         InputFileName};
191 
192   const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
193   C.addCommand(std::make_unique<Command>(
194       JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs,
195       InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
196 }
197 
198 // For amdgcn the inputs of the linker job are device bitcode and output is
199 // object file. It calls llvm-link, opt, llc, then lld steps.
200 void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
201                                         const InputInfo &Output,
202                                         const InputInfoList &Inputs,
203                                         const ArgList &Args,
204                                         const char *LinkingOutput) const {
205   const ToolChain &TC = getToolChain();
206   assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
207 
208   const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC =
209       static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);
210 
211   std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
212   if (GPUArch.empty()) {
213     if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
214       return;
215   }
216 
217   // Prefix for temporary file name.
218   std::string Prefix;
219   for (const auto &II : Inputs)
220     if (II.isFilename())
221       Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch;
222   assert(Prefix.length() && "no linker inputs are files ");
223 
224   // Each command outputs different files.
225   const char *LLVMLinkCommand = constructLLVMLinkCommand(
226       AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix);
227 
228   // Produce readable assembly if save-temps is enabled.
229   if (C.getDriver().isSaveTempsEnabled())
230     constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand,
231                         /*OutputIsAsm=*/true);
232   const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch,
233                                                Prefix, LLVMLinkCommand);
234   constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
235 }
236 
237 AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
238                                              const llvm::Triple &Triple,
239                                              const ToolChain &HostTC,
240                                              const ArgList &Args)
241     : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
242   // Lookup binaries into the driver directory, this is used to
243   // discover the clang-offload-bundler executable.
244   getProgramPaths().push_back(getDriver().Dir);
245 }
246 
247 void AMDGPUOpenMPToolChain::addClangTargetOptions(
248     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
249     Action::OffloadKind DeviceOffloadingKind) const {
250   HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
251 
252   std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str();
253   if (GPUArch.empty()) {
254     if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch))
255       return;
256   }
257 
258   assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
259          "Only OpenMP offloading kinds are supported.");
260 
261   CC1Args.push_back("-target-cpu");
262   CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch));
263   CC1Args.push_back("-fcuda-is-device");
264 
265   if (DriverArgs.hasArg(options::OPT_nogpulib))
266     return;
267 
268   std::string BitcodeSuffix;
269   if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
270                          options::OPT_fno_openmp_target_new_runtime, false))
271     BitcodeSuffix = "new-amdgpu-" + GPUArch;
272   else
273     BitcodeSuffix = "amdgcn-" + GPUArch;
274 
275   addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
276                      getTriple());
277 }
278 
279 llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
280     const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
281     Action::OffloadKind DeviceOffloadKind) const {
282   DerivedArgList *DAL =
283       HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
284   if (!DAL)
285     DAL = new DerivedArgList(Args.getBaseArgs());
286 
287   const OptTable &Opts = getDriver().getOpts();
288 
289   if (DeviceOffloadKind != Action::OFK_OpenMP) {
290     for (Arg *A : Args) {
291       DAL->append(A);
292     }
293   }
294 
295   if (!BoundArch.empty()) {
296     DAL->eraseArg(options::OPT_march_EQ);
297     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
298                       BoundArch);
299   }
300 
301   return DAL;
302 }
303 
304 Tool *AMDGPUOpenMPToolChain::buildLinker() const {
305   assert(getTriple().isAMDGCN());
306   return new tools::AMDGCN::OpenMPLinker(*this);
307 }
308 
309 void AMDGPUOpenMPToolChain::addClangWarningOptions(
310     ArgStringList &CC1Args) const {
311   HostTC.addClangWarningOptions(CC1Args);
312 }
313 
314 ToolChain::CXXStdlibType
315 AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
316   return HostTC.GetCXXStdlibType(Args);
317 }
318 
319 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
320     const ArgList &DriverArgs, ArgStringList &CC1Args) const {
321   HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
322 }
323 
324 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
325                                                 ArgStringList &CC1Args) const {
326   HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
327 }
328 
329 SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const {
330   // The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it
331   // allows sanitizer arguments on the command line if they are supported by the
332   // host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command
333   // line arguments for any of these "supported" sanitizers. That means that no
334   // sanitization of device code is actually supported at this time.
335   //
336   // This behavior is necessary because the host and device toolchains
337   // invocations often share the command line, so the device toolchain must
338   // tolerate flags meant only for the host toolchain.
339   return HostTC.getSupportedSanitizers();
340 }
341 
342 VersionTuple
343 AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
344                                           const ArgList &Args) const {
345   return HostTC.computeMSVCVersion(D, Args);
346 }
347