1*e038c9c4Sjoerg //===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===//
2*e038c9c4Sjoerg //
3*e038c9c4Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*e038c9c4Sjoerg // See https://llvm.org/LICENSE.txt for license information.
5*e038c9c4Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*e038c9c4Sjoerg //
7*e038c9c4Sjoerg //===----------------------------------------------------------------------===//
8*e038c9c4Sjoerg
9*e038c9c4Sjoerg #include "AMDGPUOpenMP.h"
10*e038c9c4Sjoerg #include "AMDGPU.h"
11*e038c9c4Sjoerg #include "CommonArgs.h"
12*e038c9c4Sjoerg #include "InputInfo.h"
13*e038c9c4Sjoerg #include "clang/Basic/DiagnosticDriver.h"
14*e038c9c4Sjoerg #include "clang/Driver/Compilation.h"
15*e038c9c4Sjoerg #include "clang/Driver/Driver.h"
16*e038c9c4Sjoerg #include "clang/Driver/DriverDiagnostic.h"
17*e038c9c4Sjoerg #include "clang/Driver/Options.h"
18*e038c9c4Sjoerg #include "llvm/Support/FileSystem.h"
19*e038c9c4Sjoerg #include "llvm/Support/FormatAdapters.h"
20*e038c9c4Sjoerg #include "llvm/Support/FormatVariadic.h"
21*e038c9c4Sjoerg #include "llvm/Support/Path.h"
22*e038c9c4Sjoerg
23*e038c9c4Sjoerg using namespace clang::driver;
24*e038c9c4Sjoerg using namespace clang::driver::toolchains;
25*e038c9c4Sjoerg using namespace clang::driver::tools;
26*e038c9c4Sjoerg using namespace clang;
27*e038c9c4Sjoerg using namespace llvm::opt;
28*e038c9c4Sjoerg
29*e038c9c4Sjoerg namespace {
30*e038c9c4Sjoerg
getOutputFileName(Compilation & C,StringRef Base,const char * Postfix,const char * Extension)31*e038c9c4Sjoerg static const char *getOutputFileName(Compilation &C, StringRef Base,
32*e038c9c4Sjoerg const char *Postfix,
33*e038c9c4Sjoerg const char *Extension) {
34*e038c9c4Sjoerg const char *OutputFileName;
35*e038c9c4Sjoerg if (C.getDriver().isSaveTempsEnabled()) {
36*e038c9c4Sjoerg OutputFileName =
37*e038c9c4Sjoerg C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
38*e038c9c4Sjoerg } else {
39*e038c9c4Sjoerg std::string TmpName =
40*e038c9c4Sjoerg C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
41*e038c9c4Sjoerg OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
42*e038c9c4Sjoerg }
43*e038c9c4Sjoerg return OutputFileName;
44*e038c9c4Sjoerg }
45*e038c9c4Sjoerg
addLLCOptArg(const llvm::opt::ArgList & Args,llvm::opt::ArgStringList & CmdArgs)46*e038c9c4Sjoerg static void addLLCOptArg(const llvm::opt::ArgList &Args,
47*e038c9c4Sjoerg llvm::opt::ArgStringList &CmdArgs) {
48*e038c9c4Sjoerg if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
49*e038c9c4Sjoerg StringRef OOpt = "0";
50*e038c9c4Sjoerg if (A->getOption().matches(options::OPT_O4) ||
51*e038c9c4Sjoerg A->getOption().matches(options::OPT_Ofast))
52*e038c9c4Sjoerg OOpt = "3";
53*e038c9c4Sjoerg else if (A->getOption().matches(options::OPT_O0))
54*e038c9c4Sjoerg OOpt = "0";
55*e038c9c4Sjoerg else if (A->getOption().matches(options::OPT_O)) {
56*e038c9c4Sjoerg // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3
57*e038c9c4Sjoerg // so we map -Os/-Oz to -O2.
58*e038c9c4Sjoerg // Only clang supports -Og, and maps it to -O1.
59*e038c9c4Sjoerg // We map anything else to -O2.
60*e038c9c4Sjoerg OOpt = llvm::StringSwitch<const char *>(A->getValue())
61*e038c9c4Sjoerg .Case("1", "1")
62*e038c9c4Sjoerg .Case("2", "2")
63*e038c9c4Sjoerg .Case("3", "3")
64*e038c9c4Sjoerg .Case("s", "2")
65*e038c9c4Sjoerg .Case("z", "2")
66*e038c9c4Sjoerg .Case("g", "1")
67*e038c9c4Sjoerg .Default("0");
68*e038c9c4Sjoerg }
69*e038c9c4Sjoerg CmdArgs.push_back(Args.MakeArgString("-O" + OOpt));
70*e038c9c4Sjoerg }
71*e038c9c4Sjoerg }
72*e038c9c4Sjoerg
checkSystemForAMDGPU(const ArgList & Args,const AMDGPUToolChain & TC,std::string & GPUArch)73*e038c9c4Sjoerg static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
74*e038c9c4Sjoerg std::string &GPUArch) {
75*e038c9c4Sjoerg if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
76*e038c9c4Sjoerg std::string ErrMsg =
77*e038c9c4Sjoerg llvm::formatv("{0}", llvm::fmt_consume(std::move(Err)));
78*e038c9c4Sjoerg TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg;
79*e038c9c4Sjoerg return false;
80*e038c9c4Sjoerg }
81*e038c9c4Sjoerg
82*e038c9c4Sjoerg return true;
83*e038c9c4Sjoerg }
84*e038c9c4Sjoerg } // namespace
85*e038c9c4Sjoerg
constructLLVMLinkCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const ArgList & Args,StringRef SubArchName,StringRef OutputFilePrefix) const86*e038c9c4Sjoerg const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
87*e038c9c4Sjoerg Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
88*e038c9c4Sjoerg const ArgList &Args, StringRef SubArchName,
89*e038c9c4Sjoerg StringRef OutputFilePrefix) const {
90*e038c9c4Sjoerg ArgStringList CmdArgs;
91*e038c9c4Sjoerg
92*e038c9c4Sjoerg for (const auto &II : Inputs)
93*e038c9c4Sjoerg if (II.isFilename())
94*e038c9c4Sjoerg CmdArgs.push_back(II.getFilename());
95*e038c9c4Sjoerg // Add an intermediate output file.
96*e038c9c4Sjoerg CmdArgs.push_back("-o");
97*e038c9c4Sjoerg const char *OutputFileName =
98*e038c9c4Sjoerg getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
99*e038c9c4Sjoerg CmdArgs.push_back(OutputFileName);
100*e038c9c4Sjoerg const char *Exec =
101*e038c9c4Sjoerg Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
102*e038c9c4Sjoerg C.addCommand(std::make_unique<Command>(
103*e038c9c4Sjoerg JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs,
104*e038c9c4Sjoerg InputInfo(&JA, Args.MakeArgString(OutputFileName))));
105*e038c9c4Sjoerg return OutputFileName;
106*e038c9c4Sjoerg }
107*e038c9c4Sjoerg
constructLlcCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const llvm::opt::ArgList & Args,llvm::StringRef SubArchName,llvm::StringRef OutputFilePrefix,const char * InputFileName,bool OutputIsAsm) const108*e038c9c4Sjoerg const char *AMDGCN::OpenMPLinker::constructLlcCommand(
109*e038c9c4Sjoerg Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
110*e038c9c4Sjoerg const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
111*e038c9c4Sjoerg llvm::StringRef OutputFilePrefix, const char *InputFileName,
112*e038c9c4Sjoerg bool OutputIsAsm) const {
113*e038c9c4Sjoerg // Construct llc command.
114*e038c9c4Sjoerg ArgStringList LlcArgs;
115*e038c9c4Sjoerg // The input to llc is the output from opt.
116*e038c9c4Sjoerg LlcArgs.push_back(InputFileName);
117*e038c9c4Sjoerg // Pass optimization arg to llc.
118*e038c9c4Sjoerg addLLCOptArg(Args, LlcArgs);
119*e038c9c4Sjoerg LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
120*e038c9c4Sjoerg LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
121*e038c9c4Sjoerg LlcArgs.push_back(
122*e038c9c4Sjoerg Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")));
123*e038c9c4Sjoerg
124*e038c9c4Sjoerg for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
125*e038c9c4Sjoerg LlcArgs.push_back(A->getValue(0));
126*e038c9c4Sjoerg }
127*e038c9c4Sjoerg
128*e038c9c4Sjoerg // Add output filename
129*e038c9c4Sjoerg LlcArgs.push_back("-o");
130*e038c9c4Sjoerg const char *LlcOutputFile =
131*e038c9c4Sjoerg getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
132*e038c9c4Sjoerg LlcArgs.push_back(LlcOutputFile);
133*e038c9c4Sjoerg const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc"));
134*e038c9c4Sjoerg C.addCommand(std::make_unique<Command>(
135*e038c9c4Sjoerg JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs,
136*e038c9c4Sjoerg InputInfo(&JA, Args.MakeArgString(LlcOutputFile))));
137*e038c9c4Sjoerg return LlcOutputFile;
138*e038c9c4Sjoerg }
139*e038c9c4Sjoerg
constructLldCommand(Compilation & C,const JobAction & JA,const InputInfoList & Inputs,const InputInfo & Output,const llvm::opt::ArgList & Args,const char * InputFileName) const140*e038c9c4Sjoerg void AMDGCN::OpenMPLinker::constructLldCommand(
141*e038c9c4Sjoerg Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
142*e038c9c4Sjoerg const InputInfo &Output, const llvm::opt::ArgList &Args,
143*e038c9c4Sjoerg const char *InputFileName) const {
144*e038c9c4Sjoerg // Construct lld command.
145*e038c9c4Sjoerg // The output from ld.lld is an HSA code object file.
146*e038c9c4Sjoerg ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined",
147*e038c9c4Sjoerg "-shared", "-o", Output.getFilename(),
148*e038c9c4Sjoerg InputFileName};
149*e038c9c4Sjoerg
150*e038c9c4Sjoerg const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld"));
151*e038c9c4Sjoerg C.addCommand(std::make_unique<Command>(
152*e038c9c4Sjoerg JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs,
153*e038c9c4Sjoerg InputInfo(&JA, Args.MakeArgString(Output.getFilename()))));
154*e038c9c4Sjoerg }
155*e038c9c4Sjoerg
156*e038c9c4Sjoerg // For amdgcn the inputs of the linker job are device bitcode and output is
157*e038c9c4Sjoerg // object file. It calls llvm-link, opt, llc, then lld steps.
ConstructJob(Compilation & C,const JobAction & JA,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const char * LinkingOutput) const158*e038c9c4Sjoerg void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
159*e038c9c4Sjoerg const InputInfo &Output,
160*e038c9c4Sjoerg const InputInfoList &Inputs,
161*e038c9c4Sjoerg const ArgList &Args,
162*e038c9c4Sjoerg const char *LinkingOutput) const {
163*e038c9c4Sjoerg const ToolChain &TC = getToolChain();
164*e038c9c4Sjoerg assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target");
165*e038c9c4Sjoerg
166*e038c9c4Sjoerg const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC =
167*e038c9c4Sjoerg static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC);
168*e038c9c4Sjoerg
169*e038c9c4Sjoerg std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str();
170*e038c9c4Sjoerg if (GPUArch.empty()) {
171*e038c9c4Sjoerg if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch))
172*e038c9c4Sjoerg return;
173*e038c9c4Sjoerg }
174*e038c9c4Sjoerg
175*e038c9c4Sjoerg // Prefix for temporary file name.
176*e038c9c4Sjoerg std::string Prefix;
177*e038c9c4Sjoerg for (const auto &II : Inputs)
178*e038c9c4Sjoerg if (II.isFilename())
179*e038c9c4Sjoerg Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch;
180*e038c9c4Sjoerg assert(Prefix.length() && "no linker inputs are files ");
181*e038c9c4Sjoerg
182*e038c9c4Sjoerg // Each command outputs different files.
183*e038c9c4Sjoerg const char *LLVMLinkCommand =
184*e038c9c4Sjoerg constructLLVMLinkCommand(C, JA, Inputs, Args, GPUArch, Prefix);
185*e038c9c4Sjoerg
186*e038c9c4Sjoerg // Produce readable assembly if save-temps is enabled.
187*e038c9c4Sjoerg if (C.getDriver().isSaveTempsEnabled())
188*e038c9c4Sjoerg constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand,
189*e038c9c4Sjoerg /*OutputIsAsm=*/true);
190*e038c9c4Sjoerg const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch,
191*e038c9c4Sjoerg Prefix, LLVMLinkCommand);
192*e038c9c4Sjoerg constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
193*e038c9c4Sjoerg }
194*e038c9c4Sjoerg
AMDGPUOpenMPToolChain(const Driver & D,const llvm::Triple & Triple,const ToolChain & HostTC,const ArgList & Args)195*e038c9c4Sjoerg AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D,
196*e038c9c4Sjoerg const llvm::Triple &Triple,
197*e038c9c4Sjoerg const ToolChain &HostTC,
198*e038c9c4Sjoerg const ArgList &Args)
199*e038c9c4Sjoerg : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
200*e038c9c4Sjoerg // Lookup binaries into the driver directory, this is used to
201*e038c9c4Sjoerg // discover the clang-offload-bundler executable.
202*e038c9c4Sjoerg getProgramPaths().push_back(getDriver().Dir);
203*e038c9c4Sjoerg }
204*e038c9c4Sjoerg
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const205*e038c9c4Sjoerg void AMDGPUOpenMPToolChain::addClangTargetOptions(
206*e038c9c4Sjoerg const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
207*e038c9c4Sjoerg Action::OffloadKind DeviceOffloadingKind) const {
208*e038c9c4Sjoerg HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
209*e038c9c4Sjoerg
210*e038c9c4Sjoerg std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str();
211*e038c9c4Sjoerg if (GPUArch.empty()) {
212*e038c9c4Sjoerg if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch))
213*e038c9c4Sjoerg return;
214*e038c9c4Sjoerg }
215*e038c9c4Sjoerg
216*e038c9c4Sjoerg assert(DeviceOffloadingKind == Action::OFK_OpenMP &&
217*e038c9c4Sjoerg "Only OpenMP offloading kinds are supported.");
218*e038c9c4Sjoerg
219*e038c9c4Sjoerg CC1Args.push_back("-target-cpu");
220*e038c9c4Sjoerg CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch));
221*e038c9c4Sjoerg CC1Args.push_back("-fcuda-is-device");
222*e038c9c4Sjoerg
223*e038c9c4Sjoerg if (DriverArgs.hasArg(options::OPT_nogpulib))
224*e038c9c4Sjoerg return;
225*e038c9c4Sjoerg std::string BitcodeSuffix = "amdgcn-" + GPUArch;
226*e038c9c4Sjoerg addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
227*e038c9c4Sjoerg getTriple());
228*e038c9c4Sjoerg }
229*e038c9c4Sjoerg
/// Derive the argument list used for device compilation.
///
/// Starts from the host toolchain's translation, or from a freshly allocated
/// list over the base arguments when the host returns null. For offload kinds
/// other than OpenMP every argument in \p Args is appended. A non-empty
/// \p BoundArch replaces any existing -march= with -march=<BoundArch>.
///
/// \returns the derived list as a raw pointer.
llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs(
    const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
    Action::OffloadKind DeviceOffloadKind) const {
  DerivedArgList *DAL =
      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  if (DAL == nullptr)
    DAL = new DerivedArgList(Args.getBaseArgs());

  if (DeviceOffloadKind != Action::OFK_OpenMP)
    for (Arg *A : Args)
      DAL->append(A);

  if (BoundArch.empty())
    return DAL;

  const OptTable &Opts = getDriver().getOpts();
  DAL->eraseArg(options::OPT_march_EQ);
  DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
  return DAL;
}
254*e038c9c4Sjoerg
buildLinker() const255*e038c9c4Sjoerg Tool *AMDGPUOpenMPToolChain::buildLinker() const {
256*e038c9c4Sjoerg assert(getTriple().isAMDGCN());
257*e038c9c4Sjoerg return new tools::AMDGCN::OpenMPLinker(*this);
258*e038c9c4Sjoerg }
259*e038c9c4Sjoerg
addClangWarningOptions(ArgStringList & CC1Args) const260*e038c9c4Sjoerg void AMDGPUOpenMPToolChain::addClangWarningOptions(
261*e038c9c4Sjoerg ArgStringList &CC1Args) const {
262*e038c9c4Sjoerg HostTC.addClangWarningOptions(CC1Args);
263*e038c9c4Sjoerg }
264*e038c9c4Sjoerg
265*e038c9c4Sjoerg ToolChain::CXXStdlibType
GetCXXStdlibType(const ArgList & Args) const266*e038c9c4Sjoerg AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const {
267*e038c9c4Sjoerg return HostTC.GetCXXStdlibType(Args);
268*e038c9c4Sjoerg }
269*e038c9c4Sjoerg
AddClangSystemIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const270*e038c9c4Sjoerg void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs(
271*e038c9c4Sjoerg const ArgList &DriverArgs, ArgStringList &CC1Args) const {
272*e038c9c4Sjoerg HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
273*e038c9c4Sjoerg }
274*e038c9c4Sjoerg
AddIAMCUIncludeArgs(const ArgList & Args,ArgStringList & CC1Args) const275*e038c9c4Sjoerg void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
276*e038c9c4Sjoerg ArgStringList &CC1Args) const {
277*e038c9c4Sjoerg HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
278*e038c9c4Sjoerg }
279*e038c9c4Sjoerg
getSupportedSanitizers() const280*e038c9c4Sjoerg SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const {
281*e038c9c4Sjoerg // The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it
282*e038c9c4Sjoerg // allows sanitizer arguments on the command line if they are supported by the
283*e038c9c4Sjoerg // host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command
284*e038c9c4Sjoerg // line arguments for any of these "supported" sanitizers. That means that no
285*e038c9c4Sjoerg // sanitization of device code is actually supported at this time.
286*e038c9c4Sjoerg //
287*e038c9c4Sjoerg // This behavior is necessary because the host and device toolchains
288*e038c9c4Sjoerg // invocations often share the command line, so the device toolchain must
289*e038c9c4Sjoerg // tolerate flags meant only for the host toolchain.
290*e038c9c4Sjoerg return HostTC.getSupportedSanitizers();
291*e038c9c4Sjoerg }
292*e038c9c4Sjoerg
293*e038c9c4Sjoerg VersionTuple
computeMSVCVersion(const Driver * D,const ArgList & Args) const294*e038c9c4Sjoerg AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
295*e038c9c4Sjoerg const ArgList &Args) const {
296*e038c9c4Sjoerg return HostTC.computeMSVCVersion(D, Args);
297*e038c9c4Sjoerg }
298