1 //===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPUOpenMP.h" 10 #include "AMDGPU.h" 11 #include "CommonArgs.h" 12 #include "ToolChains/ROCm.h" 13 #include "clang/Basic/DiagnosticDriver.h" 14 #include "clang/Driver/Compilation.h" 15 #include "clang/Driver/Driver.h" 16 #include "clang/Driver/DriverDiagnostic.h" 17 #include "clang/Driver/InputInfo.h" 18 #include "clang/Driver/Options.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/Support/FileSystem.h" 21 #include "llvm/Support/FormatAdapters.h" 22 #include "llvm/Support/FormatVariadic.h" 23 #include "llvm/Support/Path.h" 24 25 using namespace clang::driver; 26 using namespace clang::driver::toolchains; 27 using namespace clang::driver::tools; 28 using namespace clang; 29 using namespace llvm::opt; 30 31 namespace { 32 33 static const char *getOutputFileName(Compilation &C, StringRef Base, 34 const char *Postfix, 35 const char *Extension) { 36 const char *OutputFileName; 37 if (C.getDriver().isSaveTempsEnabled()) { 38 OutputFileName = 39 C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension); 40 } else { 41 std::string TmpName = 42 C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension); 43 OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName)); 44 } 45 return OutputFileName; 46 } 47 48 static void addLLCOptArg(const llvm::opt::ArgList &Args, 49 llvm::opt::ArgStringList &CmdArgs) { 50 if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { 51 StringRef OOpt = "0"; 52 if (A->getOption().matches(options::OPT_O4) || 53 A->getOption().matches(options::OPT_Ofast)) 54 OOpt = "3"; 55 else if (A->getOption().matches(options::OPT_O0)) 56 OOpt = "0"; 57 else if (A->getOption().matches(options::OPT_O)) { 58 // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3 59 // so we map -Os/-Oz to -O2. 60 // Only clang supports -Og, and maps it to -O1. 61 // We map anything else to -O2. 62 OOpt = llvm::StringSwitch<const char *>(A->getValue()) 63 .Case("1", "1") 64 .Case("2", "2") 65 .Case("3", "3") 66 .Case("s", "2") 67 .Case("z", "2") 68 .Case("g", "1") 69 .Default("0"); 70 } 71 CmdArgs.push_back(Args.MakeArgString("-O" + OOpt)); 72 } 73 } 74 75 static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC, 76 std::string &GPUArch) { 77 if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) { 78 std::string ErrMsg = 79 llvm::formatv("{0}", llvm::fmt_consume(std::move(Err))); 80 TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg; 81 return false; 82 } 83 84 return true; 85 } 86 } // namespace 87 88 const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( 89 const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C, 90 const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, 91 StringRef SubArchName, StringRef OutputFilePrefix) const { 92 ArgStringList CmdArgs; 93 94 for (const auto &II : Inputs) 95 if (II.isFilename()) 96 CmdArgs.push_back(II.getFilename()); 97 98 if (Args.hasArg(options::OPT_l)) { 99 auto Lm = Args.getAllArgValues(options::OPT_l); 100 bool HasLibm = false; 101 for (auto &Lib : Lm) { 102 if (Lib == "m") { 103 HasLibm = true; 104 break; 105 } 106 } 107 108 if (HasLibm) { 109 // This is not certain to work. The device libs added here, and passed to 110 // llvm-link, are missing attributes that they expect to be inserted when 111 // passed to mlink-builtin-bitcode. The amdgpu backend does not generate 112 // conservatively correct code when attributes are missing, so this may 113 // be the root cause of miscompilations. Passing via mlink-builtin-bitcode 114 // ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes 115 // on each function, see D28538 for context. 116 // Potential workarounds: 117 // - unconditionally link all of the device libs to every translation 118 // unit in clang via mlink-builtin-bitcode 119 // - build a libm bitcode file as part of the DeviceRTL and explictly 120 // mlink-builtin-bitcode the rocm device libs components at build time 121 // - drop this llvm-link fork in favour or some calls into LLVM, chosen 122 // to do basically the same work as llvm-link but with that call first 123 // - write an opt pass that sets that on every function it sees and pipe 124 // the device-libs bitcode through that on the way to this llvm-link 125 SmallVector<std::string, 12> BCLibs = 126 AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str()); 127 llvm::for_each(BCLibs, [&](StringRef BCFile) { 128 CmdArgs.push_back(Args.MakeArgString(BCFile)); 129 }); 130 } 131 } 132 133 AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn", 134 SubArchName, 135 /* bitcode SDL?*/ true, 136 /* PostClang Link? */ false); 137 // Add an intermediate output file. 138 CmdArgs.push_back("-o"); 139 const char *OutputFileName = 140 getOutputFileName(C, OutputFilePrefix, "-linked", "bc"); 141 CmdArgs.push_back(OutputFileName); 142 const char *Exec = 143 Args.MakeArgString(getToolChain().GetProgramPath("llvm-link")); 144 C.addCommand(std::make_unique<Command>( 145 JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, 146 InputInfo(&JA, Args.MakeArgString(OutputFileName)))); 147 return OutputFileName; 148 } 149 150 const char *AMDGCN::OpenMPLinker::constructLlcCommand( 151 Compilation &C, const JobAction &JA, const InputInfoList &Inputs, 152 const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, 153 llvm::StringRef OutputFilePrefix, const char *InputFileName, 154 bool OutputIsAsm) const { 155 // Construct llc command. 156 ArgStringList LlcArgs; 157 // The input to llc is the output from opt. 158 LlcArgs.push_back(InputFileName); 159 // Pass optimization arg to llc. 160 addLLCOptArg(Args, LlcArgs); 161 LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa"); 162 LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName)); 163 LlcArgs.push_back( 164 Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj"))); 165 166 for (const Arg *A : Args.filtered(options::OPT_mllvm)) { 167 LlcArgs.push_back(A->getValue(0)); 168 } 169 170 // Add output filename 171 LlcArgs.push_back("-o"); 172 const char *LlcOutputFile = 173 getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o"); 174 LlcArgs.push_back(LlcOutputFile); 175 const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc")); 176 C.addCommand(std::make_unique<Command>( 177 JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs, 178 InputInfo(&JA, Args.MakeArgString(LlcOutputFile)))); 179 return LlcOutputFile; 180 } 181 182 void AMDGCN::OpenMPLinker::constructLldCommand( 183 Compilation &C, const JobAction &JA, const InputInfoList &Inputs, 184 const InputInfo &Output, const llvm::opt::ArgList &Args, 185 const char *InputFileName) const { 186 // Construct lld command. 187 // The output from ld.lld is an HSA code object file. 188 ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", 189 "-shared", "-o", Output.getFilename(), 190 InputFileName}; 191 192 const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); 193 C.addCommand(std::make_unique<Command>( 194 JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs, 195 InputInfo(&JA, Args.MakeArgString(Output.getFilename())))); 196 } 197 198 // For amdgcn the inputs of the linker job are device bitcode and output is 199 // object file. It calls llvm-link, opt, llc, then lld steps. 200 void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, 201 const InputInfo &Output, 202 const InputInfoList &Inputs, 203 const ArgList &Args, 204 const char *LinkingOutput) const { 205 const ToolChain &TC = getToolChain(); 206 assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target"); 207 208 const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC = 209 static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC); 210 211 std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str(); 212 if (GPUArch.empty()) { 213 if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch)) 214 return; 215 } 216 217 // Prefix for temporary file name. 218 std::string Prefix; 219 for (const auto &II : Inputs) 220 if (II.isFilename()) 221 Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch; 222 assert(Prefix.length() && "no linker inputs are files "); 223 224 // Each command outputs different files. 225 const char *LLVMLinkCommand = constructLLVMLinkCommand( 226 AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix); 227 228 // Produce readable assembly if save-temps is enabled. 229 if (C.getDriver().isSaveTempsEnabled()) 230 constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand, 231 /*OutputIsAsm=*/true); 232 const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch, 233 Prefix, LLVMLinkCommand); 234 constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand); 235 } 236 237 AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D, 238 const llvm::Triple &Triple, 239 const ToolChain &HostTC, 240 const ArgList &Args) 241 : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { 242 // Lookup binaries into the driver directory, this is used to 243 // discover the clang-offload-bundler executable. 244 getProgramPaths().push_back(getDriver().Dir); 245 } 246 247 void AMDGPUOpenMPToolChain::addClangTargetOptions( 248 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, 249 Action::OffloadKind DeviceOffloadingKind) const { 250 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); 251 252 std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str(); 253 if (GPUArch.empty()) { 254 if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch)) 255 return; 256 } 257 258 assert(DeviceOffloadingKind == Action::OFK_OpenMP && 259 "Only OpenMP offloading kinds are supported."); 260 261 CC1Args.push_back("-target-cpu"); 262 CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch)); 263 CC1Args.push_back("-fcuda-is-device"); 264 265 if (DriverArgs.hasArg(options::OPT_nogpulib)) 266 return; 267 268 std::string BitcodeSuffix; 269 if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, 270 options::OPT_fno_openmp_target_new_runtime, false)) 271 BitcodeSuffix = "new-amdgpu-" + GPUArch; 272 else 273 BitcodeSuffix = "amdgcn-" + GPUArch; 274 275 addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, 276 getTriple()); 277 } 278 279 llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs( 280 const llvm::opt::DerivedArgList &Args, StringRef BoundArch, 281 Action::OffloadKind DeviceOffloadKind) const { 282 DerivedArgList *DAL = 283 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); 284 if (!DAL) 285 DAL = new DerivedArgList(Args.getBaseArgs()); 286 287 const OptTable &Opts = getDriver().getOpts(); 288 289 if (DeviceOffloadKind != Action::OFK_OpenMP) { 290 for (Arg *A : Args) { 291 DAL->append(A); 292 } 293 } 294 295 if (!BoundArch.empty()) { 296 DAL->eraseArg(options::OPT_march_EQ); 297 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), 298 BoundArch); 299 } 300 301 return DAL; 302 } 303 304 Tool *AMDGPUOpenMPToolChain::buildLinker() const { 305 assert(getTriple().isAMDGCN()); 306 return new tools::AMDGCN::OpenMPLinker(*this); 307 } 308 309 void AMDGPUOpenMPToolChain::addClangWarningOptions( 310 ArgStringList &CC1Args) const { 311 HostTC.addClangWarningOptions(CC1Args); 312 } 313 314 ToolChain::CXXStdlibType 315 AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { 316 return HostTC.GetCXXStdlibType(Args); 317 } 318 319 void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs( 320 const ArgList &DriverArgs, ArgStringList &CC1Args) const { 321 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); 322 } 323 324 void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, 325 ArgStringList &CC1Args) const { 326 HostTC.AddIAMCUIncludeArgs(Args, CC1Args); 327 } 328 329 SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const { 330 // The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it 331 // allows sanitizer arguments on the command line if they are supported by the 332 // host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command 333 // line arguments for any of these "supported" sanitizers. That means that no 334 // sanitization of device code is actually supported at this time. 335 // 336 // This behavior is necessary because the host and device toolchains 337 // invocations often share the command line, so the device toolchain must 338 // tolerate flags meant only for the host toolchain. 339 return HostTC.getSupportedSanitizers(); 340 } 341 342 VersionTuple 343 AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, 344 const ArgList &Args) const { 345 return HostTC.computeMSVCVersion(D, Args); 346 } 347