1 //===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HIP.h" 10 #include "CommonArgs.h" 11 #include "InputInfo.h" 12 #include "clang/Basic/Cuda.h" 13 #include "clang/Driver/Compilation.h" 14 #include "clang/Driver/Driver.h" 15 #include "clang/Driver/DriverDiagnostic.h" 16 #include "clang/Driver/Options.h" 17 #include "llvm/Support/FileSystem.h" 18 #include "llvm/Support/Path.h" 19 20 using namespace clang::driver; 21 using namespace clang::driver::toolchains; 22 using namespace clang::driver::tools; 23 using namespace clang; 24 using namespace llvm::opt; 25 26 #if defined(_WIN32) || defined(_WIN64) 27 #define NULL_FILE "nul" 28 #else 29 #define NULL_FILE "/dev/null" 30 #endif 31 32 namespace { 33 34 static void addBCLib(const Driver &D, const ArgList &Args, 35 ArgStringList &CmdArgs, ArgStringList LibraryPaths, 36 StringRef BCName) { 37 StringRef FullName; 38 for (std::string LibraryPath : LibraryPaths) { 39 SmallString<128> Path(LibraryPath); 40 llvm::sys::path::append(Path, BCName); 41 FullName = Path; 42 if (llvm::sys::fs::exists(FullName)) { 43 CmdArgs.push_back("-mlink-builtin-bitcode"); 44 CmdArgs.push_back(Args.MakeArgString(FullName)); 45 return; 46 } 47 } 48 D.Diag(diag::err_drv_no_such_file) << BCName; 49 } 50 51 static const char *getOutputFileName(Compilation &C, StringRef Base, 52 const char *Postfix, 53 const char *Extension) { 54 const char *OutputFileName; 55 if (C.getDriver().isSaveTempsEnabled()) { 56 OutputFileName = 57 C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension); 58 } else { 59 std::string TmpName = 60 C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension); 61 OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName)); 62 } 63 return OutputFileName; 64 } 65 } // namespace 66 67 const char *AMDGCN::Linker::constructLLVMLinkCommand( 68 Compilation &C, const JobAction &JA, const InputInfoList &Inputs, 69 const ArgList &Args, StringRef SubArchName, 70 StringRef OutputFilePrefix) const { 71 ArgStringList CmdArgs; 72 // Add the input bc's created by compile step. 73 for (const auto &II : Inputs) 74 CmdArgs.push_back(II.getFilename()); 75 76 // Add an intermediate output file. 77 CmdArgs.push_back("-o"); 78 auto OutputFileName = getOutputFileName(C, OutputFilePrefix, "-linked", "bc"); 79 CmdArgs.push_back(OutputFileName); 80 SmallString<128> ExecPath(C.getDriver().Dir); 81 llvm::sys::path::append(ExecPath, "llvm-link"); 82 const char *Exec = Args.MakeArgString(ExecPath); 83 C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs)); 84 return OutputFileName; 85 } 86 87 const char *AMDGCN::Linker::constructOptCommand( 88 Compilation &C, const JobAction &JA, const InputInfoList &Inputs, 89 const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, 90 llvm::StringRef OutputFilePrefix, const char *InputFileName) const { 91 // Construct opt command. 92 ArgStringList OptArgs; 93 // The input to opt is the output from llvm-link. 94 OptArgs.push_back(InputFileName); 95 // Pass optimization arg to opt. 96 if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { 97 StringRef OOpt = "3"; 98 if (A->getOption().matches(options::OPT_O4) || 99 A->getOption().matches(options::OPT_Ofast)) 100 OOpt = "3"; 101 else if (A->getOption().matches(options::OPT_O0)) 102 OOpt = "0"; 103 else if (A->getOption().matches(options::OPT_O)) { 104 // -Os, -Oz, and -O(anything else) map to -O2 105 OOpt = llvm::StringSwitch<const char *>(A->getValue()) 106 .Case("1", "1") 107 .Case("2", "2") 108 .Case("3", "3") 109 .Case("s", "2") 110 .Case("z", "2") 111 .Default("2"); 112 } 113 OptArgs.push_back(Args.MakeArgString("-O" + OOpt)); 114 } 115 OptArgs.push_back("-mtriple=amdgcn-amd-amdhsa"); 116 OptArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName)); 117 118 for (const Arg *A : Args.filtered(options::OPT_mllvm)) { 119 OptArgs.push_back(A->getValue(0)); 120 } 121 122 OptArgs.push_back("-o"); 123 auto OutputFileName = 124 getOutputFileName(C, OutputFilePrefix, "-optimized", "bc"); 125 OptArgs.push_back(OutputFileName); 126 SmallString<128> OptPath(C.getDriver().Dir); 127 llvm::sys::path::append(OptPath, "opt"); 128 const char *OptExec = Args.MakeArgString(OptPath); 129 C.addCommand(std::make_unique<Command>(JA, *this, OptExec, OptArgs, Inputs)); 130 return OutputFileName; 131 } 132 133 const char *AMDGCN::Linker::constructLlcCommand( 134 Compilation &C, const JobAction &JA, const InputInfoList &Inputs, 135 const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, 136 llvm::StringRef OutputFilePrefix, const char *InputFileName, 137 bool OutputIsAsm) const { 138 // Construct llc command. 139 ArgStringList LlcArgs{ 140 InputFileName, "-mtriple=amdgcn-amd-amdhsa", 141 Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")), 142 Args.MakeArgString("-mcpu=" + SubArchName)}; 143 144 // Extract all the -m options 145 std::vector<llvm::StringRef> Features; 146 handleTargetFeaturesGroup( 147 Args, Features, options::OPT_m_amdgpu_Features_Group); 148 149 // Add features to mattr such as xnack 150 std::string MAttrString = "-mattr="; 151 for(auto OneFeature : Features) { 152 MAttrString.append(Args.MakeArgString(OneFeature)); 153 if (OneFeature != Features.back()) 154 MAttrString.append(","); 155 } 156 if(!Features.empty()) 157 LlcArgs.push_back(Args.MakeArgString(MAttrString)); 158 159 for (const Arg *A : Args.filtered(options::OPT_mllvm)) { 160 LlcArgs.push_back(A->getValue(0)); 161 } 162 163 // Add output filename 164 LlcArgs.push_back("-o"); 165 auto LlcOutputFile = 166 getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o"); 167 LlcArgs.push_back(LlcOutputFile); 168 SmallString<128> LlcPath(C.getDriver().Dir); 169 llvm::sys::path::append(LlcPath, "llc"); 170 const char *Llc = Args.MakeArgString(LlcPath); 171 C.addCommand(std::make_unique<Command>(JA, *this, Llc, LlcArgs, Inputs)); 172 return LlcOutputFile; 173 } 174 175 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, 176 const InputInfoList &Inputs, 177 const InputInfo &Output, 178 const llvm::opt::ArgList &Args, 179 const char *InputFileName) const { 180 // Construct lld command. 181 // The output from ld.lld is an HSA code object file. 182 ArgStringList LldArgs{ 183 "-flavor", "gnu", "-shared", "-o", Output.getFilename(), InputFileName}; 184 SmallString<128> LldPath(C.getDriver().Dir); 185 llvm::sys::path::append(LldPath, "lld"); 186 const char *Lld = Args.MakeArgString(LldPath); 187 C.addCommand(std::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs)); 188 } 189 190 // Construct a clang-offload-bundler command to bundle code objects for 191 // different GPU's into a HIP fat binary. 192 void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, 193 StringRef OutputFileName, const InputInfoList &Inputs, 194 const llvm::opt::ArgList &Args, const Tool& T) { 195 // Construct clang-offload-bundler command to bundle object files for 196 // for different GPU archs. 197 ArgStringList BundlerArgs; 198 BundlerArgs.push_back(Args.MakeArgString("-type=o")); 199 200 // ToDo: Remove the dummy host binary entry which is required by 201 // clang-offload-bundler. 202 std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux"; 203 std::string BundlerInputArg = "-inputs=" NULL_FILE; 204 205 for (const auto &II : Inputs) { 206 const auto* A = II.getAction(); 207 BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" + 208 StringRef(A->getOffloadingArch()).str(); 209 BundlerInputArg = BundlerInputArg + "," + II.getFilename(); 210 } 211 BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg)); 212 BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg)); 213 214 auto BundlerOutputArg = 215 Args.MakeArgString(std::string("-outputs=").append(OutputFileName)); 216 BundlerArgs.push_back(BundlerOutputArg); 217 218 SmallString<128> BundlerPath(C.getDriver().Dir); 219 llvm::sys::path::append(BundlerPath, "clang-offload-bundler"); 220 const char *Bundler = Args.MakeArgString(BundlerPath); 221 C.addCommand(std::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs)); 222 } 223 224 // For amdgcn the inputs of the linker job are device bitcode and output is 225 // object file. It calls llvm-link, opt, llc, then lld steps. 226 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, 227 const InputInfo &Output, 228 const InputInfoList &Inputs, 229 const ArgList &Args, 230 const char *LinkingOutput) const { 231 232 if (JA.getType() == types::TY_HIP_FATBIN) 233 return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); 234 235 assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn && 236 "Unsupported target"); 237 238 std::string SubArchName = JA.getOffloadingArch(); 239 assert(StringRef(SubArchName).startswith("gfx") && "Unsupported sub arch"); 240 241 // Prefix for temporary file name. 242 std::string Prefix = llvm::sys::path::stem(Inputs[0].getFilename()).str(); 243 if (!C.getDriver().isSaveTempsEnabled()) 244 Prefix += "-" + SubArchName; 245 246 // Each command outputs different files. 247 const char *LLVMLinkCommand = 248 constructLLVMLinkCommand(C, JA, Inputs, Args, SubArchName, Prefix); 249 const char *OptCommand = constructOptCommand(C, JA, Inputs, Args, SubArchName, 250 Prefix, LLVMLinkCommand); 251 if (C.getDriver().isSaveTempsEnabled()) 252 constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand, 253 /*OutputIsAsm=*/true); 254 const char *LlcCommand = 255 constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand); 256 constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand); 257 } 258 259 HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple, 260 const ToolChain &HostTC, const ArgList &Args) 261 : ToolChain(D, Triple, Args), HostTC(HostTC) { 262 // Lookup binaries into the driver directory, this is used to 263 // discover the clang-offload-bundler executable. 264 getProgramPaths().push_back(getDriver().Dir); 265 } 266 267 void HIPToolChain::addClangTargetOptions( 268 const llvm::opt::ArgList &DriverArgs, 269 llvm::opt::ArgStringList &CC1Args, 270 Action::OffloadKind DeviceOffloadingKind) const { 271 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); 272 273 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); 274 assert(!GpuArch.empty() && "Must have an explicit GPU arch."); 275 (void) GpuArch; 276 assert(DeviceOffloadingKind == Action::OFK_HIP && 277 "Only HIP offloading kinds are supported for GPUs."); 278 279 CC1Args.push_back("-target-cpu"); 280 CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch)); 281 CC1Args.push_back("-fcuda-is-device"); 282 283 if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, 284 options::OPT_fno_cuda_flush_denormals_to_zero, false)) 285 CC1Args.push_back("-fcuda-flush-denormals-to-zero"); 286 287 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, 288 options::OPT_fno_cuda_approx_transcendentals, false)) 289 CC1Args.push_back("-fcuda-approx-transcendentals"); 290 291 if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, 292 false)) 293 CC1Args.push_back("-fgpu-rdc"); 294 295 if (DriverArgs.hasFlag(options::OPT_fgpu_allow_device_init, 296 options::OPT_fno_gpu_allow_device_init, false)) 297 CC1Args.push_back("-fgpu-allow-device-init"); 298 299 CC1Args.push_back("-fcuda-allow-variadic-functions"); 300 301 // Default to "hidden" visibility, as object level linking will not be 302 // supported for the foreseeable future. 303 if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, 304 options::OPT_fvisibility_ms_compat)) { 305 CC1Args.append({"-fvisibility", "hidden"}); 306 CC1Args.push_back("-fapply-global-visibility-to-externs"); 307 } 308 309 if (DriverArgs.hasArg(options::OPT_nogpulib)) 310 return; 311 ArgStringList LibraryPaths; 312 313 // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. 314 for (auto Path : 315 DriverArgs.getAllArgValues(options::OPT_hip_device_lib_path_EQ)) 316 LibraryPaths.push_back(DriverArgs.MakeArgString(Path)); 317 318 addDirectoryList(DriverArgs, LibraryPaths, "-L", "HIP_DEVICE_LIB_PATH"); 319 320 llvm::SmallVector<std::string, 10> BCLibs; 321 322 // Add bitcode library in --hip-device-lib. 323 for (auto Lib : DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ)) { 324 BCLibs.push_back(DriverArgs.MakeArgString(Lib)); 325 } 326 327 // If --hip-device-lib is not set, add the default bitcode libraries. 328 if (BCLibs.empty()) { 329 // Get the bc lib file name for ISA version. For example, 330 // gfx803 => oclc_isa_version_803.amdgcn.bc. 331 std::string GFXVersion = GpuArch.drop_front(3).str(); 332 std::string ISAVerBC = "oclc_isa_version_" + GFXVersion + ".amdgcn.bc"; 333 334 llvm::StringRef FlushDenormalControlBC; 335 if (DriverArgs.hasArg(options::OPT_fcuda_flush_denormals_to_zero)) 336 FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc"; 337 else 338 FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc"; 339 340 llvm::StringRef WaveFrontSizeBC; 341 if (stoi(GFXVersion) < 1000) 342 WaveFrontSizeBC = "oclc_wavefrontsize64_on.amdgcn.bc"; 343 else 344 WaveFrontSizeBC = "oclc_wavefrontsize64_off.amdgcn.bc"; 345 346 BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc", 347 "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc", 348 FlushDenormalControlBC, 349 "oclc_correctly_rounded_sqrt_on.amdgcn.bc", 350 "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC, 351 WaveFrontSizeBC}); 352 } 353 for (auto Lib : BCLibs) 354 addBCLib(getDriver(), DriverArgs, CC1Args, LibraryPaths, Lib); 355 } 356 357 llvm::opt::DerivedArgList * 358 HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, 359 StringRef BoundArch, 360 Action::OffloadKind DeviceOffloadKind) const { 361 DerivedArgList *DAL = 362 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); 363 if (!DAL) 364 DAL = new DerivedArgList(Args.getBaseArgs()); 365 366 const OptTable &Opts = getDriver().getOpts(); 367 368 for (Arg *A : Args) { 369 if (A->getOption().matches(options::OPT_Xarch__)) { 370 // Skip this argument unless the architecture matches BoundArch. 371 if (BoundArch.empty() || A->getValue(0) != BoundArch) 372 continue; 373 374 unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1)); 375 unsigned Prev = Index; 376 std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index)); 377 378 // If the argument parsing failed or more than one argument was 379 // consumed, the -Xarch_ argument's parameter tried to consume 380 // extra arguments. Emit an error and ignore. 381 // 382 // We also want to disallow any options which would alter the 383 // driver behavior; that isn't going to work in our model. We 384 // use isDriverOption() as an approximation, although things 385 // like -O4 are going to slip through. 386 if (!XarchArg || Index > Prev + 1) { 387 getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args) 388 << A->getAsString(Args); 389 continue; 390 } else if (XarchArg->getOption().hasFlag(options::DriverOption)) { 391 getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver) 392 << A->getAsString(Args); 393 continue; 394 } 395 XarchArg->setBaseArg(A); 396 A = XarchArg.release(); 397 DAL->AddSynthesizedArg(A); 398 } 399 DAL->append(A); 400 } 401 402 if (!BoundArch.empty()) { 403 DAL->eraseArg(options::OPT_march_EQ); 404 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch); 405 } 406 407 return DAL; 408 } 409 410 Tool *HIPToolChain::buildLinker() const { 411 assert(getTriple().getArch() == llvm::Triple::amdgcn); 412 return new tools::AMDGCN::Linker(*this); 413 } 414 415 void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { 416 HostTC.addClangWarningOptions(CC1Args); 417 } 418 419 ToolChain::CXXStdlibType 420 HIPToolChain::GetCXXStdlibType(const ArgList &Args) const { 421 return HostTC.GetCXXStdlibType(Args); 422 } 423 424 void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, 425 ArgStringList &CC1Args) const { 426 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); 427 } 428 429 void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args, 430 ArgStringList &CC1Args) const { 431 HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args); 432 } 433 434 void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, 435 ArgStringList &CC1Args) const { 436 HostTC.AddIAMCUIncludeArgs(Args, CC1Args); 437 } 438 439 SanitizerMask HIPToolChain::getSupportedSanitizers() const { 440 // The HIPToolChain only supports sanitizers in the sense that it allows 441 // sanitizer arguments on the command line if they are supported by the host 442 // toolchain. The HIPToolChain will actually ignore any command line 443 // arguments for any of these "supported" sanitizers. That means that no 444 // sanitization of device code is actually supported at this time. 445 // 446 // This behavior is necessary because the host and device toolchains 447 // invocations often share the command line, so the device toolchain must 448 // tolerate flags meant only for the host toolchain. 449 return HostTC.getSupportedSanitizers(); 450 } 451 452 VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D, 453 const ArgList &Args) const { 454 return HostTC.computeMSVCVersion(D, Args); 455 } 456