1 //===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HIP.h" 10 #include "AMDGPU.h" 11 #include "CommonArgs.h" 12 #include "InputInfo.h" 13 #include "clang/Basic/Cuda.h" 14 #include "clang/Basic/TargetID.h" 15 #include "clang/Driver/Compilation.h" 16 #include "clang/Driver/Driver.h" 17 #include "clang/Driver/DriverDiagnostic.h" 18 #include "clang/Driver/Options.h" 19 #include "llvm/Support/Alignment.h" 20 #include "llvm/Support/FileSystem.h" 21 #include "llvm/Support/Path.h" 22 #include "llvm/Support/TargetParser.h" 23 24 using namespace clang::driver; 25 using namespace clang::driver::toolchains; 26 using namespace clang::driver::tools; 27 using namespace clang; 28 using namespace llvm::opt; 29 30 #if defined(_WIN32) || defined(_WIN64) 31 #define NULL_FILE "nul" 32 #else 33 #define NULL_FILE "/dev/null" 34 #endif 35 36 namespace { 37 const unsigned HIPCodeObjectAlign = 4096; 38 } // namespace 39 40 void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, 41 const InputInfoList &Inputs, 42 const InputInfo &Output, 43 const llvm::opt::ArgList &Args) const { 44 // Construct lld command. 45 // The output from ld.lld is an HSA code object file. 46 ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared", 47 "-plugin-opt=-amdgpu-internalize-symbols"}; 48 49 auto &TC = getToolChain(); 50 auto &D = TC.getDriver(); 51 assert(!Inputs.empty() && "Must have at least one input."); 52 bool IsThinLTO = D.getLTOMode(/*IsOffload=*/true) == LTOK_Thin; 53 addLTOOptions(TC, Args, LldArgs, Output, Inputs[0], IsThinLTO); 54 55 // Extract all the -m options 56 std::vector<llvm::StringRef> Features; 57 amdgpu::getAMDGPUTargetFeatures(D, TC.getTriple(), Args, Features); 58 59 // Add features to mattr such as cumode 60 std::string MAttrString = "-plugin-opt=-mattr="; 61 for (auto OneFeature : unifyTargetFeatures(Features)) { 62 MAttrString.append(Args.MakeArgString(OneFeature)); 63 if (OneFeature != Features.back()) 64 MAttrString.append(","); 65 } 66 if (!Features.empty()) 67 LldArgs.push_back(Args.MakeArgString(MAttrString)); 68 69 // ToDo: Remove this option after AMDGPU backend supports ISA-level linking. 70 // Since AMDGPU backend currently does not support ISA-level linking, all 71 // called functions need to be imported. 72 if (IsThinLTO) 73 LldArgs.push_back(Args.MakeArgString("-plugin-opt=-force-import-all")); 74 75 for (const Arg *A : Args.filtered(options::OPT_mllvm)) { 76 LldArgs.push_back( 77 Args.MakeArgString(Twine("-plugin-opt=") + A->getValue(0))); 78 } 79 80 if (C.getDriver().isSaveTempsEnabled()) 81 LldArgs.push_back("-save-temps"); 82 83 addLinkerCompressDebugSectionsOption(TC, Args, LldArgs); 84 85 LldArgs.append({"-o", Output.getFilename()}); 86 for (auto Input : Inputs) 87 LldArgs.push_back(Input.getFilename()); 88 89 if (Args.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize, 90 false)) 91 llvm::for_each(TC.getHIPDeviceLibs(Args), [&](StringRef BCFile) { 92 LldArgs.push_back(Args.MakeArgString(BCFile)); 93 }); 94 95 const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); 96 C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), 97 Lld, LldArgs, Inputs, Output)); 98 } 99 100 // Construct a clang-offload-bundler command to bundle code objects for 101 // different GPU's into a HIP fat binary. 102 void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, 103 StringRef OutputFileName, const InputInfoList &Inputs, 104 const llvm::opt::ArgList &Args, const Tool& T) { 105 // Construct clang-offload-bundler command to bundle object files for 106 // for different GPU archs. 107 ArgStringList BundlerArgs; 108 BundlerArgs.push_back(Args.MakeArgString("-type=o")); 109 BundlerArgs.push_back( 110 Args.MakeArgString("-bundle-align=" + Twine(HIPCodeObjectAlign))); 111 112 // ToDo: Remove the dummy host binary entry which is required by 113 // clang-offload-bundler. 114 std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux"; 115 std::string BundlerInputArg = "-inputs=" NULL_FILE; 116 117 // For code object version 2 and 3, the offload kind in bundle ID is 'hip' 118 // for backward compatibility. For code object version 4 and greater, the 119 // offload kind in bundle ID is 'hipv4'. 120 std::string OffloadKind = "hip"; 121 if (getAMDGPUCodeObjectVersion(C.getDriver(), Args) >= 4) 122 OffloadKind = OffloadKind + "v4"; 123 for (const auto &II : Inputs) { 124 const auto* A = II.getAction(); 125 BundlerTargetArg = BundlerTargetArg + "," + OffloadKind + 126 "-amdgcn-amd-amdhsa--" + 127 StringRef(A->getOffloadingArch()).str(); 128 BundlerInputArg = BundlerInputArg + "," + II.getFilename(); 129 } 130 BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg)); 131 BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg)); 132 133 std::string Output = std::string(OutputFileName); 134 auto BundlerOutputArg = 135 Args.MakeArgString(std::string("-outputs=").append(Output)); 136 BundlerArgs.push_back(BundlerOutputArg); 137 138 const char *Bundler = Args.MakeArgString( 139 T.getToolChain().GetProgramPath("clang-offload-bundler")); 140 C.addCommand(std::make_unique<Command>( 141 JA, T, ResponseFileSupport::None(), Bundler, BundlerArgs, Inputs, 142 InputInfo(&JA, Args.MakeArgString(Output)))); 143 } 144 145 /// Add Generated HIP Object File which has device images embedded into the 146 /// host to the argument list for linking. Using MC directives, embed the 147 /// device code and also define symbols required by the code generation so that 148 /// the image can be retrieved at runtime. 149 void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary( 150 Compilation &C, const InputInfo &Output, 151 const InputInfoList &Inputs, const ArgList &Args, 152 const JobAction &JA) const { 153 const ToolChain &TC = getToolChain(); 154 std::string Name = 155 std::string(llvm::sys::path::stem(Output.getFilename())); 156 157 // Create Temp Object File Generator, 158 // Offload Bundled file and Bundled Object file. 159 // Keep them if save-temps is enabled. 160 const char *McinFile; 161 const char *BundleFile; 162 if (C.getDriver().isSaveTempsEnabled()) { 163 McinFile = C.getArgs().MakeArgString(Name + ".mcin"); 164 BundleFile = C.getArgs().MakeArgString(Name + ".hipfb"); 165 } else { 166 auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin"); 167 McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin)); 168 auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb"); 169 BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb)); 170 } 171 constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this); 172 173 // Create a buffer to write the contents of the temp obj generator. 174 std::string ObjBuffer; 175 llvm::raw_string_ostream ObjStream(ObjBuffer); 176 177 // Add MC directives to embed target binaries. We ensure that each 178 // section and image is 16-byte aligned. This is not mandatory, but 179 // increases the likelihood of data to be aligned with a cache block 180 // in several main host machines. 181 ObjStream << "# HIP Object Generator\n"; 182 ObjStream << "# *** Automatically generated by Clang ***\n"; 183 ObjStream << " .type __hip_fatbin,@object\n"; 184 ObjStream << " .section .hip_fatbin,\"a\",@progbits\n"; 185 ObjStream << " .globl __hip_fatbin\n"; 186 ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign)) 187 << "\n"; 188 ObjStream << "__hip_fatbin:\n"; 189 ObjStream << " .incbin \"" << BundleFile << "\"\n"; 190 ObjStream.flush(); 191 192 // Dump the contents of the temp object file gen if the user requested that. 193 // We support this option to enable testing of behavior with -###. 194 if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script)) 195 llvm::errs() << ObjBuffer; 196 197 // Open script file and write the contents. 198 std::error_code EC; 199 llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None); 200 201 if (EC) { 202 C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); 203 return; 204 } 205 206 Objf << ObjBuffer; 207 208 ArgStringList McArgs{"-o", Output.getFilename(), 209 McinFile, "--filetype=obj"}; 210 const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc")); 211 C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(), 212 Mc, McArgs, Inputs, Output)); 213 } 214 215 // For amdgcn the inputs of the linker job are device bitcode and output is 216 // object file. It calls llvm-link, opt, llc, then lld steps. 217 void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, 218 const InputInfo &Output, 219 const InputInfoList &Inputs, 220 const ArgList &Args, 221 const char *LinkingOutput) const { 222 if (Inputs.size() > 0 && 223 Inputs[0].getType() == types::TY_Image && 224 JA.getType() == types::TY_Object) 225 return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA); 226 227 if (JA.getType() == types::TY_HIP_FATBIN) 228 return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); 229 230 return constructLldCommand(C, JA, Inputs, Output, Args); 231 } 232 233 HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple, 234 const ToolChain &HostTC, const ArgList &Args) 235 : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { 236 // Lookup binaries into the driver directory, this is used to 237 // discover the clang-offload-bundler executable. 238 getProgramPaths().push_back(getDriver().Dir); 239 } 240 241 void HIPToolChain::addClangTargetOptions( 242 const llvm::opt::ArgList &DriverArgs, 243 llvm::opt::ArgStringList &CC1Args, 244 Action::OffloadKind DeviceOffloadingKind) const { 245 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); 246 247 assert(DeviceOffloadingKind == Action::OFK_HIP && 248 "Only HIP offloading kinds are supported for GPUs."); 249 250 CC1Args.push_back("-fcuda-is-device"); 251 252 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals, 253 options::OPT_fno_cuda_approx_transcendentals, false)) 254 CC1Args.push_back("-fcuda-approx-transcendentals"); 255 256 if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, 257 false)) 258 CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"}); 259 260 StringRef MaxThreadsPerBlock = 261 DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ); 262 if (!MaxThreadsPerBlock.empty()) { 263 std::string ArgStr = 264 std::string("--gpu-max-threads-per-block=") + MaxThreadsPerBlock.str(); 265 CC1Args.push_back(DriverArgs.MakeArgStringRef(ArgStr)); 266 } 267 268 CC1Args.push_back("-fcuda-allow-variadic-functions"); 269 270 // Default to "hidden" visibility, as object level linking will not be 271 // supported for the foreseeable future. 272 if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, 273 options::OPT_fvisibility_ms_compat)) { 274 CC1Args.append({"-fvisibility", "hidden"}); 275 CC1Args.push_back("-fapply-global-visibility-to-externs"); 276 } 277 278 llvm::for_each(getHIPDeviceLibs(DriverArgs), [&](StringRef BCFile) { 279 CC1Args.push_back("-mlink-builtin-bitcode"); 280 CC1Args.push_back(DriverArgs.MakeArgString(BCFile)); 281 }); 282 } 283 284 llvm::opt::DerivedArgList * 285 HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, 286 StringRef BoundArch, 287 Action::OffloadKind DeviceOffloadKind) const { 288 DerivedArgList *DAL = 289 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); 290 if (!DAL) 291 DAL = new DerivedArgList(Args.getBaseArgs()); 292 293 const OptTable &Opts = getDriver().getOpts(); 294 295 for (Arg *A : Args) { 296 if (!shouldSkipArgument(A)) 297 DAL->append(A); 298 } 299 300 if (!BoundArch.empty()) { 301 DAL->eraseArg(options::OPT_mcpu_EQ); 302 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch); 303 checkTargetID(*DAL); 304 } 305 306 return DAL; 307 } 308 309 Tool *HIPToolChain::buildLinker() const { 310 assert(getTriple().getArch() == llvm::Triple::amdgcn); 311 return new tools::AMDGCN::Linker(*this); 312 } 313 314 void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { 315 HostTC.addClangWarningOptions(CC1Args); 316 } 317 318 ToolChain::CXXStdlibType 319 HIPToolChain::GetCXXStdlibType(const ArgList &Args) const { 320 return HostTC.GetCXXStdlibType(Args); 321 } 322 323 void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, 324 ArgStringList &CC1Args) const { 325 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); 326 } 327 328 void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args, 329 ArgStringList &CC1Args) const { 330 HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args); 331 } 332 333 void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, 334 ArgStringList &CC1Args) const { 335 HostTC.AddIAMCUIncludeArgs(Args, CC1Args); 336 } 337 338 void HIPToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, 339 ArgStringList &CC1Args) const { 340 RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args); 341 } 342 343 SanitizerMask HIPToolChain::getSupportedSanitizers() const { 344 // The HIPToolChain only supports sanitizers in the sense that it allows 345 // sanitizer arguments on the command line if they are supported by the host 346 // toolchain. The HIPToolChain will actually ignore any command line 347 // arguments for any of these "supported" sanitizers. That means that no 348 // sanitization of device code is actually supported at this time. 349 // 350 // This behavior is necessary because the host and device toolchains 351 // invocations often share the command line, so the device toolchain must 352 // tolerate flags meant only for the host toolchain. 353 return HostTC.getSupportedSanitizers(); 354 } 355 356 VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D, 357 const ArgList &Args) const { 358 return HostTC.computeMSVCVersion(D, Args); 359 } 360 361 llvm::SmallVector<std::string, 12> 362 HIPToolChain::getHIPDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { 363 llvm::SmallVector<std::string, 12> BCLibs; 364 if (DriverArgs.hasArg(options::OPT_nogpulib)) 365 return {}; 366 ArgStringList LibraryPaths; 367 368 // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. 369 for (auto Path : RocmInstallation.getRocmDeviceLibPathArg()) 370 LibraryPaths.push_back(DriverArgs.MakeArgString(Path)); 371 372 addDirectoryList(DriverArgs, LibraryPaths, "", "HIP_DEVICE_LIB_PATH"); 373 374 // Maintain compatability with --hip-device-lib. 375 auto BCLibArgs = DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ); 376 if (!BCLibArgs.empty()) { 377 llvm::for_each(BCLibArgs, [&](StringRef BCName) { 378 StringRef FullName; 379 for (std::string LibraryPath : LibraryPaths) { 380 SmallString<128> Path(LibraryPath); 381 llvm::sys::path::append(Path, BCName); 382 FullName = Path; 383 if (llvm::sys::fs::exists(FullName)) { 384 BCLibs.push_back(FullName.str()); 385 return; 386 } 387 } 388 getDriver().Diag(diag::err_drv_no_such_file) << BCName; 389 }); 390 } else { 391 if (!RocmInstallation.hasDeviceLibrary()) { 392 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; 393 return {}; 394 } 395 StringRef GpuArch = getGPUArch(DriverArgs); 396 assert(!GpuArch.empty() && "Must have an explicit GPU arch."); 397 (void)GpuArch; 398 auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); 399 const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); 400 401 std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); 402 if (LibDeviceFile.empty()) { 403 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch; 404 return {}; 405 } 406 407 // If --hip-device-lib is not set, add the default bitcode libraries. 408 // TODO: There are way too many flags that change this. Do we need to check 409 // them all? 410 bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, 411 options::OPT_fno_gpu_flush_denormals_to_zero, 412 getDefaultDenormsAreZeroForTarget(Kind)); 413 bool FiniteOnly = 414 DriverArgs.hasFlag(options::OPT_ffinite_math_only, 415 options::OPT_fno_finite_math_only, false); 416 bool UnsafeMathOpt = 417 DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, 418 options::OPT_fno_unsafe_math_optimizations, false); 419 bool FastRelaxedMath = DriverArgs.hasFlag( 420 options::OPT_ffast_math, options::OPT_fno_fast_math, false); 421 bool CorrectSqrt = DriverArgs.hasFlag( 422 options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, 423 options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); 424 bool Wave64 = isWave64(DriverArgs, Kind); 425 426 if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, 427 options::OPT_fno_gpu_sanitize, false)) { 428 auto AsanRTL = RocmInstallation.getAsanRTLPath(); 429 if (AsanRTL.empty()) { 430 unsigned DiagID = getDriver().getDiags().getCustomDiagID( 431 DiagnosticsEngine::Error, 432 "AMDGPU address sanitizer runtime library (asanrtl) is not found. " 433 "Please install ROCm device library which supports address " 434 "sanitizer"); 435 getDriver().Diag(DiagID); 436 return {}; 437 } else 438 BCLibs.push_back(AsanRTL.str()); 439 } 440 441 // Add the HIP specific bitcode library. 442 BCLibs.push_back(RocmInstallation.getHIPPath().str()); 443 444 // Add the generic set of libraries. 445 BCLibs.append(RocmInstallation.getCommonBitcodeLibs( 446 DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, 447 FastRelaxedMath, CorrectSqrt)); 448 449 // Add instrument lib. 450 auto InstLib = 451 DriverArgs.getLastArgValue(options::OPT_gpu_instrument_lib_EQ); 452 if (InstLib.empty()) 453 return BCLibs; 454 if (llvm::sys::fs::exists(InstLib)) 455 BCLibs.push_back(InstLib.str()); 456 else 457 getDriver().Diag(diag::err_drv_no_such_file) << InstLib; 458 } 459 460 return BCLibs; 461 } 462