10b57cec5SDimitry Andric //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "Cuda.h" 100b57cec5SDimitry Andric #include "CommonArgs.h" 110b57cec5SDimitry Andric #include "clang/Basic/Cuda.h" 120b57cec5SDimitry Andric #include "clang/Config/config.h" 130b57cec5SDimitry Andric #include "clang/Driver/Compilation.h" 140b57cec5SDimitry Andric #include "clang/Driver/Distro.h" 150b57cec5SDimitry Andric #include "clang/Driver/Driver.h" 160b57cec5SDimitry Andric #include "clang/Driver/DriverDiagnostic.h" 17fe6060f1SDimitry Andric #include "clang/Driver/InputInfo.h" 180b57cec5SDimitry Andric #include "clang/Driver/Options.h" 19349cc55cSDimitry Andric #include "llvm/ADT/StringExtras.h" 200b57cec5SDimitry Andric #include "llvm/Option/ArgList.h" 210b57cec5SDimitry Andric #include "llvm/Support/FileSystem.h" 22bdd1243dSDimitry Andric #include "llvm/Support/FormatAdapters.h" 23bdd1243dSDimitry Andric #include "llvm/Support/FormatVariadic.h" 240b57cec5SDimitry Andric #include "llvm/Support/Path.h" 250b57cec5SDimitry Andric #include "llvm/Support/Process.h" 260b57cec5SDimitry Andric #include "llvm/Support/Program.h" 270b57cec5SDimitry Andric #include "llvm/Support/VirtualFileSystem.h" 2806c3fb27SDimitry Andric #include "llvm/TargetParser/Host.h" 2906c3fb27SDimitry Andric #include "llvm/TargetParser/TargetParser.h" 300b57cec5SDimitry Andric #include <system_error> 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric using namespace clang::driver; 330b57cec5SDimitry Andric using namespace clang::driver::toolchains; 340b57cec5SDimitry Andric using namespace clang::driver::tools; 350b57cec5SDimitry Andric using namespace clang; 360b57cec5SDimitry Andric using namespace llvm::opt; 370b57cec5SDimitry Andric 38eaeb601bSDimitry Andric namespace { 390b57cec5SDimitry Andric 40eaeb601bSDimitry Andric CudaVersion getCudaVersion(uint32_t raw_version) { 41eaeb601bSDimitry Andric if (raw_version < 7050) 42eaeb601bSDimitry Andric return CudaVersion::CUDA_70; 43eaeb601bSDimitry Andric if (raw_version < 8000) 44eaeb601bSDimitry Andric return CudaVersion::CUDA_75; 45eaeb601bSDimitry Andric if (raw_version < 9000) 46eaeb601bSDimitry Andric return CudaVersion::CUDA_80; 47eaeb601bSDimitry Andric if (raw_version < 9010) 48eaeb601bSDimitry Andric return CudaVersion::CUDA_90; 49eaeb601bSDimitry Andric if (raw_version < 9020) 50eaeb601bSDimitry Andric return CudaVersion::CUDA_91; 51eaeb601bSDimitry Andric if (raw_version < 10000) 52eaeb601bSDimitry Andric return CudaVersion::CUDA_92; 53eaeb601bSDimitry Andric if (raw_version < 10010) 54eaeb601bSDimitry Andric return CudaVersion::CUDA_100; 55eaeb601bSDimitry Andric if (raw_version < 10020) 56eaeb601bSDimitry Andric return CudaVersion::CUDA_101; 57eaeb601bSDimitry Andric if (raw_version < 11000) 58eaeb601bSDimitry Andric return CudaVersion::CUDA_102; 59eaeb601bSDimitry Andric if (raw_version < 11010) 60eaeb601bSDimitry Andric return CudaVersion::CUDA_110; 61fe6060f1SDimitry Andric if (raw_version < 11020) 62fe6060f1SDimitry Andric return CudaVersion::CUDA_111; 63349cc55cSDimitry Andric if (raw_version < 11030) 64349cc55cSDimitry Andric return CudaVersion::CUDA_112; 65349cc55cSDimitry Andric if (raw_version < 11040) 66349cc55cSDimitry Andric return CudaVersion::CUDA_113; 67349cc55cSDimitry Andric if (raw_version < 11050) 68349cc55cSDimitry Andric return CudaVersion::CUDA_114; 69349cc55cSDimitry Andric if (raw_version < 11060) 70349cc55cSDimitry Andric return CudaVersion::CUDA_115; 71bdd1243dSDimitry Andric if (raw_version < 11070) 72bdd1243dSDimitry Andric return CudaVersion::CUDA_116; 73bdd1243dSDimitry Andric if (raw_version < 11080) 74bdd1243dSDimitry Andric return CudaVersion::CUDA_117; 75bdd1243dSDimitry Andric if (raw_version < 11090) 76bdd1243dSDimitry Andric return CudaVersion::CUDA_118; 7706c3fb27SDimitry Andric if (raw_version < 12010) 7806c3fb27SDimitry Andric return CudaVersion::CUDA_120; 7906c3fb27SDimitry Andric if (raw_version < 12020) 8006c3fb27SDimitry Andric return CudaVersion::CUDA_121; 815f757f3fSDimitry Andric if (raw_version < 12030) 825f757f3fSDimitry Andric return CudaVersion::CUDA_122; 835f757f3fSDimitry Andric if (raw_version < 12040) 845f757f3fSDimitry Andric return CudaVersion::CUDA_123; 850fca6ea1SDimitry Andric if (raw_version < 12050) 860fca6ea1SDimitry Andric return CudaVersion::CUDA_124; 870fca6ea1SDimitry Andric if (raw_version < 12060) 880fca6ea1SDimitry Andric return CudaVersion::CUDA_125; 89349cc55cSDimitry Andric return CudaVersion::NEW; 90aec4c088SDimitry Andric } 91aec4c088SDimitry Andric 92349cc55cSDimitry Andric CudaVersion parseCudaHFile(llvm::StringRef Input) { 93eaeb601bSDimitry Andric // Helper lambda which skips the words if the line starts with them or returns 94bdd1243dSDimitry Andric // std::nullopt otherwise. 95eaeb601bSDimitry Andric auto StartsWithWords = 96eaeb601bSDimitry Andric [](llvm::StringRef Line, 97bdd1243dSDimitry Andric const SmallVector<StringRef, 3> words) -> std::optional<StringRef> { 98eaeb601bSDimitry Andric for (StringRef word : words) { 99eaeb601bSDimitry Andric if (!Line.consume_front(word)) 100eaeb601bSDimitry Andric return {}; 101eaeb601bSDimitry Andric Line = Line.ltrim(); 102eaeb601bSDimitry Andric } 103eaeb601bSDimitry Andric return Line; 104eaeb601bSDimitry Andric }; 105eaeb601bSDimitry Andric 106eaeb601bSDimitry Andric Input = Input.ltrim(); 107eaeb601bSDimitry Andric while (!Input.empty()) { 108eaeb601bSDimitry Andric if (auto Line = 109eaeb601bSDimitry Andric StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) { 110eaeb601bSDimitry Andric uint32_t RawVersion; 111eaeb601bSDimitry Andric Line->consumeInteger(10, RawVersion); 112349cc55cSDimitry Andric return getCudaVersion(RawVersion); 113eaeb601bSDimitry Andric } 114eaeb601bSDimitry Andric // Find next non-empty line. 115eaeb601bSDimitry Andric Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim(); 116eaeb601bSDimitry Andric } 117349cc55cSDimitry Andric return CudaVersion::UNKNOWN; 118eaeb601bSDimitry Andric } 119eaeb601bSDimitry Andric } // namespace 120eaeb601bSDimitry Andric 121aec4c088SDimitry Andric void CudaInstallationDetector::WarnIfUnsupportedVersion() { 122349cc55cSDimitry Andric if (Version > CudaVersion::PARTIALLY_SUPPORTED) { 123349cc55cSDimitry Andric std::string VersionString = CudaVersionToString(Version); 124349cc55cSDimitry Andric if (!VersionString.empty()) 125349cc55cSDimitry Andric VersionString.insert(0, " "); 126349cc55cSDimitry Andric D.Diag(diag::warn_drv_new_cuda_version) 127349cc55cSDimitry Andric << VersionString 128349cc55cSDimitry Andric << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED) 129349cc55cSDimitry Andric << CudaVersionToString(CudaVersion::PARTIALLY_SUPPORTED); 130349cc55cSDimitry Andric } else if (Version > CudaVersion::FULLY_SUPPORTED) 131349cc55cSDimitry Andric D.Diag(diag::warn_drv_partially_supported_cuda_version) 132349cc55cSDimitry Andric << CudaVersionToString(Version); 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric CudaInstallationDetector::CudaInstallationDetector( 1360b57cec5SDimitry Andric const Driver &D, const llvm::Triple &HostTriple, 1370b57cec5SDimitry Andric const llvm::opt::ArgList &Args) 1380b57cec5SDimitry Andric : D(D) { 1390b57cec5SDimitry Andric struct Candidate { 1400b57cec5SDimitry Andric std::string Path; 1410b57cec5SDimitry Andric bool StrictChecking; 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric Candidate(std::string Path, bool StrictChecking = false) 1440b57cec5SDimitry Andric : Path(Path), StrictChecking(StrictChecking) {} 1450b57cec5SDimitry Andric }; 1460b57cec5SDimitry Andric SmallVector<Candidate, 4> Candidates; 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric // In decreasing order so we prefer newer versions to older versions. 1490b57cec5SDimitry Andric std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"}; 1505ffd83dbSDimitry Andric auto &FS = D.getVFS(); 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) { 1530b57cec5SDimitry Andric Candidates.emplace_back( 1540b57cec5SDimitry Andric Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str()); 1550b57cec5SDimitry Andric } else if (HostTriple.isOSWindows()) { 1560b57cec5SDimitry Andric for (const char *Ver : Versions) 1570b57cec5SDimitry Andric Candidates.emplace_back( 1580b57cec5SDimitry Andric D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" + 1590b57cec5SDimitry Andric Ver); 1600b57cec5SDimitry Andric } else { 1610b57cec5SDimitry Andric if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) { 1620b57cec5SDimitry Andric // Try to find ptxas binary. If the executable is located in a directory 1630b57cec5SDimitry Andric // called 'bin/', its parent directory might be a good guess for a valid 1640b57cec5SDimitry Andric // CUDA installation. 1650b57cec5SDimitry Andric // However, some distributions might installs 'ptxas' to /usr/bin. In that 1660b57cec5SDimitry Andric // case the candidate would be '/usr' which passes the following checks 1670b57cec5SDimitry Andric // because '/usr/include' exists as well. To avoid this case, we always 1680b57cec5SDimitry Andric // check for the directory potentially containing files for libdevice, 1690b57cec5SDimitry Andric // even if the user passes -nocudalib. 1700b57cec5SDimitry Andric if (llvm::ErrorOr<std::string> ptxas = 1710b57cec5SDimitry Andric llvm::sys::findProgramByName("ptxas")) { 1720b57cec5SDimitry Andric SmallString<256> ptxasAbsolutePath; 1730b57cec5SDimitry Andric llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath); 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath); 1760b57cec5SDimitry Andric if (llvm::sys::path::filename(ptxasDir) == "bin") 1775ffd83dbSDimitry Andric Candidates.emplace_back( 1785ffd83dbSDimitry Andric std::string(llvm::sys::path::parent_path(ptxasDir)), 1790b57cec5SDimitry Andric /*StrictChecking=*/true); 1800b57cec5SDimitry Andric } 1810b57cec5SDimitry Andric } 1820b57cec5SDimitry Andric 1830b57cec5SDimitry Andric Candidates.emplace_back(D.SysRoot + "/usr/local/cuda"); 1840b57cec5SDimitry Andric for (const char *Ver : Versions) 1850b57cec5SDimitry Andric Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver); 1860b57cec5SDimitry Andric 1875ffd83dbSDimitry Andric Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple())); 188480093f4SDimitry Andric if (Dist.IsDebian() || Dist.IsUbuntu()) 1890b57cec5SDimitry Andric // Special case for Debian to have nvidia-cuda-toolkit work 1900b57cec5SDimitry Andric // out of the box. More info on http://bugs.debian.org/882505 1910b57cec5SDimitry Andric Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda"); 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 194a7dea167SDimitry Andric bool NoCudaLib = Args.hasArg(options::OPT_nogpulib); 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric for (const auto &Candidate : Candidates) { 1970b57cec5SDimitry Andric InstallPath = Candidate.Path; 1985ffd83dbSDimitry Andric if (InstallPath.empty() || !FS.exists(InstallPath)) 1990b57cec5SDimitry Andric continue; 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric BinPath = InstallPath + "/bin"; 2020b57cec5SDimitry Andric IncludePath = InstallPath + "/include"; 2030b57cec5SDimitry Andric LibDevicePath = InstallPath + "/nvvm/libdevice"; 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric if (!(FS.exists(IncludePath) && FS.exists(BinPath))) 2060b57cec5SDimitry Andric continue; 2070b57cec5SDimitry Andric bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking); 2080b57cec5SDimitry Andric if (CheckLibDevice && !FS.exists(LibDevicePath)) 2090b57cec5SDimitry Andric continue; 2100b57cec5SDimitry Andric 211349cc55cSDimitry Andric Version = CudaVersion::UNKNOWN; 212eaeb601bSDimitry Andric if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h")) 213349cc55cSDimitry Andric Version = parseCudaHFile((*CudaHFile)->getBuffer()); 214349cc55cSDimitry Andric // As the last resort, make an educated guess between CUDA-7.0, which had 215349cc55cSDimitry Andric // old-style libdevice bitcode, and an unknown recent CUDA version. 216349cc55cSDimitry Andric if (Version == CudaVersion::UNKNOWN) { 217349cc55cSDimitry Andric Version = FS.exists(LibDevicePath + "/libdevice.10.bc") 218349cc55cSDimitry Andric ? CudaVersion::NEW 219349cc55cSDimitry Andric : CudaVersion::CUDA_70; 2200b57cec5SDimitry Andric } 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric if (Version >= CudaVersion::CUDA_90) { 2230b57cec5SDimitry Andric // CUDA-9+ uses single libdevice file for all GPU variants. 2240b57cec5SDimitry Andric std::string FilePath = LibDevicePath + "/libdevice.10.bc"; 2250b57cec5SDimitry Andric if (FS.exists(FilePath)) { 2260fca6ea1SDimitry Andric for (int Arch = (int)OffloadArch::SM_30, E = (int)OffloadArch::LAST; 2270fca6ea1SDimitry Andric Arch < E; ++Arch) { 2280fca6ea1SDimitry Andric OffloadArch OA = static_cast<OffloadArch>(Arch); 2290fca6ea1SDimitry Andric if (!IsNVIDIAOffloadArch(OA)) 2305ffd83dbSDimitry Andric continue; 2310fca6ea1SDimitry Andric std::string OffloadArchName(OffloadArchToString(OA)); 2320fca6ea1SDimitry Andric LibDeviceMap[OffloadArchName] = FilePath; 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric } else { 2360b57cec5SDimitry Andric std::error_code EC; 2375ffd83dbSDimitry Andric for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC), 2385ffd83dbSDimitry Andric LE; 2390b57cec5SDimitry Andric !EC && LI != LE; LI = LI.increment(EC)) { 2400b57cec5SDimitry Andric StringRef FilePath = LI->path(); 2410b57cec5SDimitry Andric StringRef FileName = llvm::sys::path::filename(FilePath); 2420b57cec5SDimitry Andric // Process all bitcode filenames that look like 2430b57cec5SDimitry Andric // libdevice.compute_XX.YY.bc 2440b57cec5SDimitry Andric const StringRef LibDeviceName = "libdevice."; 2455f757f3fSDimitry Andric if (!(FileName.starts_with(LibDeviceName) && FileName.ends_with(".bc"))) 2460b57cec5SDimitry Andric continue; 2470b57cec5SDimitry Andric StringRef GpuArch = FileName.slice( 2480b57cec5SDimitry Andric LibDeviceName.size(), FileName.find('.', LibDeviceName.size())); 2490b57cec5SDimitry Andric LibDeviceMap[GpuArch] = FilePath.str(); 2500b57cec5SDimitry Andric // Insert map entries for specific devices with this compute 2510b57cec5SDimitry Andric // capability. NVCC's choice of the libdevice library version is 2520b57cec5SDimitry Andric // rather peculiar and depends on the CUDA version. 2530b57cec5SDimitry Andric if (GpuArch == "compute_20") { 2545ffd83dbSDimitry Andric LibDeviceMap["sm_20"] = std::string(FilePath); 2555ffd83dbSDimitry Andric LibDeviceMap["sm_21"] = std::string(FilePath); 2565ffd83dbSDimitry Andric LibDeviceMap["sm_32"] = std::string(FilePath); 2570b57cec5SDimitry Andric } else if (GpuArch == "compute_30") { 2585ffd83dbSDimitry Andric LibDeviceMap["sm_30"] = std::string(FilePath); 2590b57cec5SDimitry Andric if (Version < CudaVersion::CUDA_80) { 2605ffd83dbSDimitry Andric LibDeviceMap["sm_50"] = std::string(FilePath); 2615ffd83dbSDimitry Andric LibDeviceMap["sm_52"] = std::string(FilePath); 2625ffd83dbSDimitry Andric LibDeviceMap["sm_53"] = std::string(FilePath); 2630b57cec5SDimitry Andric } 2645ffd83dbSDimitry Andric LibDeviceMap["sm_60"] = std::string(FilePath); 2655ffd83dbSDimitry Andric LibDeviceMap["sm_61"] = std::string(FilePath); 2665ffd83dbSDimitry Andric LibDeviceMap["sm_62"] = std::string(FilePath); 2670b57cec5SDimitry Andric } else if (GpuArch == "compute_35") { 2685ffd83dbSDimitry Andric LibDeviceMap["sm_35"] = std::string(FilePath); 2695ffd83dbSDimitry Andric LibDeviceMap["sm_37"] = std::string(FilePath); 2700b57cec5SDimitry Andric } else if (GpuArch == "compute_50") { 2710b57cec5SDimitry Andric if (Version >= CudaVersion::CUDA_80) { 2725ffd83dbSDimitry Andric LibDeviceMap["sm_50"] = std::string(FilePath); 2735ffd83dbSDimitry Andric LibDeviceMap["sm_52"] = std::string(FilePath); 2745ffd83dbSDimitry Andric LibDeviceMap["sm_53"] = std::string(FilePath); 2750b57cec5SDimitry Andric } 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric } 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric 2800b57cec5SDimitry Andric // Check that we have found at least one libdevice that we can link in if 2810b57cec5SDimitry Andric // -nocudalib hasn't been specified. 2820b57cec5SDimitry Andric if (LibDeviceMap.empty() && !NoCudaLib) 2830b57cec5SDimitry Andric continue; 2840b57cec5SDimitry Andric 2850b57cec5SDimitry Andric IsValid = true; 2860b57cec5SDimitry Andric break; 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric } 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric void CudaInstallationDetector::AddCudaIncludeArgs( 2910b57cec5SDimitry Andric const ArgList &DriverArgs, ArgStringList &CC1Args) const { 2920b57cec5SDimitry Andric if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { 2930b57cec5SDimitry Andric // Add cuda_wrappers/* to our system include path. This lets us wrap 2940b57cec5SDimitry Andric // standard library headers. 2950b57cec5SDimitry Andric SmallString<128> P(D.ResourceDir); 2960b57cec5SDimitry Andric llvm::sys::path::append(P, "include"); 2970b57cec5SDimitry Andric llvm::sys::path::append(P, "cuda_wrappers"); 2980b57cec5SDimitry Andric CC1Args.push_back("-internal-isystem"); 2990b57cec5SDimitry Andric CC1Args.push_back(DriverArgs.MakeArgString(P)); 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric 3025ffd83dbSDimitry Andric if (DriverArgs.hasArg(options::OPT_nogpuinc)) 3030b57cec5SDimitry Andric return; 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric if (!isValid()) { 3060b57cec5SDimitry Andric D.Diag(diag::err_drv_no_cuda_installation); 3070b57cec5SDimitry Andric return; 3080b57cec5SDimitry Andric } 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric CC1Args.push_back("-include"); 3110b57cec5SDimitry Andric CC1Args.push_back("__clang_cuda_runtime_wrapper.h"); 3120b57cec5SDimitry Andric } 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric void CudaInstallationDetector::CheckCudaVersionSupportsArch( 3150fca6ea1SDimitry Andric OffloadArch Arch) const { 3160fca6ea1SDimitry Andric if (Arch == OffloadArch::UNKNOWN || Version == CudaVersion::UNKNOWN || 317e8d8bef9SDimitry Andric ArchsWithBadVersion[(int)Arch]) 3180b57cec5SDimitry Andric return; 3190b57cec5SDimitry Andric 3200fca6ea1SDimitry Andric auto MinVersion = MinVersionForOffloadArch(Arch); 3210fca6ea1SDimitry Andric auto MaxVersion = MaxVersionForOffloadArch(Arch); 3220b57cec5SDimitry Andric if (Version < MinVersion || Version > MaxVersion) { 323e8d8bef9SDimitry Andric ArchsWithBadVersion[(int)Arch] = true; 3240b57cec5SDimitry Andric D.Diag(diag::err_drv_cuda_version_unsupported) 3250fca6ea1SDimitry Andric << OffloadArchToString(Arch) << CudaVersionToString(MinVersion) 3260b57cec5SDimitry Andric << CudaVersionToString(MaxVersion) << InstallPath 3270b57cec5SDimitry Andric << CudaVersionToString(Version); 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric void CudaInstallationDetector::print(raw_ostream &OS) const { 3320b57cec5SDimitry Andric if (isValid()) 3330b57cec5SDimitry Andric OS << "Found CUDA installation: " << InstallPath << ", version " 3340b57cec5SDimitry Andric << CudaVersionToString(Version) << "\n"; 3350b57cec5SDimitry Andric } 3360b57cec5SDimitry Andric 3370b57cec5SDimitry Andric namespace { 3380b57cec5SDimitry Andric /// Debug info level for the NVPTX devices. We may need to emit different debug 3390b57cec5SDimitry Andric /// info level for the host and for the device itselfi. This type controls 3400b57cec5SDimitry Andric /// emission of the debug info for the devices. It either prohibits disable info 3410b57cec5SDimitry Andric /// emission completely, or emits debug directives only, or emits same debug 3420b57cec5SDimitry Andric /// info as for the host. 3430b57cec5SDimitry Andric enum DeviceDebugInfoLevel { 3440b57cec5SDimitry Andric DisableDebugInfo, /// Do not emit debug info for the devices. 3450b57cec5SDimitry Andric DebugDirectivesOnly, /// Emit only debug directives. 3460b57cec5SDimitry Andric EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the 3470b57cec5SDimitry Andric /// host. 3480b57cec5SDimitry Andric }; 3490b57cec5SDimitry Andric } // anonymous namespace 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric /// Define debug info level for the NVPTX devices. If the debug info for both 3520b57cec5SDimitry Andric /// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If 3530b57cec5SDimitry Andric /// only debug directives are requested for the both host and device 3540b57cec5SDimitry Andric /// (-gline-directvies-only), or the debug info only for the device is disabled 3550b57cec5SDimitry Andric /// (optimization is on and --cuda-noopt-device-debug was not specified), the 3560b57cec5SDimitry Andric /// debug directves only must be emitted for the device. Otherwise, use the same 3570b57cec5SDimitry Andric /// debug info level just like for the host (with the limitations of only 3580b57cec5SDimitry Andric /// supported DWARF2 standard). 3590b57cec5SDimitry Andric static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) { 3600b57cec5SDimitry Andric const Arg *A = Args.getLastArg(options::OPT_O_Group); 3610b57cec5SDimitry Andric bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) || 3620b57cec5SDimitry Andric Args.hasFlag(options::OPT_cuda_noopt_device_debug, 3630b57cec5SDimitry Andric options::OPT_no_cuda_noopt_device_debug, 3640b57cec5SDimitry Andric /*Default=*/false); 3650b57cec5SDimitry Andric if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) { 3660b57cec5SDimitry Andric const Option &Opt = A->getOption(); 3670b57cec5SDimitry Andric if (Opt.matches(options::OPT_gN_Group)) { 3680b57cec5SDimitry Andric if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0)) 3690b57cec5SDimitry Andric return DisableDebugInfo; 3700b57cec5SDimitry Andric if (Opt.matches(options::OPT_gline_directives_only)) 3710b57cec5SDimitry Andric return DebugDirectivesOnly; 3720b57cec5SDimitry Andric } 3730b57cec5SDimitry Andric return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly; 3740b57cec5SDimitry Andric } 375e8d8bef9SDimitry Andric return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo; 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric 3780b57cec5SDimitry Andric void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, 3790b57cec5SDimitry Andric const InputInfo &Output, 3800b57cec5SDimitry Andric const InputInfoList &Inputs, 3810b57cec5SDimitry Andric const ArgList &Args, 3820b57cec5SDimitry Andric const char *LinkingOutput) const { 3830b57cec5SDimitry Andric const auto &TC = 384bdd1243dSDimitry Andric static_cast<const toolchains::NVPTXToolChain &>(getToolChain()); 3850b57cec5SDimitry Andric assert(TC.getTriple().isNVPTX() && "Wrong platform"); 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric StringRef GPUArchName; 388bdd1243dSDimitry Andric // If this is a CUDA action we need to extract the device architecture 389bdd1243dSDimitry Andric // from the Job's associated architecture, otherwise use the -march=arch 390bdd1243dSDimitry Andric // option. This option may come from -Xopenmp-target flag or the default 391bdd1243dSDimitry Andric // value. 392bdd1243dSDimitry Andric if (JA.isDeviceOffloading(Action::OFK_Cuda)) { 393bdd1243dSDimitry Andric GPUArchName = JA.getOffloadingArch(); 394bdd1243dSDimitry Andric } else { 3950b57cec5SDimitry Andric GPUArchName = Args.getLastArgValue(options::OPT_march_EQ); 3960fca6ea1SDimitry Andric if (GPUArchName.empty()) { 3970fca6ea1SDimitry Andric C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) 3980fca6ea1SDimitry Andric << getToolChain().getArchName() << getShortName(); 3990fca6ea1SDimitry Andric return; 4000fca6ea1SDimitry Andric } 401bdd1243dSDimitry Andric } 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric // Obtain architecture from the action. 4040fca6ea1SDimitry Andric OffloadArch gpu_arch = StringToOffloadArch(GPUArchName); 4050fca6ea1SDimitry Andric assert(gpu_arch != OffloadArch::UNKNOWN && 4060b57cec5SDimitry Andric "Device action expected to have an architecture."); 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric // Check that our installation's ptxas supports gpu_arch. 4090b57cec5SDimitry Andric if (!Args.hasArg(options::OPT_no_cuda_version_check)) { 4100b57cec5SDimitry Andric TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch); 4110b57cec5SDimitry Andric } 4120b57cec5SDimitry Andric 4130b57cec5SDimitry Andric ArgStringList CmdArgs; 4140b57cec5SDimitry Andric CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32"); 4150b57cec5SDimitry Andric DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args); 4160b57cec5SDimitry Andric if (DIKind == EmitSameDebugInfoAsHost) { 4170b57cec5SDimitry Andric // ptxas does not accept -g option if optimization is enabled, so 4180b57cec5SDimitry Andric // we ignore the compiler's -O* options if we want debug info. 4190b57cec5SDimitry Andric CmdArgs.push_back("-g"); 4200b57cec5SDimitry Andric CmdArgs.push_back("--dont-merge-basicblocks"); 4210b57cec5SDimitry Andric CmdArgs.push_back("--return-at-end"); 4220b57cec5SDimitry Andric } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { 4230b57cec5SDimitry Andric // Map the -O we received to -O{0,1,2,3}. 4240b57cec5SDimitry Andric // 4250b57cec5SDimitry Andric // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's 4260b57cec5SDimitry Andric // default, so it may correspond more closely to the spirit of clang -O2. 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric // -O3 seems like the least-bad option when -Osomething is specified to 4290b57cec5SDimitry Andric // clang but it isn't handled below. 4300b57cec5SDimitry Andric StringRef OOpt = "3"; 4310b57cec5SDimitry Andric if (A->getOption().matches(options::OPT_O4) || 4320b57cec5SDimitry Andric A->getOption().matches(options::OPT_Ofast)) 4330b57cec5SDimitry Andric OOpt = "3"; 4340b57cec5SDimitry Andric else if (A->getOption().matches(options::OPT_O0)) 4350b57cec5SDimitry Andric OOpt = "0"; 4360b57cec5SDimitry Andric else if (A->getOption().matches(options::OPT_O)) { 4370b57cec5SDimitry Andric // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options. 4380b57cec5SDimitry Andric OOpt = llvm::StringSwitch<const char *>(A->getValue()) 4390b57cec5SDimitry Andric .Case("1", "1") 4400b57cec5SDimitry Andric .Case("2", "2") 4410b57cec5SDimitry Andric .Case("3", "3") 4420b57cec5SDimitry Andric .Case("s", "2") 4430b57cec5SDimitry Andric .Case("z", "2") 4440b57cec5SDimitry Andric .Default("2"); 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt)); 4470b57cec5SDimitry Andric } else { 4480b57cec5SDimitry Andric // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond 4490b57cec5SDimitry Andric // to no optimizations, but ptxas's default is -O3. 4500b57cec5SDimitry Andric CmdArgs.push_back("-O0"); 4510b57cec5SDimitry Andric } 4520b57cec5SDimitry Andric if (DIKind == DebugDirectivesOnly) 4530b57cec5SDimitry Andric CmdArgs.push_back("-lineinfo"); 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric // Pass -v to ptxas if it was passed to the driver. 4560b57cec5SDimitry Andric if (Args.hasArg(options::OPT_v)) 4570b57cec5SDimitry Andric CmdArgs.push_back("-v"); 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric CmdArgs.push_back("--gpu-name"); 4600fca6ea1SDimitry Andric CmdArgs.push_back(Args.MakeArgString(OffloadArchToString(gpu_arch))); 4610b57cec5SDimitry Andric CmdArgs.push_back("--output-file"); 4621ac55f4cSDimitry Andric std::string OutputFileName = TC.getInputFilename(Output); 463bdd1243dSDimitry Andric 464bdd1243dSDimitry Andric if (Output.isFilename() && OutputFileName != Output.getFilename()) 4651ac55f4cSDimitry Andric C.addTempFile(Args.MakeArgString(OutputFileName)); 466bdd1243dSDimitry Andric 4671ac55f4cSDimitry Andric CmdArgs.push_back(Args.MakeArgString(OutputFileName)); 4680b57cec5SDimitry Andric for (const auto &II : Inputs) 4690b57cec5SDimitry Andric CmdArgs.push_back(Args.MakeArgString(II.getFilename())); 4700b57cec5SDimitry Andric 4710b57cec5SDimitry Andric for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas)) 4720b57cec5SDimitry Andric CmdArgs.push_back(Args.MakeArgString(A)); 4730b57cec5SDimitry Andric 474bdd1243dSDimitry Andric bool Relocatable; 4750b57cec5SDimitry Andric if (JA.isOffloading(Action::OFK_OpenMP)) 4760b57cec5SDimitry Andric // In OpenMP we need to generate relocatable code. 4770b57cec5SDimitry Andric Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target, 4780b57cec5SDimitry Andric options::OPT_fnoopenmp_relocatable_target, 4790b57cec5SDimitry Andric /*Default=*/true); 4800b57cec5SDimitry Andric else if (JA.isOffloading(Action::OFK_Cuda)) 481bdd1243dSDimitry Andric // In CUDA we generate relocatable code by default. 482bdd1243dSDimitry Andric Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, 483bdd1243dSDimitry Andric /*Default=*/false); 484bdd1243dSDimitry Andric else 485bdd1243dSDimitry Andric // Otherwise, we are compiling directly and should create linkable output. 486bdd1243dSDimitry Andric Relocatable = true; 4870b57cec5SDimitry Andric 4880b57cec5SDimitry Andric if (Relocatable) 4890b57cec5SDimitry Andric CmdArgs.push_back("-c"); 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric const char *Exec; 4920b57cec5SDimitry Andric if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ)) 4930b57cec5SDimitry Andric Exec = A->getValue(); 4940b57cec5SDimitry Andric else 4950b57cec5SDimitry Andric Exec = Args.MakeArgString(TC.GetProgramPath("ptxas")); 4965ffd83dbSDimitry Andric C.addCommand(std::make_unique<Command>( 4975ffd83dbSDimitry Andric JA, *this, 4985ffd83dbSDimitry Andric ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8, 4995ffd83dbSDimitry Andric "--options-file"}, 500e8d8bef9SDimitry Andric Exec, CmdArgs, Inputs, Output)); 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric 5030fca6ea1SDimitry Andric static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) { 5040fca6ea1SDimitry Andric // The new driver does not include PTX by default to avoid overhead. 5050fca6ea1SDimitry Andric bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver, 5060fca6ea1SDimitry Andric options::OPT_no_offload_new_driver, false); 5070fca6ea1SDimitry Andric for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ, 5080fca6ea1SDimitry Andric options::OPT_no_cuda_include_ptx_EQ)) { 5090b57cec5SDimitry Andric A->claim(); 5100b57cec5SDimitry Andric const StringRef ArchStr = A->getValue(); 5110fca6ea1SDimitry Andric if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) && 5120fca6ea1SDimitry Andric (ArchStr == "all" || ArchStr == InputArch)) 5130fca6ea1SDimitry Andric includePTX = true; 5140fca6ea1SDimitry Andric else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) && 5150fca6ea1SDimitry Andric (ArchStr == "all" || ArchStr == InputArch)) 5160fca6ea1SDimitry Andric includePTX = false; 5170b57cec5SDimitry Andric } 5180b57cec5SDimitry Andric return includePTX; 5190b57cec5SDimitry Andric } 5200b57cec5SDimitry Andric 5210b57cec5SDimitry Andric // All inputs to this linker must be from CudaDeviceActions, as we need to look 5220b57cec5SDimitry Andric // at the Inputs' Actions in order to figure out which GPU architecture they 5230b57cec5SDimitry Andric // correspond to. 524bdd1243dSDimitry Andric void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA, 5250b57cec5SDimitry Andric const InputInfo &Output, 5260b57cec5SDimitry Andric const InputInfoList &Inputs, 5270b57cec5SDimitry Andric const ArgList &Args, 5280b57cec5SDimitry Andric const char *LinkingOutput) const { 5290b57cec5SDimitry Andric const auto &TC = 5300b57cec5SDimitry Andric static_cast<const toolchains::CudaToolChain &>(getToolChain()); 5310b57cec5SDimitry Andric assert(TC.getTriple().isNVPTX() && "Wrong platform"); 5320b57cec5SDimitry Andric 5330b57cec5SDimitry Andric ArgStringList CmdArgs; 5340b57cec5SDimitry Andric if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100) 5350b57cec5SDimitry Andric CmdArgs.push_back("--cuda"); 5360b57cec5SDimitry Andric CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32"); 5370b57cec5SDimitry Andric CmdArgs.push_back(Args.MakeArgString("--create")); 5380b57cec5SDimitry Andric CmdArgs.push_back(Args.MakeArgString(Output.getFilename())); 5390b57cec5SDimitry Andric if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost) 5400b57cec5SDimitry Andric CmdArgs.push_back("-g"); 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric for (const auto &II : Inputs) { 5430b57cec5SDimitry Andric auto *A = II.getAction(); 5440b57cec5SDimitry Andric assert(A->getInputs().size() == 1 && 5450b57cec5SDimitry Andric "Device offload action is expected to have a single input"); 5460b57cec5SDimitry Andric const char *gpu_arch_str = A->getOffloadingArch(); 5470b57cec5SDimitry Andric assert(gpu_arch_str && 5480b57cec5SDimitry Andric "Device action expected to have associated a GPU architecture!"); 5490fca6ea1SDimitry Andric OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str); 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric if (II.getType() == types::TY_PP_Asm && 5520b57cec5SDimitry Andric !shouldIncludePTX(Args, gpu_arch_str)) 5530b57cec5SDimitry Andric continue; 5540b57cec5SDimitry Andric // We need to pass an Arch of the form "sm_XX" for cubin files and 5550b57cec5SDimitry Andric // "compute_XX" for ptx. 5565ffd83dbSDimitry Andric const char *Arch = (II.getType() == types::TY_PP_Asm) 5570fca6ea1SDimitry Andric ? OffloadArchToVirtualArchString(gpu_arch) 5580b57cec5SDimitry Andric : gpu_arch_str; 55981ad6265SDimitry Andric CmdArgs.push_back( 56081ad6265SDimitry Andric Args.MakeArgString(llvm::Twine("--image=profile=") + Arch + 56181ad6265SDimitry Andric ",file=" + getToolChain().getInputFilename(II))); 5620b57cec5SDimitry Andric } 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary)) 5650b57cec5SDimitry Andric CmdArgs.push_back(Args.MakeArgString(A)); 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary")); 5685ffd83dbSDimitry Andric C.addCommand(std::make_unique<Command>( 5695ffd83dbSDimitry Andric JA, *this, 5705ffd83dbSDimitry Andric ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8, 5715ffd83dbSDimitry Andric "--options-file"}, 572e8d8bef9SDimitry Andric Exec, CmdArgs, Inputs, Output)); 5730b57cec5SDimitry Andric } 5740b57cec5SDimitry Andric 575bdd1243dSDimitry Andric void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, 5760b57cec5SDimitry Andric const InputInfo &Output, 5770b57cec5SDimitry Andric const InputInfoList &Inputs, 5780b57cec5SDimitry Andric const ArgList &Args, 5790b57cec5SDimitry Andric const char *LinkingOutput) const { 5800b57cec5SDimitry Andric const auto &TC = 581bdd1243dSDimitry Andric static_cast<const toolchains::NVPTXToolChain &>(getToolChain()); 5825f757f3fSDimitry Andric ArgStringList CmdArgs; 5835f757f3fSDimitry Andric 5840b57cec5SDimitry Andric assert(TC.getTriple().isNVPTX() && "Wrong platform"); 5850b57cec5SDimitry Andric 5865f757f3fSDimitry Andric assert((Output.isFilename() || Output.isNothing()) && "Invalid output."); 5870b57cec5SDimitry Andric if (Output.isFilename()) { 5880b57cec5SDimitry Andric CmdArgs.push_back("-o"); 5890b57cec5SDimitry Andric CmdArgs.push_back(Output.getFilename()); 590bdd1243dSDimitry Andric } 591bdd1243dSDimitry Andric 5920b57cec5SDimitry Andric if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost) 5930b57cec5SDimitry Andric CmdArgs.push_back("-g"); 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric if (Args.hasArg(options::OPT_v)) 5960b57cec5SDimitry Andric CmdArgs.push_back("-v"); 5970b57cec5SDimitry Andric 598bdd1243dSDimitry Andric StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ); 5990fca6ea1SDimitry Andric if (GPUArch.empty()) { 6000fca6ea1SDimitry Andric C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) 6010fca6ea1SDimitry Andric << getToolChain().getArchName() << getShortName(); 6020fca6ea1SDimitry Andric return; 6030fca6ea1SDimitry Andric } 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric CmdArgs.push_back("-arch"); 6060b57cec5SDimitry Andric CmdArgs.push_back(Args.MakeArgString(GPUArch)); 6070b57cec5SDimitry Andric 6080fca6ea1SDimitry Andric if (Args.hasArg(options::OPT_ptxas_path_EQ)) 6090fca6ea1SDimitry Andric CmdArgs.push_back(Args.MakeArgString( 6100fca6ea1SDimitry Andric "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ))); 6110fca6ea1SDimitry Andric 612*62987288SDimitry Andric if (Args.hasArg(options::OPT_cuda_path_EQ)) 613*62987288SDimitry Andric CmdArgs.push_back(Args.MakeArgString( 614*62987288SDimitry Andric "--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ))); 615*62987288SDimitry Andric 6160b57cec5SDimitry Andric // Add paths specified in LIBRARY_PATH environment variable as -L options. 6170b57cec5SDimitry Andric addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH"); 6180b57cec5SDimitry Andric 6190fca6ea1SDimitry Andric // Add standard library search paths passed on the command line. 6200fca6ea1SDimitry Andric Args.AddAllArgs(CmdArgs, options::OPT_L); 6210fca6ea1SDimitry Andric getToolChain().AddFilePathLibArgs(Args, CmdArgs); 6220fca6ea1SDimitry Andric AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); 6230fca6ea1SDimitry Andric 6240fca6ea1SDimitry Andric if (C.getDriver().isUsingLTO()) 6250fca6ea1SDimitry Andric addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], 6260fca6ea1SDimitry Andric C.getDriver().getLTOMode() == LTOK_Thin); 6270fca6ea1SDimitry Andric 6280b57cec5SDimitry Andric // Add paths for the default clang library path. 6290b57cec5SDimitry Andric SmallString<256> DefaultLibPath = 6300b57cec5SDimitry Andric llvm::sys::path::parent_path(TC.getDriver().Dir); 631bdd1243dSDimitry Andric llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME); 6320b57cec5SDimitry Andric CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath)); 6330b57cec5SDimitry Andric 6345ffd83dbSDimitry Andric C.addCommand(std::make_unique<Command>( 6355ffd83dbSDimitry Andric JA, *this, 6365ffd83dbSDimitry Andric ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8, 6375ffd83dbSDimitry Andric "--options-file"}, 6380fca6ea1SDimitry Andric Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper")), 6390fca6ea1SDimitry Andric CmdArgs, Inputs, Output)); 6400b57cec5SDimitry Andric } 6410b57cec5SDimitry Andric 64281ad6265SDimitry Andric void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple, 64381ad6265SDimitry Andric const llvm::opt::ArgList &Args, 64481ad6265SDimitry Andric std::vector<StringRef> &Features) { 64581ad6265SDimitry Andric if (Args.hasArg(options::OPT_cuda_feature_EQ)) { 64681ad6265SDimitry Andric StringRef PtxFeature = 64781ad6265SDimitry Andric Args.getLastArgValue(options::OPT_cuda_feature_EQ, "+ptx42"); 64881ad6265SDimitry Andric Features.push_back(Args.MakeArgString(PtxFeature)); 64981ad6265SDimitry Andric return; 65081ad6265SDimitry Andric } 65181ad6265SDimitry Andric CudaInstallationDetector CudaInstallation(D, Triple, Args); 65281ad6265SDimitry Andric 65381ad6265SDimitry Andric // New CUDA versions often introduce new instructions that are only supported 65481ad6265SDimitry Andric // by new PTX version, so we need to raise PTX level to enable them in NVPTX 65581ad6265SDimitry Andric // back-end. 65681ad6265SDimitry Andric const char *PtxFeature = nullptr; 65781ad6265SDimitry Andric switch (CudaInstallation.version()) { 65881ad6265SDimitry Andric #define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \ 65981ad6265SDimitry Andric case CudaVersion::CUDA_##CUDA_VER: \ 66081ad6265SDimitry Andric PtxFeature = "+ptx" #PTX_VER; \ 66181ad6265SDimitry Andric break; 6620fca6ea1SDimitry Andric CASE_CUDA_VERSION(125, 85); 6630fca6ea1SDimitry Andric CASE_CUDA_VERSION(124, 84); 6645f757f3fSDimitry Andric CASE_CUDA_VERSION(123, 83); 6655f757f3fSDimitry Andric CASE_CUDA_VERSION(122, 82); 66606c3fb27SDimitry Andric CASE_CUDA_VERSION(121, 81); 66706c3fb27SDimitry Andric CASE_CUDA_VERSION(120, 80); 668bdd1243dSDimitry Andric CASE_CUDA_VERSION(118, 78); 669bdd1243dSDimitry Andric CASE_CUDA_VERSION(117, 77); 670bdd1243dSDimitry Andric CASE_CUDA_VERSION(116, 76); 67181ad6265SDimitry Andric CASE_CUDA_VERSION(115, 75); 67281ad6265SDimitry Andric CASE_CUDA_VERSION(114, 74); 67381ad6265SDimitry Andric CASE_CUDA_VERSION(113, 73); 67481ad6265SDimitry Andric CASE_CUDA_VERSION(112, 72); 67581ad6265SDimitry Andric CASE_CUDA_VERSION(111, 71); 67681ad6265SDimitry Andric CASE_CUDA_VERSION(110, 70); 67781ad6265SDimitry Andric CASE_CUDA_VERSION(102, 65); 67881ad6265SDimitry Andric CASE_CUDA_VERSION(101, 64); 67981ad6265SDimitry Andric CASE_CUDA_VERSION(100, 63); 68081ad6265SDimitry Andric CASE_CUDA_VERSION(92, 61); 68181ad6265SDimitry Andric CASE_CUDA_VERSION(91, 61); 68281ad6265SDimitry Andric CASE_CUDA_VERSION(90, 60); 68381ad6265SDimitry Andric #undef CASE_CUDA_VERSION 68481ad6265SDimitry Andric default: 68581ad6265SDimitry Andric PtxFeature = "+ptx42"; 68681ad6265SDimitry Andric } 68781ad6265SDimitry Andric Features.push_back(PtxFeature); 68881ad6265SDimitry Andric } 68981ad6265SDimitry Andric 690bdd1243dSDimitry Andric /// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This 691bdd1243dSDimitry Andric /// operates as a stand-alone version of the NVPTX tools without the host 692bdd1243dSDimitry Andric /// toolchain. 693bdd1243dSDimitry Andric NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, 694bdd1243dSDimitry Andric const llvm::Triple &HostTriple, 69506c3fb27SDimitry Andric const ArgList &Args, bool Freestanding = false) 69606c3fb27SDimitry Andric : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args), 69706c3fb27SDimitry Andric Freestanding(Freestanding) { 69806c3fb27SDimitry Andric if (CudaInstallation.isValid()) 6995ffd83dbSDimitry Andric getProgramPaths().push_back(std::string(CudaInstallation.getBinPath())); 7000b57cec5SDimitry Andric // Lookup binaries into the driver directory, this is used to 70106c3fb27SDimitry Andric // discover the 'nvptx-arch' executable. 7020b57cec5SDimitry Andric getProgramPaths().push_back(getDriver().Dir); 7030b57cec5SDimitry Andric } 7040b57cec5SDimitry Andric 705bdd1243dSDimitry Andric /// We only need the host triple to locate the CUDA binary utilities, use the 706bdd1243dSDimitry Andric /// system's default triple if not provided. 707bdd1243dSDimitry Andric NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, 708bdd1243dSDimitry Andric const ArgList &Args) 70906c3fb27SDimitry Andric : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args, 71006c3fb27SDimitry Andric /*Freestanding=*/true) {} 7110b57cec5SDimitry Andric 712bdd1243dSDimitry Andric llvm::opt::DerivedArgList * 713bdd1243dSDimitry Andric NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, 714bdd1243dSDimitry Andric StringRef BoundArch, 7150fca6ea1SDimitry Andric Action::OffloadKind OffloadKind) const { 7160fca6ea1SDimitry Andric DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, OffloadKind); 717bdd1243dSDimitry Andric if (!DAL) 718bdd1243dSDimitry Andric DAL = new DerivedArgList(Args.getBaseArgs()); 719bdd1243dSDimitry Andric 720bdd1243dSDimitry Andric const OptTable &Opts = getDriver().getOpts(); 721bdd1243dSDimitry Andric 722bdd1243dSDimitry Andric for (Arg *A : Args) 723bdd1243dSDimitry Andric if (!llvm::is_contained(*DAL, A)) 724bdd1243dSDimitry Andric DAL->append(A); 725bdd1243dSDimitry Andric 7260fca6ea1SDimitry Andric if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) { 727bdd1243dSDimitry Andric DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), 7280fca6ea1SDimitry Andric OffloadArchToString(OffloadArch::CudaDefault)); 7290fca6ea1SDimitry Andric } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "generic" && 7300fca6ea1SDimitry Andric OffloadKind == Action::OFK_None) { 7310fca6ea1SDimitry Andric DAL->eraseArg(options::OPT_march_EQ); 7320fca6ea1SDimitry Andric } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") { 7330fca6ea1SDimitry Andric auto GPUsOrErr = getSystemGPUArchs(Args); 7340fca6ea1SDimitry Andric if (!GPUsOrErr) { 7350fca6ea1SDimitry Andric getDriver().Diag(diag::err_drv_undetermined_gpu_arch) 7360fca6ea1SDimitry Andric << getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march"; 7370fca6ea1SDimitry Andric } else { 7380fca6ea1SDimitry Andric if (GPUsOrErr->size() > 1) 7390fca6ea1SDimitry Andric getDriver().Diag(diag::warn_drv_multi_gpu_arch) 7400fca6ea1SDimitry Andric << getArchName() << llvm::join(*GPUsOrErr, ", ") << "-march"; 7410fca6ea1SDimitry Andric DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), 7420fca6ea1SDimitry Andric Args.MakeArgString(GPUsOrErr->front())); 7430fca6ea1SDimitry Andric } 7440fca6ea1SDimitry Andric } 745bdd1243dSDimitry Andric 746bdd1243dSDimitry Andric return DAL; 7470b57cec5SDimitry Andric } 7480b57cec5SDimitry Andric 74906c3fb27SDimitry Andric void NVPTXToolChain::addClangTargetOptions( 75006c3fb27SDimitry Andric const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, 75106c3fb27SDimitry Andric Action::OffloadKind DeviceOffloadingKind) const { 75206c3fb27SDimitry Andric // If we are compiling with a standalone NVPTX toolchain we want to try to 75306c3fb27SDimitry Andric // mimic a standard environment as much as possible. So we enable lowering 75406c3fb27SDimitry Andric // ctor / dtor functions to global symbols that can be registered. 75506c3fb27SDimitry Andric if (Freestanding) 75606c3fb27SDimitry Andric CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"}); 75706c3fb27SDimitry Andric } 75806c3fb27SDimitry Andric 759bdd1243dSDimitry Andric bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const { 760bdd1243dSDimitry Andric const Option &O = A->getOption(); 761bdd1243dSDimitry Andric return (O.matches(options::OPT_gN_Group) && 762bdd1243dSDimitry Andric !O.matches(options::OPT_gmodules)) || 763bdd1243dSDimitry Andric O.matches(options::OPT_g_Flag) || 764bdd1243dSDimitry Andric O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) || 765bdd1243dSDimitry Andric O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) || 766bdd1243dSDimitry Andric O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) || 767bdd1243dSDimitry Andric O.matches(options::OPT_gdwarf_5) || 768bdd1243dSDimitry Andric O.matches(options::OPT_gcolumn_info); 769bdd1243dSDimitry Andric } 770bdd1243dSDimitry Andric 771bdd1243dSDimitry Andric void NVPTXToolChain::adjustDebugInfoKind( 77206c3fb27SDimitry Andric llvm::codegenoptions::DebugInfoKind &DebugInfoKind, 77306c3fb27SDimitry Andric const ArgList &Args) const { 774bdd1243dSDimitry Andric switch (mustEmitDebugInfo(Args)) { 775bdd1243dSDimitry Andric case DisableDebugInfo: 77606c3fb27SDimitry Andric DebugInfoKind = llvm::codegenoptions::NoDebugInfo; 777bdd1243dSDimitry Andric break; 778bdd1243dSDimitry Andric case DebugDirectivesOnly: 77906c3fb27SDimitry Andric DebugInfoKind = llvm::codegenoptions::DebugDirectivesOnly; 780bdd1243dSDimitry Andric break; 781bdd1243dSDimitry Andric case EmitSameDebugInfoAsHost: 782bdd1243dSDimitry Andric // Use same debug info level as the host. 783bdd1243dSDimitry Andric break; 784bdd1243dSDimitry Andric } 785bdd1243dSDimitry Andric } 786bdd1243dSDimitry Andric 7870fca6ea1SDimitry Andric Expected<SmallVector<std::string>> 7880fca6ea1SDimitry Andric NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const { 7890fca6ea1SDimitry Andric // Detect NVIDIA GPUs availible on the system. 7900fca6ea1SDimitry Andric std::string Program; 7910fca6ea1SDimitry Andric if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ)) 7920fca6ea1SDimitry Andric Program = A->getValue(); 7930fca6ea1SDimitry Andric else 7940fca6ea1SDimitry Andric Program = GetProgramPath("nvptx-arch"); 7950fca6ea1SDimitry Andric 7960fca6ea1SDimitry Andric auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10); 7970fca6ea1SDimitry Andric if (!StdoutOrErr) 7980fca6ea1SDimitry Andric return StdoutOrErr.takeError(); 7990fca6ea1SDimitry Andric 8000fca6ea1SDimitry Andric SmallVector<std::string, 1> GPUArchs; 8010fca6ea1SDimitry Andric for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n")) 8020fca6ea1SDimitry Andric if (!Arch.empty()) 8030fca6ea1SDimitry Andric GPUArchs.push_back(Arch.str()); 8040fca6ea1SDimitry Andric 8050fca6ea1SDimitry Andric if (GPUArchs.empty()) 8060fca6ea1SDimitry Andric return llvm::createStringError(std::error_code(), 8070fca6ea1SDimitry Andric "No NVIDIA GPU detected in the system"); 8080fca6ea1SDimitry Andric 8090fca6ea1SDimitry Andric return std::move(GPUArchs); 8100fca6ea1SDimitry Andric } 8110fca6ea1SDimitry Andric 812bdd1243dSDimitry Andric /// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary, 813bdd1243dSDimitry Andric /// which isn't properly a linker but nonetheless performs the step of stitching 814bdd1243dSDimitry Andric /// together object files from the assembler into a single blob. 815bdd1243dSDimitry Andric 816bdd1243dSDimitry Andric CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple, 817bdd1243dSDimitry Andric const ToolChain &HostTC, const ArgList &Args) 818bdd1243dSDimitry Andric : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {} 819bdd1243dSDimitry Andric 8200b57cec5SDimitry Andric void CudaToolChain::addClangTargetOptions( 821bdd1243dSDimitry Andric const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, 8220b57cec5SDimitry Andric Action::OffloadKind DeviceOffloadingKind) const { 8230b57cec5SDimitry Andric HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); 8240b57cec5SDimitry Andric 8250b57cec5SDimitry Andric StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); 8260b57cec5SDimitry Andric assert(!GpuArch.empty() && "Must have an explicit GPU arch."); 8270b57cec5SDimitry Andric assert((DeviceOffloadingKind == Action::OFK_OpenMP || 8280b57cec5SDimitry Andric DeviceOffloadingKind == Action::OFK_Cuda) && 8290b57cec5SDimitry Andric "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs."); 8300b57cec5SDimitry Andric 8310b57cec5SDimitry Andric if (DeviceOffloadingKind == Action::OFK_Cuda) { 832349cc55cSDimitry Andric CC1Args.append( 833349cc55cSDimitry Andric {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"}); 8340b57cec5SDimitry Andric 83506c3fb27SDimitry Andric // Unsized function arguments used for variadics were introduced in CUDA-9.0 83606c3fb27SDimitry Andric // We still do not support generating code that actually uses variadic 83706c3fb27SDimitry Andric // arguments yet, but we do need to allow parsing them as recent CUDA 83806c3fb27SDimitry Andric // headers rely on that. https://github.com/llvm/llvm-project/issues/58410 83906c3fb27SDimitry Andric if (CudaInstallation.version() >= CudaVersion::CUDA_90) 84006c3fb27SDimitry Andric CC1Args.push_back("-fcuda-allow-variadic-functions"); 8410b57cec5SDimitry Andric } 8420b57cec5SDimitry Andric 843a7dea167SDimitry Andric if (DriverArgs.hasArg(options::OPT_nogpulib)) 8440b57cec5SDimitry Andric return; 8450b57cec5SDimitry Andric 8460b57cec5SDimitry Andric if (DeviceOffloadingKind == Action::OFK_OpenMP && 8470b57cec5SDimitry Andric DriverArgs.hasArg(options::OPT_S)) 8480b57cec5SDimitry Andric return; 8490b57cec5SDimitry Andric 850fe6060f1SDimitry Andric std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch); 851fe6060f1SDimitry Andric if (LibDeviceFile.empty()) { 8520b57cec5SDimitry Andric getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch; 8530b57cec5SDimitry Andric return; 8540b57cec5SDimitry Andric } 8550b57cec5SDimitry Andric 8560b57cec5SDimitry Andric CC1Args.push_back("-mlink-builtin-bitcode"); 8570b57cec5SDimitry Andric CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); 8580b57cec5SDimitry Andric 859fe6060f1SDimitry Andric clang::CudaVersion CudaInstallationVersion = CudaInstallation.version(); 860e8d8bef9SDimitry Andric 8610b57cec5SDimitry Andric if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr, 8620b57cec5SDimitry Andric options::OPT_fno_cuda_short_ptr, false)) 8630b57cec5SDimitry Andric CC1Args.append({"-mllvm", "--nvptx-short-ptr"}); 8640b57cec5SDimitry Andric 865fe6060f1SDimitry Andric if (CudaInstallationVersion >= CudaVersion::UNKNOWN) 866fe6060f1SDimitry Andric CC1Args.push_back( 867fe6060f1SDimitry Andric DriverArgs.MakeArgString(Twine("-target-sdk-version=") + 868fe6060f1SDimitry Andric CudaVersionToString(CudaInstallationVersion))); 8690b57cec5SDimitry Andric 8700b57cec5SDimitry Andric if (DeviceOffloadingKind == Action::OFK_OpenMP) { 871fe6060f1SDimitry Andric if (CudaInstallationVersion < CudaVersion::CUDA_92) { 872fe6060f1SDimitry Andric getDriver().Diag( 873fe6060f1SDimitry Andric diag::err_drv_omp_offload_target_cuda_version_not_support) 874fe6060f1SDimitry Andric << CudaVersionToString(CudaInstallationVersion); 875fe6060f1SDimitry Andric return; 8760b57cec5SDimitry Andric } 8770b57cec5SDimitry Andric 8781fd87a68SDimitry Andric // Link the bitcode library late if we're using device LTO. 8791fd87a68SDimitry Andric if (getDriver().isUsingLTO(/* IsOffload */ true)) 8801fd87a68SDimitry Andric return; 8811fd87a68SDimitry Andric 88281ad6265SDimitry Andric addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(), 8830fca6ea1SDimitry Andric getTriple(), HostTC); 8840b57cec5SDimitry Andric } 885e8d8bef9SDimitry Andric } 8860b57cec5SDimitry Andric 8875ffd83dbSDimitry Andric llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType( 8885ffd83dbSDimitry Andric const llvm::opt::ArgList &DriverArgs, const JobAction &JA, 8895ffd83dbSDimitry Andric const llvm::fltSemantics *FPType) const { 8905ffd83dbSDimitry Andric if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { 8915ffd83dbSDimitry Andric if (FPType && FPType == &llvm::APFloat::IEEEsingle() && 892fe6060f1SDimitry Andric DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, 893fe6060f1SDimitry Andric options::OPT_fno_gpu_flush_denormals_to_zero, false)) 8945ffd83dbSDimitry Andric return llvm::DenormalMode::getPreserveSign(); 8955ffd83dbSDimitry Andric } 8965ffd83dbSDimitry Andric 8975ffd83dbSDimitry Andric assert(JA.getOffloadingDeviceKind() != Action::OFK_Host); 8985ffd83dbSDimitry Andric return llvm::DenormalMode::getIEEE(); 8995ffd83dbSDimitry Andric } 9005ffd83dbSDimitry Andric 9010b57cec5SDimitry Andric void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs, 9020b57cec5SDimitry Andric ArgStringList &CC1Args) const { 9030b57cec5SDimitry Andric // Check our CUDA version if we're going to include the CUDA headers. 9045ffd83dbSDimitry Andric if (!DriverArgs.hasArg(options::OPT_nogpuinc) && 9050b57cec5SDimitry Andric !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) { 9060b57cec5SDimitry Andric StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ); 9070b57cec5SDimitry Andric assert(!Arch.empty() && "Must have an explicit GPU arch."); 9080fca6ea1SDimitry Andric CudaInstallation.CheckCudaVersionSupportsArch(StringToOffloadArch(Arch)); 9090b57cec5SDimitry Andric } 9100b57cec5SDimitry Andric CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric 913bdd1243dSDimitry Andric std::string CudaToolChain::getInputFilename(const InputInfo &Input) const { 914bdd1243dSDimitry Andric // Only object files are changed, for example assembly files keep their .s 915bdd1243dSDimitry Andric // extensions. If the user requested device-only compilation don't change it. 916bdd1243dSDimitry Andric if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly()) 917bdd1243dSDimitry Andric return ToolChain::getInputFilename(Input); 918bdd1243dSDimitry Andric 9190fca6ea1SDimitry Andric return ToolChain::getInputFilename(Input); 920bdd1243dSDimitry Andric } 921bdd1243dSDimitry Andric 9220b57cec5SDimitry Andric llvm::opt::DerivedArgList * 9230b57cec5SDimitry Andric CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, 9240b57cec5SDimitry Andric StringRef BoundArch, 9250b57cec5SDimitry Andric Action::OffloadKind DeviceOffloadKind) const { 9260b57cec5SDimitry Andric DerivedArgList *DAL = 9270b57cec5SDimitry Andric HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); 9280b57cec5SDimitry Andric if (!DAL) 9290b57cec5SDimitry Andric DAL = new DerivedArgList(Args.getBaseArgs()); 9300b57cec5SDimitry Andric 9310b57cec5SDimitry Andric const OptTable &Opts = getDriver().getOpts(); 9320b57cec5SDimitry Andric 9330b57cec5SDimitry Andric // For OpenMP device offloading, append derived arguments. Make sure 9340b57cec5SDimitry Andric // flags are not duplicated. 9350b57cec5SDimitry Andric // Also append the compute capability. 9360b57cec5SDimitry Andric if (DeviceOffloadKind == Action::OFK_OpenMP) { 937349cc55cSDimitry Andric for (Arg *A : Args) 938349cc55cSDimitry Andric if (!llvm::is_contained(*DAL, A)) 9390b57cec5SDimitry Andric DAL->append(A); 9400b57cec5SDimitry Andric 941bdd1243dSDimitry Andric if (!DAL->hasArg(options::OPT_march_EQ)) { 942bdd1243dSDimitry Andric StringRef Arch = BoundArch; 943bdd1243dSDimitry Andric if (Arch.empty()) { 944bdd1243dSDimitry Andric auto ArchsOrErr = getSystemGPUArchs(Args); 945bdd1243dSDimitry Andric if (!ArchsOrErr) { 946bdd1243dSDimitry Andric std::string ErrMsg = 947bdd1243dSDimitry Andric llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError())); 948bdd1243dSDimitry Andric getDriver().Diag(diag::err_drv_undetermined_gpu_arch) 949bdd1243dSDimitry Andric << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march"; 9500fca6ea1SDimitry Andric Arch = OffloadArchToString(OffloadArch::CudaDefault); 951bdd1243dSDimitry Andric } else { 952bdd1243dSDimitry Andric Arch = Args.MakeArgString(ArchsOrErr->front()); 953bdd1243dSDimitry Andric } 954bdd1243dSDimitry Andric } 955bdd1243dSDimitry Andric DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch); 956bdd1243dSDimitry Andric } 9570b57cec5SDimitry Andric 9580b57cec5SDimitry Andric return DAL; 9590b57cec5SDimitry Andric } 9600b57cec5SDimitry Andric 9610b57cec5SDimitry Andric for (Arg *A : Args) { 9620fca6ea1SDimitry Andric // Make sure flags are not duplicated. 9630fca6ea1SDimitry Andric if (!llvm::is_contained(*DAL, A)) { 9640b57cec5SDimitry Andric DAL->append(A); 9650b57cec5SDimitry Andric } 9660fca6ea1SDimitry Andric } 9670b57cec5SDimitry Andric 9680b57cec5SDimitry Andric if (!BoundArch.empty()) { 9690b57cec5SDimitry Andric DAL->eraseArg(options::OPT_march_EQ); 970bdd1243dSDimitry Andric DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), 971bdd1243dSDimitry Andric BoundArch); 9720b57cec5SDimitry Andric } 9730b57cec5SDimitry Andric return DAL; 9740b57cec5SDimitry Andric } 9750b57cec5SDimitry Andric 976bdd1243dSDimitry Andric Tool *NVPTXToolChain::buildAssembler() const { 977bdd1243dSDimitry Andric return new tools::NVPTX::Assembler(*this); 978bdd1243dSDimitry Andric } 979bdd1243dSDimitry Andric 980bdd1243dSDimitry Andric Tool *NVPTXToolChain::buildLinker() const { 981bdd1243dSDimitry Andric return new tools::NVPTX::Linker(*this); 982bdd1243dSDimitry Andric } 983bdd1243dSDimitry Andric 9840b57cec5SDimitry Andric Tool *CudaToolChain::buildAssembler() const { 9850b57cec5SDimitry Andric return new tools::NVPTX::Assembler(*this); 9860b57cec5SDimitry Andric } 9870b57cec5SDimitry Andric 9880b57cec5SDimitry Andric Tool *CudaToolChain::buildLinker() const { 989bdd1243dSDimitry Andric return new tools::NVPTX::FatBinary(*this); 9900b57cec5SDimitry Andric } 9910b57cec5SDimitry Andric 9920b57cec5SDimitry Andric void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { 9930b57cec5SDimitry Andric HostTC.addClangWarningOptions(CC1Args); 9940b57cec5SDimitry Andric } 9950b57cec5SDimitry Andric 9960b57cec5SDimitry Andric ToolChain::CXXStdlibType 9970b57cec5SDimitry Andric CudaToolChain::GetCXXStdlibType(const ArgList &Args) const { 9980b57cec5SDimitry Andric return HostTC.GetCXXStdlibType(Args); 9990b57cec5SDimitry Andric } 10000b57cec5SDimitry Andric 10010b57cec5SDimitry Andric void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, 10020b57cec5SDimitry Andric ArgStringList &CC1Args) const { 10030b57cec5SDimitry Andric HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); 1004349cc55cSDimitry Andric 1005349cc55cSDimitry Andric if (!DriverArgs.hasArg(options::OPT_nogpuinc) && CudaInstallation.isValid()) 1006349cc55cSDimitry Andric CC1Args.append( 1007349cc55cSDimitry Andric {"-internal-isystem", 1008349cc55cSDimitry Andric DriverArgs.MakeArgString(CudaInstallation.getIncludePath())}); 10090b57cec5SDimitry Andric } 10100b57cec5SDimitry Andric 10110b57cec5SDimitry Andric void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args, 10120b57cec5SDimitry Andric ArgStringList &CC1Args) const { 10130b57cec5SDimitry Andric HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args); 10140b57cec5SDimitry Andric } 10150b57cec5SDimitry Andric 10160b57cec5SDimitry Andric void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args, 10170b57cec5SDimitry Andric ArgStringList &CC1Args) const { 10180b57cec5SDimitry Andric HostTC.AddIAMCUIncludeArgs(Args, CC1Args); 10190b57cec5SDimitry Andric } 10200b57cec5SDimitry Andric 10210b57cec5SDimitry Andric SanitizerMask CudaToolChain::getSupportedSanitizers() const { 10220b57cec5SDimitry Andric // The CudaToolChain only supports sanitizers in the sense that it allows 10230b57cec5SDimitry Andric // sanitizer arguments on the command line if they are supported by the host 10240b57cec5SDimitry Andric // toolchain. The CudaToolChain will actually ignore any command line 10250b57cec5SDimitry Andric // arguments for any of these "supported" sanitizers. That means that no 10260b57cec5SDimitry Andric // sanitization of device code is actually supported at this time. 10270b57cec5SDimitry Andric // 10280b57cec5SDimitry Andric // This behavior is necessary because the host and device toolchains 10290b57cec5SDimitry Andric // invocations often share the command line, so the device toolchain must 10300b57cec5SDimitry Andric // tolerate flags meant only for the host toolchain. 10310b57cec5SDimitry Andric return HostTC.getSupportedSanitizers(); 10320b57cec5SDimitry Andric } 10330b57cec5SDimitry Andric 10340b57cec5SDimitry Andric VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D, 10350b57cec5SDimitry Andric const ArgList &Args) const { 10360b57cec5SDimitry Andric return HostTC.computeMSVCVersion(D, Args); 10370b57cec5SDimitry Andric } 1038