Driver/ToolChains/Cuda.cpp

7330f729Sjoerg//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
7330f729Sjoerg//
7330f729Sjoerg// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
7330f729Sjoerg// See https://llvm.org/LICENSE.txt for license information.
7330f729Sjoerg// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7330f729Sjoerg//
7330f729Sjoerg//===----------------------------------------------------------------------===//
7330f729Sjoerg
7330f729Sjoerg#include "Cuda.h"
7330f729Sjoerg#include "CommonArgs.h"
7330f729Sjoerg#include "InputInfo.h"
7330f729Sjoerg#include "clang/Basic/Cuda.h"
7330f729Sjoerg#include "clang/Config/config.h"
7330f729Sjoerg#include "clang/Driver/Compilation.h"
7330f729Sjoerg#include "clang/Driver/Distro.h"
7330f729Sjoerg#include "clang/Driver/Driver.h"
7330f729Sjoerg#include "clang/Driver/DriverDiagnostic.h"
7330f729Sjoerg#include "clang/Driver/Options.h"
*e038c9c4Sjoerg#include "llvm/ADT/Optional.h"
7330f729Sjoerg#include "llvm/Option/ArgList.h"
7330f729Sjoerg#include "llvm/Support/FileSystem.h"
*e038c9c4Sjoerg#include "llvm/Support/Host.h"
7330f729Sjoerg#include "llvm/Support/Path.h"
7330f729Sjoerg#include "llvm/Support/Process.h"
7330f729Sjoerg#include "llvm/Support/Program.h"
*e038c9c4Sjoerg#include "llvm/Support/TargetParser.h"
7330f729Sjoerg#include "llvm/Support/VirtualFileSystem.h"
7330f729Sjoerg#include <system_error>
7330f729Sjoerg
7330f729Sjoergusing namespace clang::driver;
7330f729Sjoergusing namespace clang::driver::toolchains;
7330f729Sjoergusing namespace clang::driver::tools;
7330f729Sjoergusing namespace clang;
7330f729Sjoergusing namespace llvm::opt;
7330f729Sjoerg
*e038c9c4Sjoergnamespace {
*e038c9c4Sjoergstruct CudaVersionInfo {
*e038c9c4Sjoerg  std::string DetectedVersion;
*e038c9c4Sjoerg  CudaVersion Version;
*e038c9c4Sjoerg};
7330f729Sjoerg// Parses the contents of version.txt in an CUDA installation.  It should
7330f729Sjoerg// contain one line of the from e.g. "CUDA Version 7.5.2".
*e038c9c4SjoergCudaVersionInfo parseCudaVersionFile(llvm::StringRef V) {
*e038c9c4Sjoerg  V = V.trim();
7330f729Sjoerg  if (!V.startswith("CUDA Version "))
*e038c9c4Sjoerg    return {V.str(), CudaVersion::UNKNOWN};
7330f729Sjoerg  V = V.substr(strlen("CUDA Version "));
*e038c9c4Sjoerg  SmallVector<StringRef,4> VersionParts;
*e038c9c4Sjoerg  V.split(VersionParts, '.');
*e038c9c4Sjoerg  return {"version.txt: " + V.str() + ".",
*e038c9c4Sjoerg          VersionParts.size() < 2
*e038c9c4Sjoerg              ? CudaVersion::UNKNOWN
*e038c9c4Sjoerg              : CudaStringToVersion(
*e038c9c4Sjoerg                    join_items(".", VersionParts[0], VersionParts[1]))};
7330f729Sjoerg}
*e038c9c4Sjoerg
*e038c9c4SjoergCudaVersion getCudaVersion(uint32_t raw_version) {
*e038c9c4Sjoerg  if (raw_version < 7050)
*e038c9c4Sjoerg    return CudaVersion::CUDA_70;
*e038c9c4Sjoerg  if (raw_version < 8000)
7330f729Sjoerg    return CudaVersion::CUDA_75;
*e038c9c4Sjoerg  if (raw_version < 9000)
7330f729Sjoerg    return CudaVersion::CUDA_80;
*e038c9c4Sjoerg  if (raw_version < 9010)
7330f729Sjoerg    return CudaVersion::CUDA_90;
*e038c9c4Sjoerg  if (raw_version < 9020)
7330f729Sjoerg    return CudaVersion::CUDA_91;
*e038c9c4Sjoerg  if (raw_version < 10000)
7330f729Sjoerg    return CudaVersion::CUDA_92;
*e038c9c4Sjoerg  if (raw_version < 10010)
7330f729Sjoerg    return CudaVersion::CUDA_100;
*e038c9c4Sjoerg  if (raw_version < 10020)
7330f729Sjoerg    return CudaVersion::CUDA_101;
*e038c9c4Sjoerg  if (raw_version < 11000)
*e038c9c4Sjoerg    return CudaVersion::CUDA_102;
*e038c9c4Sjoerg  if (raw_version < 11010)
*e038c9c4Sjoerg    return CudaVersion::CUDA_110;
*e038c9c4Sjoerg  if (raw_version < 11020)
*e038c9c4Sjoerg    return CudaVersion::CUDA_111;
*e038c9c4Sjoerg  return CudaVersion::LATEST;
*e038c9c4Sjoerg}
*e038c9c4Sjoerg
*e038c9c4SjoergCudaVersionInfo parseCudaHFile(llvm::StringRef Input) {
*e038c9c4Sjoerg  // Helper lambda which skips the words if the line starts with them or returns
*e038c9c4Sjoerg  // None otherwise.
*e038c9c4Sjoerg  auto StartsWithWords =
*e038c9c4Sjoerg      [](llvm::StringRef Line,
*e038c9c4Sjoerg         const SmallVector<StringRef, 3> words) -> llvm::Optional<StringRef> {
*e038c9c4Sjoerg    for (StringRef word : words) {
*e038c9c4Sjoerg      if (!Line.consume_front(word))
*e038c9c4Sjoerg        return {};
*e038c9c4Sjoerg      Line = Line.ltrim();
*e038c9c4Sjoerg    }
*e038c9c4Sjoerg    return Line;
*e038c9c4Sjoerg  };
*e038c9c4Sjoerg
*e038c9c4Sjoerg  Input = Input.ltrim();
*e038c9c4Sjoerg  while (!Input.empty()) {
*e038c9c4Sjoerg    if (auto Line =
*e038c9c4Sjoerg            StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
*e038c9c4Sjoerg      uint32_t RawVersion;
*e038c9c4Sjoerg      Line->consumeInteger(10, RawVersion);
*e038c9c4Sjoerg      return {"cuda.h: CUDA_VERSION=" + Twine(RawVersion).str() + ".",
*e038c9c4Sjoerg              getCudaVersion(RawVersion)};
*e038c9c4Sjoerg    }
*e038c9c4Sjoerg    // Find next non-empty line.
*e038c9c4Sjoerg    Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim();
*e038c9c4Sjoerg  }
*e038c9c4Sjoerg  return {"cuda.h: CUDA_VERSION not found.", CudaVersion::UNKNOWN};
*e038c9c4Sjoerg}
*e038c9c4Sjoerg} // namespace
*e038c9c4Sjoerg
*e038c9c4Sjoergvoid CudaInstallationDetector::WarnIfUnsupportedVersion() {
*e038c9c4Sjoerg  if (DetectedVersionIsNotSupported)
*e038c9c4Sjoerg    D.Diag(diag::warn_drv_unknown_cuda_version)
*e038c9c4Sjoerg        << DetectedVersion
*e038c9c4Sjoerg        << CudaVersionToString(CudaVersion::LATEST_SUPPORTED);
7330f729Sjoerg}
7330f729Sjoerg
7330f729SjoergCudaInstallationDetector::CudaInstallationDetector(
7330f729Sjoerg    const Driver &D, const llvm::Triple &HostTriple,
7330f729Sjoerg    const llvm::opt::ArgList &Args)
7330f729Sjoerg    : D(D) {
7330f729Sjoerg  struct Candidate {
7330f729Sjoerg    std::string Path;
7330f729Sjoerg    bool StrictChecking;
7330f729Sjoerg
7330f729Sjoerg    Candidate(std::string Path, bool StrictChecking = false)
7330f729Sjoerg        : Path(Path), StrictChecking(StrictChecking) {}
7330f729Sjoerg  };
7330f729Sjoerg  SmallVector<Candidate, 4> Candidates;
7330f729Sjoerg
7330f729Sjoerg  // In decreasing order so we prefer newer versions to older versions.
7330f729Sjoerg  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
*e038c9c4Sjoerg  auto &FS = D.getVFS();
7330f729Sjoerg
7330f729Sjoerg  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
7330f729Sjoerg    Candidates.emplace_back(
7330f729Sjoerg        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
7330f729Sjoerg  } else if (HostTriple.isOSWindows()) {
7330f729Sjoerg    for (const char *Ver : Versions)
7330f729Sjoerg      Candidates.emplace_back(
7330f729Sjoerg          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
7330f729Sjoerg          Ver);
7330f729Sjoerg  } else {
7330f729Sjoerg    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
7330f729Sjoerg      // Try to find ptxas binary. If the executable is located in a directory
7330f729Sjoerg      // called 'bin/', its parent directory might be a good guess for a valid
7330f729Sjoerg      // CUDA installation.
7330f729Sjoerg      // However, some distributions might installs 'ptxas' to /usr/bin. In that
7330f729Sjoerg      // case the candidate would be '/usr' which passes the following checks
7330f729Sjoerg      // because '/usr/include' exists as well. To avoid this case, we always
7330f729Sjoerg      // check for the directory potentially containing files for libdevice,
7330f729Sjoerg      // even if the user passes -nocudalib.
7330f729Sjoerg      if (llvm::ErrorOr<std::string> ptxas =
7330f729Sjoerg              llvm::sys::findProgramByName("ptxas")) {
7330f729Sjoerg        SmallString<256> ptxasAbsolutePath;
7330f729Sjoerg        llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
7330f729Sjoerg
7330f729Sjoerg        StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
7330f729Sjoerg        if (llvm::sys::path::filename(ptxasDir) == "bin")
*e038c9c4Sjoerg          Candidates.emplace_back(
*e038c9c4Sjoerg              std::string(llvm::sys::path::parent_path(ptxasDir)),
7330f729Sjoerg              /*StrictChecking=*/true);
7330f729Sjoerg      }
7330f729Sjoerg    }
7330f729Sjoerg
7330f729Sjoerg    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
7330f729Sjoerg    for (const char *Ver : Versions)
7330f729Sjoerg      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
7330f729Sjoerg
*e038c9c4Sjoerg    Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
*e038c9c4Sjoerg    if (Dist.IsDebian() || Dist.IsUbuntu())
7330f729Sjoerg      // Special case for Debian to have nvidia-cuda-toolkit work
7330f729Sjoerg      // out of the box. More info on http://bugs.debian.org/882505
7330f729Sjoerg      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
7330f729Sjoerg  }
7330f729Sjoerg
7330f729Sjoerg  bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);
7330f729Sjoerg
7330f729Sjoerg  for (const auto &Candidate : Candidates) {
7330f729Sjoerg    InstallPath = Candidate.Path;
*e038c9c4Sjoerg    if (InstallPath.empty() || !FS.exists(InstallPath))
7330f729Sjoerg      continue;
7330f729Sjoerg
7330f729Sjoerg    BinPath = InstallPath + "/bin";
7330f729Sjoerg    IncludePath = InstallPath + "/include";
7330f729Sjoerg    LibDevicePath = InstallPath + "/nvvm/libdevice";
7330f729Sjoerg
7330f729Sjoerg    if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
7330f729Sjoerg      continue;
7330f729Sjoerg    bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
7330f729Sjoerg    if (CheckLibDevice && !FS.exists(LibDevicePath))
7330f729Sjoerg      continue;
7330f729Sjoerg
7330f729Sjoerg    // On Linux, we have both lib and lib64 directories, and we need to choose
7330f729Sjoerg    // based on our triple.  On MacOS, we have only a lib directory.
7330f729Sjoerg    //
7330f729Sjoerg    // It's sufficient for our purposes to be flexible: If both lib and lib64
7330f729Sjoerg    // exist, we choose whichever one matches our triple.  Otherwise, if only
7330f729Sjoerg    // lib exists, we use it.
7330f729Sjoerg    if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
7330f729Sjoerg      LibPath = InstallPath + "/lib64";
7330f729Sjoerg    else if (FS.exists(InstallPath + "/lib"))
7330f729Sjoerg      LibPath = InstallPath + "/lib";
7330f729Sjoerg    else
7330f729Sjoerg      continue;
7330f729Sjoerg
*e038c9c4Sjoerg    CudaVersionInfo VersionInfo = {"", CudaVersion::UNKNOWN};
*e038c9c4Sjoerg    if (auto VersionFile = FS.getBufferForFile(InstallPath + "/version.txt"))
*e038c9c4Sjoerg      VersionInfo = parseCudaVersionFile((*VersionFile)->getBuffer());
*e038c9c4Sjoerg    // If version file didn't give us the version, try to find it in cuda.h
*e038c9c4Sjoerg    if (VersionInfo.Version == CudaVersion::UNKNOWN)
*e038c9c4Sjoerg      if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
*e038c9c4Sjoerg        VersionInfo = parseCudaHFile((*CudaHFile)->getBuffer());
*e038c9c4Sjoerg    // As the last resort, make an educated guess between CUDA-7.0, (which had
*e038c9c4Sjoerg    // no version.txt file and had old-style libdevice bitcode ) and an unknown
*e038c9c4Sjoerg    // recent CUDA version (no version.txt, new style bitcode).
*e038c9c4Sjoerg    if (VersionInfo.Version == CudaVersion::UNKNOWN) {
*e038c9c4Sjoerg      VersionInfo.Version = (FS.exists(LibDevicePath + "/libdevice.10.bc"))
*e038c9c4Sjoerg                                ? Version = CudaVersion::LATEST
*e038c9c4Sjoerg                                : Version = CudaVersion::CUDA_70;
*e038c9c4Sjoerg      VersionInfo.DetectedVersion =
*e038c9c4Sjoerg          "No version found in version.txt or cuda.h.";
7330f729Sjoerg    }
7330f729Sjoerg
*e038c9c4Sjoerg    Version = VersionInfo.Version;
*e038c9c4Sjoerg    DetectedVersion = VersionInfo.DetectedVersion;
*e038c9c4Sjoerg
*e038c9c4Sjoerg    // TODO(tra): remove the warning once we have all features of 10.2
*e038c9c4Sjoerg    // and 11.0 implemented.
*e038c9c4Sjoerg    DetectedVersionIsNotSupported = Version > CudaVersion::LATEST_SUPPORTED;
*e038c9c4Sjoerg
7330f729Sjoerg    if (Version >= CudaVersion::CUDA_90) {
7330f729Sjoerg      // CUDA-9+ uses single libdevice file for all GPU variants.
7330f729Sjoerg      std::string FilePath = LibDevicePath + "/libdevice.10.bc";
7330f729Sjoerg      if (FS.exists(FilePath)) {
*e038c9c4Sjoerg        for (int Arch = (int)CudaArch::SM_30, E = (int)CudaArch::LAST; Arch < E;
*e038c9c4Sjoerg             ++Arch) {
*e038c9c4Sjoerg          CudaArch GpuArch = static_cast<CudaArch>(Arch);
*e038c9c4Sjoerg          if (!IsNVIDIAGpuArch(GpuArch))
*e038c9c4Sjoerg            continue;
*e038c9c4Sjoerg          std::string GpuArchName(CudaArchToString(GpuArch));
7330f729Sjoerg          LibDeviceMap[GpuArchName] = FilePath;
7330f729Sjoerg        }
7330f729Sjoerg      }
7330f729Sjoerg    } else {
7330f729Sjoerg      std::error_code EC;
*e038c9c4Sjoerg      for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
*e038c9c4Sjoerg                                         LE;
7330f729Sjoerg           !EC && LI != LE; LI = LI.increment(EC)) {
7330f729Sjoerg        StringRef FilePath = LI->path();
7330f729Sjoerg        StringRef FileName = llvm::sys::path::filename(FilePath);
7330f729Sjoerg        // Process all bitcode filenames that look like
7330f729Sjoerg        // libdevice.compute_XX.YY.bc
7330f729Sjoerg        const StringRef LibDeviceName = "libdevice.";
7330f729Sjoerg        if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
7330f729Sjoerg          continue;
7330f729Sjoerg        StringRef GpuArch = FileName.slice(
7330f729Sjoerg            LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
7330f729Sjoerg        LibDeviceMap[GpuArch] = FilePath.str();
7330f729Sjoerg        // Insert map entries for specific devices with this compute
7330f729Sjoerg        // capability. NVCC's choice of the libdevice library version is
7330f729Sjoerg        // rather peculiar and depends on the CUDA version.
7330f729Sjoerg        if (GpuArch == "compute_20") {
*e038c9c4Sjoerg          LibDeviceMap["sm_20"] = std::string(FilePath);
*e038c9c4Sjoerg          LibDeviceMap["sm_21"] = std::string(FilePath);
*e038c9c4Sjoerg          LibDeviceMap["sm_32"] = std::string(FilePath);
7330f729Sjoerg        } else if (GpuArch == "compute_30") {
*e038c9c4Sjoerg          LibDeviceMap["sm_30"] = std::string(FilePath);
7330f729Sjoerg          if (Version < CudaVersion::CUDA_80) {
*e038c9c4Sjoerg            LibDeviceMap["sm_50"] = std::string(FilePath);
*e038c9c4Sjoerg            LibDeviceMap["sm_52"] = std::string(FilePath);
*e038c9c4Sjoerg            LibDeviceMap["sm_53"] = std::string(FilePath);
7330f729Sjoerg          }
*e038c9c4Sjoerg          LibDeviceMap["sm_60"] = std::string(FilePath);
*e038c9c4Sjoerg          LibDeviceMap["sm_61"] = std::string(FilePath);
*e038c9c4Sjoerg          LibDeviceMap["sm_62"] = std::string(FilePath);
7330f729Sjoerg        } else if (GpuArch == "compute_35") {
*e038c9c4Sjoerg          LibDeviceMap["sm_35"] = std::string(FilePath);
*e038c9c4Sjoerg          LibDeviceMap["sm_37"] = std::string(FilePath);
7330f729Sjoerg        } else if (GpuArch == "compute_50") {
7330f729Sjoerg          if (Version >= CudaVersion::CUDA_80) {
*e038c9c4Sjoerg            LibDeviceMap["sm_50"] = std::string(FilePath);
*e038c9c4Sjoerg            LibDeviceMap["sm_52"] = std::string(FilePath);
*e038c9c4Sjoerg            LibDeviceMap["sm_53"] = std::string(FilePath);
7330f729Sjoerg          }
7330f729Sjoerg        }
7330f729Sjoerg      }
7330f729Sjoerg    }
7330f729Sjoerg
7330f729Sjoerg    // Check that we have found at least one libdevice that we can link in if
7330f729Sjoerg    // -nocudalib hasn't been specified.
7330f729Sjoerg    if (LibDeviceMap.empty() && !NoCudaLib)
7330f729Sjoerg      continue;
7330f729Sjoerg
7330f729Sjoerg    IsValid = true;
7330f729Sjoerg    break;
7330f729Sjoerg  }
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaInstallationDetector::AddCudaIncludeArgs(
7330f729Sjoerg    const ArgList &DriverArgs, ArgStringList &CC1Args) const {
7330f729Sjoerg  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
7330f729Sjoerg    // Add cuda_wrappers/* to our system include path.  This lets us wrap
7330f729Sjoerg    // standard library headers.
7330f729Sjoerg    SmallString<128> P(D.ResourceDir);
7330f729Sjoerg    llvm::sys::path::append(P, "include");
7330f729Sjoerg    llvm::sys::path::append(P, "cuda_wrappers");
7330f729Sjoerg    CC1Args.push_back("-internal-isystem");
7330f729Sjoerg    CC1Args.push_back(DriverArgs.MakeArgString(P));
7330f729Sjoerg  }
7330f729Sjoerg
*e038c9c4Sjoerg  if (DriverArgs.hasArg(options::OPT_nogpuinc))
7330f729Sjoerg    return;
7330f729Sjoerg
7330f729Sjoerg  if (!isValid()) {
7330f729Sjoerg    D.Diag(diag::err_drv_no_cuda_installation);
7330f729Sjoerg    return;
7330f729Sjoerg  }
7330f729Sjoerg
7330f729Sjoerg  CC1Args.push_back("-internal-isystem");
7330f729Sjoerg  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
7330f729Sjoerg  CC1Args.push_back("-include");
7330f729Sjoerg  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaInstallationDetector::CheckCudaVersionSupportsArch(
7330f729Sjoerg    CudaArch Arch) const {
7330f729Sjoerg  if (Arch == CudaArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
*e038c9c4Sjoerg      ArchsWithBadVersion[(int)Arch])
7330f729Sjoerg    return;
7330f729Sjoerg
7330f729Sjoerg  auto MinVersion = MinVersionForCudaArch(Arch);
7330f729Sjoerg  auto MaxVersion = MaxVersionForCudaArch(Arch);
7330f729Sjoerg  if (Version < MinVersion || Version > MaxVersion) {
*e038c9c4Sjoerg    ArchsWithBadVersion[(int)Arch] = true;
7330f729Sjoerg    D.Diag(diag::err_drv_cuda_version_unsupported)
7330f729Sjoerg        << CudaArchToString(Arch) << CudaVersionToString(MinVersion)
7330f729Sjoerg        << CudaVersionToString(MaxVersion) << InstallPath
7330f729Sjoerg        << CudaVersionToString(Version);
7330f729Sjoerg  }
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaInstallationDetector::print(raw_ostream &OS) const {
7330f729Sjoerg  if (isValid())
7330f729Sjoerg    OS << "Found CUDA installation: " << InstallPath << ", version "
7330f729Sjoerg       << CudaVersionToString(Version) << "\n";
7330f729Sjoerg}
7330f729Sjoerg
namespace {
/// Debug info level for the NVPTX devices. The device may need a different
/// debug info level than the host, so this type describes what is emitted for
/// the device: nothing at all, debug directives only, or the same debug info
/// as for the host.
enum DeviceDebugInfoLevel {
  /// Do not emit debug info for the devices.
  DisableDebugInfo,
  /// Emit only debug directives.
  DebugDirectivesOnly,
  /// Use the same debug info level as for the host.
  EmitSameDebugInfoAsHost,
};
} // anonymous namespace
7330f729Sjoerg
7330f729Sjoerg/// Define debug info level for the NVPTX devices. If the debug info for both
7330f729Sjoerg/// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If
7330f729Sjoerg/// only debug directives are requested for the both host and device
7330f729Sjoerg/// (-gline-directvies-only), or the debug info only for the device is disabled
7330f729Sjoerg/// (optimization is on and --cuda-noopt-device-debug was not specified), the
7330f729Sjoerg/// debug directves only must be emitted for the device. Otherwise, use the same
7330f729Sjoerg/// debug info level just like for the host (with the limitations of only
7330f729Sjoerg/// supported DWARF2 standard).
7330f729Sjoergstatic DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
7330f729Sjoerg  const Arg *A = Args.getLastArg(options::OPT_O_Group);
7330f729Sjoerg  bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
7330f729Sjoerg                        Args.hasFlag(options::OPT_cuda_noopt_device_debug,
7330f729Sjoerg                                     options::OPT_no_cuda_noopt_device_debug,
7330f729Sjoerg                                     /*Default=*/false);
7330f729Sjoerg  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
7330f729Sjoerg    const Option &Opt = A->getOption();
7330f729Sjoerg    if (Opt.matches(options::OPT_gN_Group)) {
7330f729Sjoerg      if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
7330f729Sjoerg        return DisableDebugInfo;
7330f729Sjoerg      if (Opt.matches(options::OPT_gline_directives_only))
7330f729Sjoerg        return DebugDirectivesOnly;
7330f729Sjoerg    }
7330f729Sjoerg    return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
7330f729Sjoerg  }
*e038c9c4Sjoerg  return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo;
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
7330f729Sjoerg                                    const InputInfo &Output,
7330f729Sjoerg                                    const InputInfoList &Inputs,
7330f729Sjoerg                                    const ArgList &Args,
7330f729Sjoerg                                    const char *LinkingOutput) const {
7330f729Sjoerg  const auto &TC =
7330f729Sjoerg      static_cast<const toolchains::CudaToolChain &>(getToolChain());
7330f729Sjoerg  assert(TC.getTriple().isNVPTX() && "Wrong platform");
7330f729Sjoerg
7330f729Sjoerg  StringRef GPUArchName;
7330f729Sjoerg  // If this is an OpenMP action we need to extract the device architecture
7330f729Sjoerg  // from the -march=arch option. This option may come from -Xopenmp-target
7330f729Sjoerg  // flag or the default value.
7330f729Sjoerg  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
7330f729Sjoerg    GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
7330f729Sjoerg    assert(!GPUArchName.empty() && "Must have an architecture passed in.");
7330f729Sjoerg  } else
7330f729Sjoerg    GPUArchName = JA.getOffloadingArch();
7330f729Sjoerg
7330f729Sjoerg  // Obtain architecture from the action.
7330f729Sjoerg  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
7330f729Sjoerg  assert(gpu_arch != CudaArch::UNKNOWN &&
7330f729Sjoerg         "Device action expected to have an architecture.");
7330f729Sjoerg
7330f729Sjoerg  // Check that our installation's ptxas supports gpu_arch.
7330f729Sjoerg  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
7330f729Sjoerg    TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
7330f729Sjoerg  }
7330f729Sjoerg
7330f729Sjoerg  ArgStringList CmdArgs;
7330f729Sjoerg  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
7330f729Sjoerg  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
7330f729Sjoerg  if (DIKind == EmitSameDebugInfoAsHost) {
7330f729Sjoerg    // ptxas does not accept -g option if optimization is enabled, so
7330f729Sjoerg    // we ignore the compiler's -O* options if we want debug info.
7330f729Sjoerg    CmdArgs.push_back("-g");
7330f729Sjoerg    CmdArgs.push_back("--dont-merge-basicblocks");
7330f729Sjoerg    CmdArgs.push_back("--return-at-end");
7330f729Sjoerg  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
7330f729Sjoerg    // Map the -O we received to -O{0,1,2,3}.
7330f729Sjoerg    //
7330f729Sjoerg    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
7330f729Sjoerg    // default, so it may correspond more closely to the spirit of clang -O2.
7330f729Sjoerg
7330f729Sjoerg    // -O3 seems like the least-bad option when -Osomething is specified to
7330f729Sjoerg    // clang but it isn't handled below.
7330f729Sjoerg    StringRef OOpt = "3";
7330f729Sjoerg    if (A->getOption().matches(options::OPT_O4) ||
7330f729Sjoerg        A->getOption().matches(options::OPT_Ofast))
7330f729Sjoerg      OOpt = "3";
7330f729Sjoerg    else if (A->getOption().matches(options::OPT_O0))
7330f729Sjoerg      OOpt = "0";
7330f729Sjoerg    else if (A->getOption().matches(options::OPT_O)) {
7330f729Sjoerg      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
7330f729Sjoerg      OOpt = llvm::StringSwitch<const char *>(A->getValue())
7330f729Sjoerg                 .Case("1", "1")
7330f729Sjoerg                 .Case("2", "2")
7330f729Sjoerg                 .Case("3", "3")
7330f729Sjoerg                 .Case("s", "2")
7330f729Sjoerg                 .Case("z", "2")
7330f729Sjoerg                 .Default("2");
7330f729Sjoerg    }
7330f729Sjoerg    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
7330f729Sjoerg  } else {
7330f729Sjoerg    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
7330f729Sjoerg    // to no optimizations, but ptxas's default is -O3.
7330f729Sjoerg    CmdArgs.push_back("-O0");
7330f729Sjoerg  }
7330f729Sjoerg  if (DIKind == DebugDirectivesOnly)
7330f729Sjoerg    CmdArgs.push_back("-lineinfo");
7330f729Sjoerg
7330f729Sjoerg  // Pass -v to ptxas if it was passed to the driver.
7330f729Sjoerg  if (Args.hasArg(options::OPT_v))
7330f729Sjoerg    CmdArgs.push_back("-v");
7330f729Sjoerg
7330f729Sjoerg  CmdArgs.push_back("--gpu-name");
7330f729Sjoerg  CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
7330f729Sjoerg  CmdArgs.push_back("--output-file");
7330f729Sjoerg  CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
7330f729Sjoerg  for (const auto& II : Inputs)
7330f729Sjoerg    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
7330f729Sjoerg
7330f729Sjoerg  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
7330f729Sjoerg    CmdArgs.push_back(Args.MakeArgString(A));
7330f729Sjoerg
7330f729Sjoerg  bool Relocatable = false;
7330f729Sjoerg  if (JA.isOffloading(Action::OFK_OpenMP))
7330f729Sjoerg    // In OpenMP we need to generate relocatable code.
7330f729Sjoerg    Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
7330f729Sjoerg                               options::OPT_fnoopenmp_relocatable_target,
7330f729Sjoerg                               /*Default=*/true);
7330f729Sjoerg  else if (JA.isOffloading(Action::OFK_Cuda))
7330f729Sjoerg    Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
7330f729Sjoerg                               options::OPT_fno_gpu_rdc, /*Default=*/false);
7330f729Sjoerg
7330f729Sjoerg  if (Relocatable)
7330f729Sjoerg    CmdArgs.push_back("-c");
7330f729Sjoerg
7330f729Sjoerg  const char *Exec;
7330f729Sjoerg  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
7330f729Sjoerg    Exec = A->getValue();
7330f729Sjoerg  else
7330f729Sjoerg    Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
*e038c9c4Sjoerg  C.addCommand(std::make_unique<Command>(
*e038c9c4Sjoerg      JA, *this,
*e038c9c4Sjoerg      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
*e038c9c4Sjoerg                          "--options-file"},
*e038c9c4Sjoerg      Exec, CmdArgs, Inputs, Output));
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergstatic bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
7330f729Sjoerg  bool includePTX = true;
7330f729Sjoerg  for (Arg *A : Args) {
7330f729Sjoerg    if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
7330f729Sjoerg          A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
7330f729Sjoerg      continue;
7330f729Sjoerg    A->claim();
7330f729Sjoerg    const StringRef ArchStr = A->getValue();
7330f729Sjoerg    if (ArchStr == "all" || ArchStr == gpu_arch) {
7330f729Sjoerg      includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
7330f729Sjoerg      continue;
7330f729Sjoerg    }
7330f729Sjoerg  }
7330f729Sjoerg  return includePTX;
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoerg// All inputs to this linker must be from CudaDeviceActions, as we need to look
7330f729Sjoerg// at the Inputs' Actions in order to figure out which GPU architecture they
7330f729Sjoerg// correspond to.
7330f729Sjoergvoid NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
7330f729Sjoerg                                 const InputInfo &Output,
7330f729Sjoerg                                 const InputInfoList &Inputs,
7330f729Sjoerg                                 const ArgList &Args,
7330f729Sjoerg                                 const char *LinkingOutput) const {
7330f729Sjoerg  const auto &TC =
7330f729Sjoerg      static_cast<const toolchains::CudaToolChain &>(getToolChain());
7330f729Sjoerg  assert(TC.getTriple().isNVPTX() && "Wrong platform");
7330f729Sjoerg
7330f729Sjoerg  ArgStringList CmdArgs;
7330f729Sjoerg  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
7330f729Sjoerg    CmdArgs.push_back("--cuda");
7330f729Sjoerg  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
7330f729Sjoerg  CmdArgs.push_back(Args.MakeArgString("--create"));
7330f729Sjoerg  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
7330f729Sjoerg  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
7330f729Sjoerg    CmdArgs.push_back("-g");
7330f729Sjoerg
7330f729Sjoerg  for (const auto& II : Inputs) {
7330f729Sjoerg    auto *A = II.getAction();
7330f729Sjoerg    assert(A->getInputs().size() == 1 &&
7330f729Sjoerg           "Device offload action is expected to have a single input");
7330f729Sjoerg    const char *gpu_arch_str = A->getOffloadingArch();
7330f729Sjoerg    assert(gpu_arch_str &&
7330f729Sjoerg           "Device action expected to have associated a GPU architecture!");
7330f729Sjoerg    CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
7330f729Sjoerg
7330f729Sjoerg    if (II.getType() == types::TY_PP_Asm &&
7330f729Sjoerg        !shouldIncludePTX(Args, gpu_arch_str))
7330f729Sjoerg      continue;
7330f729Sjoerg    // We need to pass an Arch of the form "sm_XX" for cubin files and
7330f729Sjoerg    // "compute_XX" for ptx.
*e038c9c4Sjoerg    const char *Arch = (II.getType() == types::TY_PP_Asm)
*e038c9c4Sjoerg                           ? CudaArchToVirtualArchString(gpu_arch)
7330f729Sjoerg                           : gpu_arch_str;
7330f729Sjoerg    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
7330f729Sjoerg                                         Arch + ",file=" + II.getFilename()));
7330f729Sjoerg  }
7330f729Sjoerg
7330f729Sjoerg  for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
7330f729Sjoerg    CmdArgs.push_back(Args.MakeArgString(A));
7330f729Sjoerg
7330f729Sjoerg  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
*e038c9c4Sjoerg  C.addCommand(std::make_unique<Command>(
*e038c9c4Sjoerg      JA, *this,
*e038c9c4Sjoerg      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
*e038c9c4Sjoerg                          "--options-file"},
*e038c9c4Sjoerg      Exec, CmdArgs, Inputs, Output));
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
7330f729Sjoerg                                       const InputInfo &Output,
7330f729Sjoerg                                       const InputInfoList &Inputs,
7330f729Sjoerg                                       const ArgList &Args,
7330f729Sjoerg                                       const char *LinkingOutput) const {
7330f729Sjoerg  const auto &TC =
7330f729Sjoerg      static_cast<const toolchains::CudaToolChain &>(getToolChain());
7330f729Sjoerg  assert(TC.getTriple().isNVPTX() && "Wrong platform");
7330f729Sjoerg
7330f729Sjoerg  ArgStringList CmdArgs;
7330f729Sjoerg
7330f729Sjoerg  // OpenMP uses nvlink to link cubin files. The result will be embedded in the
7330f729Sjoerg  // host binary by the host linker.
7330f729Sjoerg  assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
7330f729Sjoerg         "CUDA toolchain not expected for an OpenMP host device.");
7330f729Sjoerg
7330f729Sjoerg  if (Output.isFilename()) {
7330f729Sjoerg    CmdArgs.push_back("-o");
7330f729Sjoerg    CmdArgs.push_back(Output.getFilename());
7330f729Sjoerg  } else
7330f729Sjoerg    assert(Output.isNothing() && "Invalid output.");
7330f729Sjoerg  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
7330f729Sjoerg    CmdArgs.push_back("-g");
7330f729Sjoerg
7330f729Sjoerg  if (Args.hasArg(options::OPT_v))
7330f729Sjoerg    CmdArgs.push_back("-v");
7330f729Sjoerg
7330f729Sjoerg  StringRef GPUArch =
7330f729Sjoerg      Args.getLastArgValue(options::OPT_march_EQ);
7330f729Sjoerg  assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
7330f729Sjoerg
7330f729Sjoerg  CmdArgs.push_back("-arch");
7330f729Sjoerg  CmdArgs.push_back(Args.MakeArgString(GPUArch));
7330f729Sjoerg
7330f729Sjoerg  // Add paths specified in LIBRARY_PATH environment variable as -L options.
7330f729Sjoerg  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
7330f729Sjoerg
7330f729Sjoerg  // Add paths for the default clang library path.
7330f729Sjoerg  SmallString<256> DefaultLibPath =
7330f729Sjoerg      llvm::sys::path::parent_path(TC.getDriver().Dir);
7330f729Sjoerg  llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
7330f729Sjoerg  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
7330f729Sjoerg
7330f729Sjoerg  for (const auto &II : Inputs) {
7330f729Sjoerg    if (II.getType() == types::TY_LLVM_IR ||
7330f729Sjoerg        II.getType() == types::TY_LTO_IR ||
7330f729Sjoerg        II.getType() == types::TY_LTO_BC ||
7330f729Sjoerg        II.getType() == types::TY_LLVM_BC) {
7330f729Sjoerg      C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
7330f729Sjoerg          << getToolChain().getTripleString();
7330f729Sjoerg      continue;
7330f729Sjoerg    }
7330f729Sjoerg
7330f729Sjoerg    // Currently, we only pass the input files to the linker, we do not pass
7330f729Sjoerg    // any libraries that may be valid only for the host.
7330f729Sjoerg    if (!II.isFilename())
7330f729Sjoerg      continue;
7330f729Sjoerg
7330f729Sjoerg    const char *CubinF = C.addTempFile(
7330f729Sjoerg        C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
7330f729Sjoerg
7330f729Sjoerg    CmdArgs.push_back(CubinF);
7330f729Sjoerg  }
7330f729Sjoerg
7330f729Sjoerg  const char *Exec =
7330f729Sjoerg      Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
*e038c9c4Sjoerg  C.addCommand(std::make_unique<Command>(
*e038c9c4Sjoerg      JA, *this,
*e038c9c4Sjoerg      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
*e038c9c4Sjoerg                          "--options-file"},
*e038c9c4Sjoerg      Exec, CmdArgs, Inputs, Output));
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoerg/// CUDA toolchain.  Our assembler is ptxas, and our "linker" is fatbinary,
7330f729Sjoerg/// which isn't properly a linker but nonetheless performs the step of stitching
7330f729Sjoerg/// together object files from the assembler into a single blob.
7330f729Sjoerg
7330f729SjoergCudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
7330f729Sjoerg                             const ToolChain &HostTC, const ArgList &Args,
7330f729Sjoerg                             const Action::OffloadKind OK)
7330f729Sjoerg    : ToolChain(D, Triple, Args), HostTC(HostTC),
7330f729Sjoerg      CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
*e038c9c4Sjoerg  if (CudaInstallation.isValid()) {
*e038c9c4Sjoerg    CudaInstallation.WarnIfUnsupportedVersion();
*e038c9c4Sjoerg    getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
*e038c9c4Sjoerg  }
7330f729Sjoerg  // Lookup binaries into the driver directory, this is used to
7330f729Sjoerg  // discover the clang-offload-bundler executable.
7330f729Sjoerg  getProgramPaths().push_back(getDriver().Dir);
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergstd::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
7330f729Sjoerg  // Only object files are changed, for example assembly files keep their .s
7330f729Sjoerg  // extensions. CUDA also continues to use .o as they don't use nvlink but
7330f729Sjoerg  // fatbinary.
7330f729Sjoerg  if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
7330f729Sjoerg    return ToolChain::getInputFilename(Input);
7330f729Sjoerg
7330f729Sjoerg  // Replace extension for object files with cubin because nvlink relies on
7330f729Sjoerg  // these particular file names.
7330f729Sjoerg  SmallString<256> Filename(ToolChain::getInputFilename(Input));
7330f729Sjoerg  llvm::sys::path::replace_extension(Filename, "cubin");
*e038c9c4Sjoerg  return std::string(Filename.str());
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaToolChain::addClangTargetOptions(
7330f729Sjoerg    const llvm::opt::ArgList &DriverArgs,
7330f729Sjoerg    llvm::opt::ArgStringList &CC1Args,
7330f729Sjoerg    Action::OffloadKind DeviceOffloadingKind) const {
7330f729Sjoerg  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
7330f729Sjoerg
7330f729Sjoerg  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
7330f729Sjoerg  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
7330f729Sjoerg  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
7330f729Sjoerg          DeviceOffloadingKind == Action::OFK_Cuda) &&
7330f729Sjoerg         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
7330f729Sjoerg
7330f729Sjoerg  if (DeviceOffloadingKind == Action::OFK_Cuda) {
7330f729Sjoerg    CC1Args.push_back("-fcuda-is-device");
7330f729Sjoerg
7330f729Sjoerg    if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
7330f729Sjoerg                           options::OPT_fno_cuda_approx_transcendentals, false))
7330f729Sjoerg      CC1Args.push_back("-fcuda-approx-transcendentals");
7330f729Sjoerg  }
7330f729Sjoerg
7330f729Sjoerg  if (DriverArgs.hasArg(options::OPT_nogpulib))
7330f729Sjoerg    return;
7330f729Sjoerg
7330f729Sjoerg  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
7330f729Sjoerg      DriverArgs.hasArg(options::OPT_S))
7330f729Sjoerg    return;
7330f729Sjoerg
*e038c9c4Sjoerg  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
*e038c9c4Sjoerg  if (LibDeviceFile.empty()) {
7330f729Sjoerg    getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
7330f729Sjoerg    return;
7330f729Sjoerg  }
7330f729Sjoerg
7330f729Sjoerg  CC1Args.push_back("-mlink-builtin-bitcode");
7330f729Sjoerg  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
7330f729Sjoerg
*e038c9c4Sjoerg  clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
*e038c9c4Sjoerg
7330f729Sjoerg  // New CUDA versions often introduce new instructions that are only supported
7330f729Sjoerg  // by new PTX version, so we need to raise PTX level to enable them in NVPTX
7330f729Sjoerg  // back-end.
7330f729Sjoerg  const char *PtxFeature = nullptr;
*e038c9c4Sjoerg  switch (CudaInstallationVersion) {
*e038c9c4Sjoerg#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER)                                   \
*e038c9c4Sjoerg  case CudaVersion::CUDA_##CUDA_VER:                                           \
*e038c9c4Sjoerg    PtxFeature = "+ptx" #PTX_VER;                                              \
7330f729Sjoerg    break;
*e038c9c4Sjoerg    CASE_CUDA_VERSION(112, 72);
*e038c9c4Sjoerg    CASE_CUDA_VERSION(111, 71);
*e038c9c4Sjoerg    CASE_CUDA_VERSION(110, 70);
*e038c9c4Sjoerg    CASE_CUDA_VERSION(102, 65);
*e038c9c4Sjoerg    CASE_CUDA_VERSION(101, 64);
*e038c9c4Sjoerg    CASE_CUDA_VERSION(100, 63);
*e038c9c4Sjoerg    CASE_CUDA_VERSION(92, 61);
*e038c9c4Sjoerg    CASE_CUDA_VERSION(91, 61);
*e038c9c4Sjoerg    CASE_CUDA_VERSION(90, 60);
*e038c9c4Sjoerg#undef CASE_CUDA_VERSION
7330f729Sjoerg  default:
7330f729Sjoerg    PtxFeature = "+ptx42";
7330f729Sjoerg  }
7330f729Sjoerg  CC1Args.append({"-target-feature", PtxFeature});
7330f729Sjoerg  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
7330f729Sjoerg                         options::OPT_fno_cuda_short_ptr, false))
7330f729Sjoerg    CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
7330f729Sjoerg
*e038c9c4Sjoerg  if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
*e038c9c4Sjoerg    CC1Args.push_back(
*e038c9c4Sjoerg        DriverArgs.MakeArgString(Twine("-target-sdk-version=") +
*e038c9c4Sjoerg                                 CudaVersionToString(CudaInstallationVersion)));
7330f729Sjoerg
7330f729Sjoerg  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
*e038c9c4Sjoerg    if (CudaInstallationVersion < CudaVersion::CUDA_92) {
*e038c9c4Sjoerg      getDriver().Diag(
*e038c9c4Sjoerg          diag::err_drv_omp_offload_target_cuda_version_not_support)
*e038c9c4Sjoerg          << CudaVersionToString(CudaInstallationVersion);
*e038c9c4Sjoerg      return;
7330f729Sjoerg    }
7330f729Sjoerg
*e038c9c4Sjoerg    std::string BitcodeSuffix = "nvptx-" + GpuArch.str();
*e038c9c4Sjoerg    addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
*e038c9c4Sjoerg                       getTriple());
*e038c9c4Sjoerg  }
*e038c9c4Sjoerg}
7330f729Sjoerg
*e038c9c4Sjoergllvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
*e038c9c4Sjoerg    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
*e038c9c4Sjoerg    const llvm::fltSemantics *FPType) const {
*e038c9c4Sjoerg  if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
*e038c9c4Sjoerg    if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
*e038c9c4Sjoerg        DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
*e038c9c4Sjoerg                           options::OPT_fno_gpu_flush_denormals_to_zero, false))
*e038c9c4Sjoerg      return llvm::DenormalMode::getPreserveSign();
7330f729Sjoerg  }
*e038c9c4Sjoerg
*e038c9c4Sjoerg  assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
*e038c9c4Sjoerg  return llvm::DenormalMode::getIEEE();
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergbool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
7330f729Sjoerg  const Option &O = A->getOption();
7330f729Sjoerg  return (O.matches(options::OPT_gN_Group) &&
7330f729Sjoerg          !O.matches(options::OPT_gmodules)) ||
7330f729Sjoerg         O.matches(options::OPT_g_Flag) ||
7330f729Sjoerg         O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
7330f729Sjoerg         O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
7330f729Sjoerg         O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
7330f729Sjoerg         O.matches(options::OPT_gdwarf_5) ||
7330f729Sjoerg         O.matches(options::OPT_gcolumn_info);
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaToolChain::adjustDebugInfoKind(
7330f729Sjoerg    codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
7330f729Sjoerg  switch (mustEmitDebugInfo(Args)) {
7330f729Sjoerg  case DisableDebugInfo:
7330f729Sjoerg    DebugInfoKind = codegenoptions::NoDebugInfo;
7330f729Sjoerg    break;
7330f729Sjoerg  case DebugDirectivesOnly:
7330f729Sjoerg    DebugInfoKind = codegenoptions::DebugDirectivesOnly;
7330f729Sjoerg    break;
7330f729Sjoerg  case EmitSameDebugInfoAsHost:
7330f729Sjoerg    // Use same debug info level as the host.
7330f729Sjoerg    break;
7330f729Sjoerg  }
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
7330f729Sjoerg                                       ArgStringList &CC1Args) const {
7330f729Sjoerg  // Check our CUDA version if we're going to include the CUDA headers.
*e038c9c4Sjoerg  if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
7330f729Sjoerg      !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
7330f729Sjoerg    StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
7330f729Sjoerg    assert(!Arch.empty() && "Must have an explicit GPU arch.");
7330f729Sjoerg    CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
7330f729Sjoerg  }
7330f729Sjoerg  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergllvm::opt::DerivedArgList *
7330f729SjoergCudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
7330f729Sjoerg                             StringRef BoundArch,
7330f729Sjoerg                             Action::OffloadKind DeviceOffloadKind) const {
7330f729Sjoerg  DerivedArgList *DAL =
7330f729Sjoerg      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
7330f729Sjoerg  if (!DAL)
7330f729Sjoerg    DAL = new DerivedArgList(Args.getBaseArgs());
7330f729Sjoerg
7330f729Sjoerg  const OptTable &Opts = getDriver().getOpts();
7330f729Sjoerg
7330f729Sjoerg  // For OpenMP device offloading, append derived arguments. Make sure
7330f729Sjoerg  // flags are not duplicated.
7330f729Sjoerg  // Also append the compute capability.
7330f729Sjoerg  if (DeviceOffloadKind == Action::OFK_OpenMP) {
7330f729Sjoerg    for (Arg *A : Args) {
7330f729Sjoerg      bool IsDuplicate = false;
7330f729Sjoerg      for (Arg *DALArg : *DAL) {
7330f729Sjoerg        if (A == DALArg) {
7330f729Sjoerg          IsDuplicate = true;
7330f729Sjoerg          break;
7330f729Sjoerg        }
7330f729Sjoerg      }
7330f729Sjoerg      if (!IsDuplicate)
7330f729Sjoerg        DAL->append(A);
7330f729Sjoerg    }
7330f729Sjoerg
7330f729Sjoerg    StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
7330f729Sjoerg    if (Arch.empty())
7330f729Sjoerg      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
7330f729Sjoerg                        CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
7330f729Sjoerg
7330f729Sjoerg    return DAL;
7330f729Sjoerg  }
7330f729Sjoerg
7330f729Sjoerg  for (Arg *A : Args) {
7330f729Sjoerg    DAL->append(A);
7330f729Sjoerg  }
7330f729Sjoerg
7330f729Sjoerg  if (!BoundArch.empty()) {
7330f729Sjoerg    DAL->eraseArg(options::OPT_march_EQ);
7330f729Sjoerg    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
7330f729Sjoerg  }
7330f729Sjoerg  return DAL;
7330f729Sjoerg}
7330f729Sjoerg
7330f729SjoergTool *CudaToolChain::buildAssembler() const {
7330f729Sjoerg  return new tools::NVPTX::Assembler(*this);
7330f729Sjoerg}
7330f729Sjoerg
7330f729SjoergTool *CudaToolChain::buildLinker() const {
7330f729Sjoerg  if (OK == Action::OFK_OpenMP)
7330f729Sjoerg    return new tools::NVPTX::OpenMPLinker(*this);
7330f729Sjoerg  return new tools::NVPTX::Linker(*this);
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
7330f729Sjoerg  HostTC.addClangWarningOptions(CC1Args);
7330f729Sjoerg}
7330f729Sjoerg
7330f729SjoergToolChain::CXXStdlibType
7330f729SjoergCudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
7330f729Sjoerg  return HostTC.GetCXXStdlibType(Args);
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
7330f729Sjoerg                                              ArgStringList &CC1Args) const {
7330f729Sjoerg  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
7330f729Sjoerg                                                 ArgStringList &CC1Args) const {
7330f729Sjoerg  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
7330f729Sjoerg}
7330f729Sjoerg
7330f729Sjoergvoid CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
7330f729Sjoerg                                        ArgStringList &CC1Args) const {
7330f729Sjoerg  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
7330f729Sjoerg}
7330f729Sjoerg
7330f729SjoergSanitizerMask CudaToolChain::getSupportedSanitizers() const {
7330f729Sjoerg  // The CudaToolChain only supports sanitizers in the sense that it allows
7330f729Sjoerg  // sanitizer arguments on the command line if they are supported by the host
7330f729Sjoerg  // toolchain. The CudaToolChain will actually ignore any command line
7330f729Sjoerg  // arguments for any of these "supported" sanitizers. That means that no
7330f729Sjoerg  // sanitization of device code is actually supported at this time.
7330f729Sjoerg  //
7330f729Sjoerg  // This behavior is necessary because the host and device toolchains
7330f729Sjoerg  // invocations often share the command line, so the device toolchain must
7330f729Sjoerg  // tolerate flags meant only for the host toolchain.
7330f729Sjoerg  return HostTC.getSupportedSanitizers();
7330f729Sjoerg}
7330f729Sjoerg
7330f729SjoergVersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
7330f729Sjoerg                                               const ArgList &Args) const {
7330f729Sjoerg  return HostTC.computeMSVCVersion(D, Args);
7330f729Sjoerg}