//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "Targets.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

static constexpr Builtin::Info BuiltinInfo[] = {
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsNVPTX.inc"
};

const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  GPU = OffloadArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

  if (TargetPointerWidth == 32)
    resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else
    resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target.
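  // During CUDA/HIP offload compilation the same headers are parsed for host
  // and device, so mirror the host's fundamental type widths and alignments
  // here to keep layouts consistent on both sides of the host/device boundary.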
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  //   TODO: nvcc emits a warning when using long double on device; we should
  //   do the same.
}

ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  return llvm::ArrayRef(GCCRegNames);
}

bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  return llvm::StringSwitch<bool>(Feature)
      .Cases("ptx", "nvptx", true)
      .Default(false);
}

void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");

  // Skip setting architecture dependent macros if undefined.
  if (GPU == OffloadArch::UNUSED && !HostTarget)
    return;

  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
    // Set __CUDA_ARCH__ for the GPU specified.
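    // The macro value follows the usual CUDA convention of SM major * 100 +
    // minor * 10 (e.g. sm_75 -> 750, sm_90 -> 900). Non-NVPTX offload arches
    // are not valid here; they break out of the switch and reach the
    // llvm_unreachable below.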
    std::string CUDAArchCode = [this] {
      switch (GPU) {
      case OffloadArch::GFX600:
      case OffloadArch::GFX601:
      case OffloadArch::GFX602:
      case OffloadArch::GFX700:
      case OffloadArch::GFX701:
      case OffloadArch::GFX702:
      case OffloadArch::GFX703:
      case OffloadArch::GFX704:
      case OffloadArch::GFX705:
      case OffloadArch::GFX801:
      case OffloadArch::GFX802:
      case OffloadArch::GFX803:
      case OffloadArch::GFX805:
      case OffloadArch::GFX810:
      case OffloadArch::GFX9_GENERIC:
      case OffloadArch::GFX900:
      case OffloadArch::GFX902:
      case OffloadArch::GFX904:
      case OffloadArch::GFX906:
      case OffloadArch::GFX908:
      case OffloadArch::GFX909:
      case OffloadArch::GFX90a:
      case OffloadArch::GFX90c:
      case OffloadArch::GFX9_4_GENERIC:
      case OffloadArch::GFX940:
      case OffloadArch::GFX941:
      case OffloadArch::GFX942:
      case OffloadArch::GFX950:
      case OffloadArch::GFX10_1_GENERIC:
      case OffloadArch::GFX1010:
      case OffloadArch::GFX1011:
      case OffloadArch::GFX1012:
      case OffloadArch::GFX1013:
      case OffloadArch::GFX10_3_GENERIC:
      case OffloadArch::GFX1030:
      case OffloadArch::GFX1031:
      case OffloadArch::GFX1032:
      case OffloadArch::GFX1033:
      case OffloadArch::GFX1034:
      case OffloadArch::GFX1035:
      case OffloadArch::GFX1036:
      case OffloadArch::GFX11_GENERIC:
      case OffloadArch::GFX1100:
      case OffloadArch::GFX1101:
      case OffloadArch::GFX1102:
      case OffloadArch::GFX1103:
      case OffloadArch::GFX1150:
      case OffloadArch::GFX1151:
      case OffloadArch::GFX1152:
      case OffloadArch::GFX1153:
      case OffloadArch::GFX12_GENERIC:
      case OffloadArch::GFX1200:
      case OffloadArch::GFX1201:
      case OffloadArch::AMDGCNSPIRV:
      case OffloadArch::Generic:
      case OffloadArch::LAST:
        break;
      case OffloadArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      case OffloadArch::UNUSED:
      case OffloadArch::SM_20:
        return "200";
      case OffloadArch::SM_21:
        return "210";
      case OffloadArch::SM_30:
        return "300";
      case OffloadArch::SM_32_:
        return "320";
      case OffloadArch::SM_35:
        return "350";
      case OffloadArch::SM_37:
        return "370";
      case OffloadArch::SM_50:
        return "500";
      case OffloadArch::SM_52:
        return "520";
      case OffloadArch::SM_53:
        return "530";
      case OffloadArch::SM_60:
        return "600";
      case OffloadArch::SM_61:
        return "610";
      case OffloadArch::SM_62:
        return "620";
      case OffloadArch::SM_70:
        return "700";
      case OffloadArch::SM_72:
        return "720";
      case OffloadArch::SM_75:
        return "750";
      case OffloadArch::SM_80:
        return "800";
      case OffloadArch::SM_86:
        return "860";
      case OffloadArch::SM_87:
        return "870";
      case OffloadArch::SM_89:
        return "890";
      case OffloadArch::SM_90:
      case OffloadArch::SM_90a:
        return "900";
      case OffloadArch::SM_100:
      case OffloadArch::SM_100a:
        return "1000";
      }
      llvm_unreachable("unhandled OffloadArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
    if (GPU == OffloadArch::SM_90a)
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
    if (GPU == OffloadArch::SM_100a)
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM100_ALL", "1");
  }
}

ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
  return llvm::ArrayRef(BuiltinInfo,
                        clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin);
}