1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements AMDGPU TargetInfo objects. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "clang/Basic/Builtins.h" 15 #include "clang/Basic/CodeGenOptions.h" 16 #include "clang/Basic/Diagnostic.h" 17 #include "clang/Basic/LangOptions.h" 18 #include "clang/Basic/MacroBuilder.h" 19 #include "clang/Basic/TargetBuiltins.h" 20 #include "llvm/ADT/SmallString.h" 21 using namespace clang; 22 using namespace clang::targets; 23 24 namespace clang { 25 namespace targets { 26 27 // If you edit the description strings, make sure you update 28 // getPointerWidthV(). 29 30 static const char *const DataLayoutStringR600 = 31 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 32 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"; 33 34 static const char *const DataLayoutStringAMDGCN = 35 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" 36 "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" 37 "32-v48:64-v96:128" 38 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" 39 "-ni:7:8:9"; 40 41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { 42 llvm::AMDGPUAS::FLAT_ADDRESS, // Default 43 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global 44 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local 45 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant 46 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private 47 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic 48 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device 49 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host 50 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device 51 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant 52 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared 53 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global 54 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device 55 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host 56 llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local 57 llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private 58 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr 59 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr 60 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 61 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared 62 llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant 63 }; 64 65 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { 66 llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default 67 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global 68 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local 69 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant 70 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private 71 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic 72 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device 73 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host 74 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device 75 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant 76 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared 77 // SYCL address space values for this map are dummy 78 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global 79 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device 80 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host 81 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local 82 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private 83 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr 84 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr 85 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 86 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared 87 llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant 88 }; 89 } // namespace targets 90 } // namespace clang 91 92 static constexpr Builtin::Info BuiltinInfo[] = { 93 #define BUILTIN(ID, TYPE, ATTRS) \ 94 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 95 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ 96 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, 97 #include "clang/Basic/BuiltinsAMDGPU.def" 98 }; 99 100 const char *const AMDGPUTargetInfo::GCCRegNames[] = { 101 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", 102 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", 103 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", 104 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", 105 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", 106 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", 107 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", 108 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", 109 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", 110 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", 111 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", 112 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", 113 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", 114 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", 115 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", 116 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", 117 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", 118 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", 119 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", 120 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", 121 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", 122 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", 123 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", 124 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", 125 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", 126 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", 127 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", 128 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", 129 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", 130 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", 131 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", 132 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", 133 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", 134 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", 135 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", 136 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", 137 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", 138 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", 139 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", 140 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", 141 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", 142 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", 143 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", 144 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", 145 "flat_scratch_lo", "flat_scratch_hi", 146 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", 147 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", 148 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", 149 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", 150 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", 151 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", 152 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", 153 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", 154 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", 155 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", 156 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", 157 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", 158 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", 159 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", 160 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", 161 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", 162 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", 163 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", 164 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", 165 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", 166 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", 167 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", 168 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", 169 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", 170 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", 171 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", 172 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", 173 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", 174 "a252", "a253", "a254", "a255" 175 }; 176 177 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { 178 return llvm::ArrayRef(GCCRegNames); 179 } 180 181 bool AMDGPUTargetInfo::initFeatureMap( 182 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, 183 const std::vector<std::string> &FeatureVec) const { 184 185 using namespace llvm::AMDGPU; 186 fillAMDGPUFeatureMap(CPU, getTriple(), Features); 187 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec)) 188 return false; 189 190 // TODO: Should move this logic into TargetParser 191 auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features); 192 switch (HasError.first) { 193 default: 194 break; 195 case llvm::AMDGPU::INVALID_FEATURE_COMBINATION: 196 Diags.Report(diag::err_invalid_feature_combination) << HasError.second; 197 return false; 198 case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE: 199 Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second; 200 return false; 201 } 202 203 return true; 204 } 205 206 void AMDGPUTargetInfo::fillValidCPUList( 207 SmallVectorImpl<StringRef> &Values) const { 208 if (isAMDGCN(getTriple())) 209 llvm::AMDGPU::fillValidArchListAMDGCN(Values); 210 else 211 llvm::AMDGPU::fillValidArchListR600(Values); 212 } 213 214 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { 215 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; 216 } 217 218 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, 219 const TargetOptions &Opts) 220 : TargetInfo(Triple), 221 GPUKind(isAMDGCN(Triple) ? 222 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) : 223 llvm::AMDGPU::parseArchR600(Opts.CPU)), 224 GPUFeatures(isAMDGCN(Triple) ? 225 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) : 226 llvm::AMDGPU::getArchAttrR600(GPUKind)) { 227 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN 228 : DataLayoutStringR600); 229 230 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || 231 !isAMDGCN(Triple)); 232 UseAddrSpaceMapMangling = true; 233 234 if (isAMDGCN(Triple)) { 235 // __bf16 is always available as a load/store only type on AMDGCN. 236 BFloat16Width = BFloat16Align = 16; 237 BFloat16Format = &llvm::APFloat::BFloat(); 238 } 239 240 HasLegalHalfType = true; 241 HasFloat16 = true; 242 WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64; 243 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; 244 245 // Set pointer width and alignment for the generic address space. 246 PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default); 247 if (getMaxPointerWidth() == 64) { 248 LongWidth = LongAlign = 64; 249 SizeType = UnsignedLong; 250 PtrDiffType = SignedLong; 251 IntPtrType = SignedLong; 252 } 253 254 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; 255 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); 256 for (auto F : {"image-insts", "gws"}) 257 ReadOnlyFeatures.insert(F); 258 HalfArgsAndReturns = true; 259 } 260 261 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { 262 TargetInfo::adjust(Diags, Opts); 263 // ToDo: There are still a few places using default address space as private 264 // address space in OpenCL, which needs to be cleaned up, then the references 265 // to OpenCL can be removed from the following line. 266 setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) || 267 !isAMDGCN(getTriple())); 268 } 269 270 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { 271 return llvm::ArrayRef(BuiltinInfo, 272 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin); 273 } 274 275 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, 276 MacroBuilder &Builder) const { 277 Builder.defineMacro("__AMD__"); 278 Builder.defineMacro("__AMDGPU__"); 279 280 if (isAMDGCN(getTriple())) 281 Builder.defineMacro("__AMDGCN__"); 282 else 283 Builder.defineMacro("__R600__"); 284 285 // Legacy HIP host code relies on these default attributes to be defined. 286 bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice; 287 if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost) 288 return; 289 290 llvm::SmallString<16> CanonName = 291 (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind) 292 : getArchNameR600(GPUKind)); 293 294 // Sanitize the name of generic targets. 295 // e.g. gfx10-1-generic -> gfx10_1_generic 296 if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST && 297 GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) { 298 std::replace(CanonName.begin(), CanonName.end(), '-', '_'); 299 } 300 301 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__")); 302 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___ 303 if (isAMDGCN(getTriple()) && !IsHIPHost) { 304 assert(StringRef(CanonName).starts_with("gfx") && 305 "Invalid amdgcn canonical name"); 306 StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind); 307 Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) + 308 Twine("__")); 309 Builder.defineMacro("__amdgcn_processor__", 310 Twine("\"") + Twine(CanonName) + Twine("\"")); 311 Builder.defineMacro("__amdgcn_target_id__", 312 Twine("\"") + Twine(*getTargetID()) + Twine("\"")); 313 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { 314 auto Loc = OffloadArchFeatures.find(F); 315 if (Loc != OffloadArchFeatures.end()) { 316 std::string NewF = F.str(); 317 std::replace(NewF.begin(), NewF.end(), '-', '_'); 318 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) + 319 Twine("__"), 320 Loc->second ? "1" : "0"); 321 } 322 } 323 } 324 325 if (AllowAMDGPUUnsafeFPAtomics) 326 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__"); 327 328 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be 329 // removed in the near future. 330 if (hasFMAF()) 331 Builder.defineMacro("__HAS_FMAF__"); 332 if (hasFastFMAF()) 333 Builder.defineMacro("FP_FAST_FMAF"); 334 if (hasLDEXPF()) 335 Builder.defineMacro("__HAS_LDEXPF__"); 336 if (hasFP64()) 337 Builder.defineMacro("__HAS_FP64__"); 338 if (hasFastFMA()) 339 Builder.defineMacro("FP_FAST_FMA"); 340 341 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize), 342 "compile-time-constant access to the wavefront size will " 343 "be removed in a future release"); 344 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize), 345 "compile-time-constant access to the wavefront size will " 346 "be removed in a future release"); 347 Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode)); 348 } 349 350 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { 351 assert(HalfFormat == Aux->HalfFormat); 352 assert(FloatFormat == Aux->FloatFormat); 353 assert(DoubleFormat == Aux->DoubleFormat); 354 355 // On x86_64 long double is 80-bit extended precision format, which is 356 // not supported by AMDGPU. 128-bit floating point format is also not 357 // supported by AMDGPU. Therefore keep its own format for these two types. 358 auto SaveLongDoubleFormat = LongDoubleFormat; 359 auto SaveFloat128Format = Float128Format; 360 auto SaveLongDoubleWidth = LongDoubleWidth; 361 auto SaveLongDoubleAlign = LongDoubleAlign; 362 copyAuxTarget(Aux); 363 LongDoubleFormat = SaveLongDoubleFormat; 364 Float128Format = SaveFloat128Format; 365 LongDoubleWidth = SaveLongDoubleWidth; 366 LongDoubleAlign = SaveLongDoubleAlign; 367 // For certain builtin types support on the host target, claim they are 368 // support to pass the compilation of the host code during the device-side 369 // compilation. 370 // FIXME: As the side effect, we also accept `__float128` uses in the device 371 // code. To rejct these builtin types supported in the host target but not in 372 // the device target, one approach would support `device_builtin` attribute 373 // so that we could tell the device builtin types from the host ones. The 374 // also solves the different representations of the same builtin type, such 375 // as `size_t` in the MSVC environment. 376 if (Aux->hasFloat128Type()) { 377 HasFloat128 = true; 378 Float128Format = DoubleFormat; 379 } 380 } 381