xref: /llvm-project/clang/lib/Basic/Targets/AMDGPU.cpp (revision d92bac8a3ebb19106f6bca6b7613a27c52cb48ab)
1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 #include "llvm/ADT/SmallString.h"
21 using namespace clang;
22 using namespace clang::targets;
23 
24 namespace clang {
25 namespace targets {
26 
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29 
// Data layout description handed to resetDataLayout() by the
// AMDGPUTargetInfo constructor when the triple is not AMDGCN.
// The literal is split by component group: pointers/integers, vectors,
// then native widths, stack alignment and address-space defaults.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1";
33 
// Data layout description handed to resetDataLayout() by the
// AMDGPUTargetInfo constructor when the triple is AMDGCN.
// The literal is split by component group: per-address-space pointer specs
// (p, p1..p9), integers, vectors, native widths / stack / default address
// spaces, and finally the non-integral address spaces.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7:8:9";
40 
// Language-address-space to AMDGPU-address-space table installed by
// setAddressSpaceMap() when DefaultIsPrivate is false. In this table the
// "Default" row resolves to FLAT_ADDRESS.
// NOTE(review): rows appear to be positional, one per clang::LangAS
// enumerator in declaration order (the trailing comment on each row names
// the language address space) -- confirm against the LangAS enum before
// adding, removing or reordering rows.
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42     llvm::AMDGPUAS::FLAT_ADDRESS,     // Default
43     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
44     llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
45     llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
46     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
47     llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
48     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
49     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
50     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
51     llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
52     llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
53     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global
54     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_device
55     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_host
56     llvm::AMDGPUAS::LOCAL_ADDRESS,    // sycl_local
57     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // sycl_private
58     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
59     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
60     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
61     llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
62     llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
63 };
64 
// Language-address-space to AMDGPU-address-space table installed by
// setAddressSpaceMap() when DefaultIsPrivate is true. It differs from
// AMDGPUDefIsGenMap in the "Default" row (PRIVATE_ADDRESS here) and in the
// SYCL rows, which are placeholders in this table.
// NOTE(review): rows appear to be positional, one per clang::LangAS
// enumerator in declaration order -- confirm against the LangAS enum before
// adding, removing or reordering rows.
65 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
66     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // Default
67     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
68     llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
69     llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
70     llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
71     llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
72     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
73     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
74     llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
75     llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
76     llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
77     // SYCL address space values for this map are dummy
78     llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global
79     llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global_device
80     llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global_host
81     llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_local
82     llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_private
83     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
84     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
85     llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
86     llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
87     llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
88 };
89 } // namespace targets
90 } // namespace clang
91 
// Table of builtin descriptors exposed through getTargetBuiltins(). It is
// produced by expanding the BUILTIN / TARGET_BUILTIN macros over every entry
// of clang/Basic/BuiltinsAMDGPU.def:
//   * BUILTIN rows carry no required target feature (nullptr);
//   * TARGET_BUILTIN rows carry the FEATURE string from the .def entry.
// Every row uses HeaderDesc::NO_HEADER and ALL_LANGUAGES.
// NOTE(review): the two macros are not #undef'd here; presumably the .def
// file undefines them itself -- confirm before defining them again later.
92 static constexpr Builtin::Info BuiltinInfo[] = {
93 #define BUILTIN(ID, TYPE, ATTRS)                                               \
94   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
95 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
96   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
97 #include "clang/Basic/BuiltinsAMDGPU.def"
98 };
99 
// Register-name table returned by getGCCRegNames(). Layout, in order:
//   * "v0" .. "v255"
//   * "s0" .. "s127"
//   * the named registers "exec", "vcc", "scc", "m0", "flat_scratch" and the
//     "_lo" / "_hi" halves of exec, vcc and flat_scratch
//   * "a0" .. "a255"
// NOTE(review): presumably consumed for inline-asm register name
// validation; whether any consumer depends on the index of an entry is not
// visible here -- confirm before reordering.
100 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
101   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
102   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
103   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
104   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
105   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
106   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
107   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
108   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
109   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
110   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
111   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
112   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
113   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
114   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
115   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
116   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
117   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
118   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
119   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
120   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
121   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
122   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
123   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
124   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
125   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
126   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
127   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
128   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
129   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
130   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
131   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
132   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
133   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
134   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
135   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
136   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
137   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
138   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
139   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
140   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
141   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
142   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
143   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
144   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
145   "flat_scratch_lo", "flat_scratch_hi",
146   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
147   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
148   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
149   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
150   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
151   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
152   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
153   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
154   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
155   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
156   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
157   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
158   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
159   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
160   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
161   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
162   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
163   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
164   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
165   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
166   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
167   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
168   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
169   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
170   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
171   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
172   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
173   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
174   "a252", "a253", "a254", "a255"
175 };
176 
177 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
178   return llvm::ArrayRef(GCCRegNames);
179 }
180 
181 bool AMDGPUTargetInfo::initFeatureMap(
182     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
183     const std::vector<std::string> &FeatureVec) const {
184 
185   using namespace llvm::AMDGPU;
186   fillAMDGPUFeatureMap(CPU, getTriple(), Features);
187   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
188     return false;
189 
190   // TODO: Should move this logic into TargetParser
191   auto HasError = insertWaveSizeFeature(CPU, getTriple(), Features);
192   switch (HasError.first) {
193   default:
194     break;
195   case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
196     Diags.Report(diag::err_invalid_feature_combination) << HasError.second;
197     return false;
198   case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
199     Diags.Report(diag::err_opt_not_valid_on_target) << HasError.second;
200     return false;
201   }
202 
203   return true;
204 }
205 
206 void AMDGPUTargetInfo::fillValidCPUList(
207     SmallVectorImpl<StringRef> &Values) const {
208   if (isAMDGCN(getTriple()))
209     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
210   else
211     llvm::AMDGPU::fillValidArchListR600(Values);
212 }
213 
214 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
215   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
216 }
217 
218 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
219                                    const TargetOptions &Opts)
220     : TargetInfo(Triple),
221       GPUKind(isAMDGCN(Triple) ?
222               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
223               llvm::AMDGPU::parseArchR600(Opts.CPU)),
224       GPUFeatures(isAMDGCN(Triple) ?
225                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
226                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
227   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
228                                         : DataLayoutStringR600);
229 
230   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
231                      !isAMDGCN(Triple));
232   UseAddrSpaceMapMangling = true;
233 
234   if (isAMDGCN(Triple)) {
235     // __bf16 is always available as a load/store only type on AMDGCN.
236     BFloat16Width = BFloat16Align = 16;
237     BFloat16Format = &llvm::APFloat::BFloat();
238   }
239 
240   HasLegalHalfType = true;
241   HasFloat16 = true;
242   WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
243   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
244 
245   // Set pointer width and alignment for the generic address space.
246   PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
247   if (getMaxPointerWidth() == 64) {
248     LongWidth = LongAlign = 64;
249     SizeType = UnsignedLong;
250     PtrDiffType = SignedLong;
251     IntPtrType = SignedLong;
252   }
253 
254   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
255   CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
256   for (auto F : {"image-insts", "gws"})
257     ReadOnlyFeatures.insert(F);
258   HalfArgsAndReturns = true;
259 }
260 
261 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
262   TargetInfo::adjust(Diags, Opts);
263   // ToDo: There are still a few places using default address space as private
264   // address space in OpenCL, which needs to be cleaned up, then the references
265   // to OpenCL can be removed from the following line.
266   setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
267                      !isAMDGCN(getTriple()));
268 }
269 
270 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
271   return llvm::ArrayRef(BuiltinInfo,
272                         clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
273 }
274 
275 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
276                                         MacroBuilder &Builder) const {
277   Builder.defineMacro("__AMD__");
278   Builder.defineMacro("__AMDGPU__");
279 
280   if (isAMDGCN(getTriple()))
281     Builder.defineMacro("__AMDGCN__");
282   else
283     Builder.defineMacro("__R600__");
284 
285   // Legacy HIP host code relies on these default attributes to be defined.
286   bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
287   if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
288     return;
289 
290   llvm::SmallString<16> CanonName =
291       (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
292                              : getArchNameR600(GPUKind));
293 
294   // Sanitize the name of generic targets.
295   // e.g. gfx10-1-generic -> gfx10_1_generic
296   if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
297       GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
298     std::replace(CanonName.begin(), CanonName.end(), '-', '_');
299   }
300 
301   Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
302   // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
303   if (isAMDGCN(getTriple()) && !IsHIPHost) {
304     assert(StringRef(CanonName).starts_with("gfx") &&
305            "Invalid amdgcn canonical name");
306     StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
307     Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
308                         Twine("__"));
309     Builder.defineMacro("__amdgcn_processor__",
310                         Twine("\"") + Twine(CanonName) + Twine("\""));
311     Builder.defineMacro("__amdgcn_target_id__",
312                         Twine("\"") + Twine(*getTargetID()) + Twine("\""));
313     for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
314       auto Loc = OffloadArchFeatures.find(F);
315       if (Loc != OffloadArchFeatures.end()) {
316         std::string NewF = F.str();
317         std::replace(NewF.begin(), NewF.end(), '-', '_');
318         Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
319                                 Twine("__"),
320                             Loc->second ? "1" : "0");
321       }
322     }
323   }
324 
325   if (AllowAMDGPUUnsafeFPAtomics)
326     Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
327 
328   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
329   // removed in the near future.
330   if (hasFMAF())
331     Builder.defineMacro("__HAS_FMAF__");
332   if (hasFastFMAF())
333     Builder.defineMacro("FP_FAST_FMAF");
334   if (hasLDEXPF())
335     Builder.defineMacro("__HAS_LDEXPF__");
336   if (hasFP64())
337     Builder.defineMacro("__HAS_FP64__");
338   if (hasFastFMA())
339     Builder.defineMacro("FP_FAST_FMA");
340 
341   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize),
342                       "compile-time-constant access to the wavefront size will "
343                       "be removed in a future release");
344   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize),
345                       "compile-time-constant access to the wavefront size will "
346                       "be removed in a future release");
347   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
348 }
349 
350 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
351   assert(HalfFormat == Aux->HalfFormat);
352   assert(FloatFormat == Aux->FloatFormat);
353   assert(DoubleFormat == Aux->DoubleFormat);
354 
355   // On x86_64 long double is 80-bit extended precision format, which is
356   // not supported by AMDGPU. 128-bit floating point format is also not
357   // supported by AMDGPU. Therefore keep its own format for these two types.
358   auto SaveLongDoubleFormat = LongDoubleFormat;
359   auto SaveFloat128Format = Float128Format;
360   auto SaveLongDoubleWidth = LongDoubleWidth;
361   auto SaveLongDoubleAlign = LongDoubleAlign;
362   copyAuxTarget(Aux);
363   LongDoubleFormat = SaveLongDoubleFormat;
364   Float128Format = SaveFloat128Format;
365   LongDoubleWidth = SaveLongDoubleWidth;
366   LongDoubleAlign = SaveLongDoubleAlign;
367   // For certain builtin types support on the host target, claim they are
368   // support to pass the compilation of the host code during the device-side
369   // compilation.
370   // FIXME: As the side effect, we also accept `__float128` uses in the device
371   // code. To rejct these builtin types supported in the host target but not in
372   // the device target, one approach would support `device_builtin` attribute
373   // so that we could tell the device builtin types from the host ones. The
374   // also solves the different representations of the same builtin type, such
375   // as `size_t` in the MSVC environment.
376   if (Aux->hasFloat128Type()) {
377     HasFloat128 = true;
378     Float128Format = DoubleFormat;
379   }
380 }
381