xref: /llvm-project/clang/lib/Basic/Targets/AMDGPU.h (revision ca79ff07d8ae7a0c2531bfdb1cb623e25e5bd486)
1 //===--- AMDGPU.h - Declare AMDGPU target feature support -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
14 #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
15 
16 #include "clang/Basic/TargetID.h"
17 #include "clang/Basic/TargetInfo.h"
18 #include "clang/Basic/TargetOptions.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/Support/AMDGPUAddrSpace.h"
21 #include "llvm/Support/Compiler.h"
22 #include "llvm/TargetParser/TargetParser.h"
23 #include "llvm/TargetParser/Triple.h"
24 #include <optional>
25 
26 namespace clang {
27 namespace targets {
28 
29 class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
30 
  /// Register name table; the array itself is defined out of line.
  /// NOTE(review): presumably what getGCCRegNames() exposes - confirm in the
  /// implementation file.
  static const char *const GCCRegNames[];

  /// Language address space maps, defined out of line.
  /// NOTE(review): the names suggest one map treats the default address space
  /// as generic and the other as private, chosen by setAddressSpaceMap() -
  /// confirm in the implementation file.
  static const LangASMap AMDGPUDefIsGenMap;
  static const LangASMap AMDGPUDefIsPrivMap;

  /// GPU kind for the current CPU name; setCPU() stores the parse result here
  /// and reports failure when it is GK_NONE.
  llvm::AMDGPU::GPUKind GPUKind;
  /// Bitmask tested against llvm::AMDGPU::FEATURE_* flags; setCPU() reloads it
  /// from the architecture attributes of GPUKind.
  unsigned GPUFeatures;
  /// Wavefront size. handleTargetFeatures() sets it to 64 on
  /// "+wavefrontsize64"; getGridValue() only handles 32 and 64.
  unsigned WavefrontSize;

  /// Whether to use cumode or WGP mode. True for cumode. False for WGP mode.
  bool CUMode;

  /// Whether the target has image instructions. Set by the "+image-insts"
  /// feature and reported through hasHIPImageSupport().
  bool HasImage = false;

  /// Target ID is a device name followed by optional feature names, each
  /// postfixed by a plus or minus sign and delimited by colons, e.g.
  /// gfx908:xnack+:sramecc-.
  /// If the target ID contains feature+, map it to true.
  /// If the target ID contains feature-, map it to false.
  /// If the target ID does not contain a feature (default), do not map it.
  llvm::StringMap<bool> OffloadArchFeatures;
  /// NOTE(review): not referenced by any code visible in this header; the
  /// canonical ID returned by getTargetID() is recomputed on every call.
  std::string TargetID;
53 
54   bool hasFP64() const {
55     return getTriple().getArch() == llvm::Triple::amdgcn ||
56            !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64);
57   }
58 
59   /// Has fast fma f32
60   bool hasFastFMAF() const {
61     return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32);
62   }
63 
64   /// Has fast fma f64
65   bool hasFastFMA() const {
66     return getTriple().getArch() == llvm::Triple::amdgcn;
67   }
68 
69   bool hasFMAF() const {
70     return getTriple().getArch() == llvm::Triple::amdgcn ||
71            !!(GPUFeatures & llvm::AMDGPU::FEATURE_FMA);
72   }
73 
74   bool hasFullRateDenormalsF32() const {
75     return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
76   }
77 
78   bool hasLDEXPF() const {
79     return getTriple().getArch() == llvm::Triple::amdgcn ||
80            !!(GPUFeatures & llvm::AMDGPU::FEATURE_LDEXP);
81   }
82 
83   static bool isAMDGCN(const llvm::Triple &TT) {
84     return TT.getArch() == llvm::Triple::amdgcn;
85   }
86 
87   static bool isR600(const llvm::Triple &TT) {
88     return TT.getArch() == llvm::Triple::r600;
89   }
90 
91 public:
  /// Constructs the target info from \p Triple and \p Opts. Defined out of
  /// line.
  AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);

  /// Defined out of line.
  /// NOTE(review): \p DefaultIsPrivate presumably selects between
  /// AMDGPUDefIsPrivMap and AMDGPUDefIsGenMap - confirm in the definition.
  void setAddressSpaceMap(bool DefaultIsPrivate);

  /// TargetInfo override taking the diagnostics engine and the mutable
  /// language options. Defined out of line.
  void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override;
97 
98   uint64_t getPointerWidthV(LangAS AS) const override {
99     if (isR600(getTriple()))
100       return 32;
101     unsigned TargetAS = getTargetAddressSpace(AS);
102 
103     if (TargetAS == llvm::AMDGPUAS::PRIVATE_ADDRESS ||
104         TargetAS == llvm::AMDGPUAS::LOCAL_ADDRESS)
105       return 32;
106 
107     return 64;
108   }
109 
110   uint64_t getPointerAlignV(LangAS AddrSpace) const override {
111     return getPointerWidthV(AddrSpace);
112   }
113 
114   virtual bool isAddressSpaceSupersetOf(LangAS A, LangAS B) const override {
115     // The flat address space AS(0) is a superset of all the other address
116     // spaces used by the backend target.
117     return A == B ||
118            ((A == LangAS::Default ||
119              (isTargetAddressSpace(A) &&
120               toTargetAddressSpace(A) == llvm::AMDGPUAS::FLAT_ADDRESS)) &&
121             isTargetAddressSpace(B) &&
122             toTargetAddressSpace(B) >= llvm::AMDGPUAS::FLAT_ADDRESS &&
123             toTargetAddressSpace(B) <= llvm::AMDGPUAS::PRIVATE_ADDRESS &&
124             toTargetAddressSpace(B) != llvm::AMDGPUAS::REGION_ADDRESS);
125   }
126 
127   uint64_t getMaxPointerWidth() const override {
128     return getTriple().getArch() == llvm::Triple::amdgcn ? 64 : 32;
129   }
130 
131   bool hasBFloat16Type() const override { return isAMDGCN(getTriple()); }
132 
133   std::string_view getClobbers() const override { return ""; }
134 
  /// Defined out of line.
  /// NOTE(review): presumably returns the GCCRegNames table - confirm in the
  /// implementation file.
  ArrayRef<const char *> getGCCRegNames() const override;
136 
137   ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {
138     return {};
139   }
140 
141   /// Accepted register names: (n, m is unsigned integer, n < m)
142   /// v
143   /// s
144   /// a
145   /// {vn}, {v[n]}
146   /// {sn}, {s[n]}
147   /// {an}, {a[n]}
148   /// {S} , where S is a special register name
149   ////{v[n:m]}
150   /// {s[n:m]}
151   /// {a[n:m]}
152   bool validateAsmConstraint(const char *&Name,
153                              TargetInfo::ConstraintInfo &Info) const override {
154     static const ::llvm::StringSet<> SpecialRegs({
155         "exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma",
156         "flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo",
157         "exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi",
158     });
159 
160     switch (*Name) {
161     case 'I':
162       Info.setRequiresImmediate(-16, 64);
163       return true;
164     case 'J':
165       Info.setRequiresImmediate(-32768, 32767);
166       return true;
167     case 'A':
168     case 'B':
169     case 'C':
170       Info.setRequiresImmediate();
171       return true;
172     default:
173       break;
174     }
175 
176     StringRef S(Name);
177 
178     if (S == "DA" || S == "DB") {
179       Name++;
180       Info.setRequiresImmediate();
181       return true;
182     }
183 
184     bool HasLeftParen = S.consume_front("{");
185     if (S.empty())
186       return false;
187     if (S.front() != 'v' && S.front() != 's' && S.front() != 'a') {
188       if (!HasLeftParen)
189         return false;
190       auto E = S.find('}');
191       if (!SpecialRegs.count(S.substr(0, E)))
192         return false;
193       S = S.drop_front(E + 1);
194       if (!S.empty())
195         return false;
196       // Found {S} where S is a special register.
197       Info.setAllowsRegister();
198       Name = S.data() - 1;
199       return true;
200     }
201     S = S.drop_front();
202     if (!HasLeftParen) {
203       if (!S.empty())
204         return false;
205       // Found s, v or a.
206       Info.setAllowsRegister();
207       Name = S.data() - 1;
208       return true;
209     }
210     bool HasLeftBracket = S.consume_front("[");
211     unsigned long long N;
212     if (S.empty() || consumeUnsignedInteger(S, 10, N))
213       return false;
214     if (S.consume_front(":")) {
215       if (!HasLeftBracket)
216         return false;
217       unsigned long long M;
218       if (consumeUnsignedInteger(S, 10, M) || N >= M)
219         return false;
220     }
221     if (HasLeftBracket) {
222       if (!S.consume_front("]"))
223         return false;
224     }
225     if (!S.consume_front("}"))
226       return false;
227     if (!S.empty())
228       return false;
229     // Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]}
230     // or {a[n:m]}.
231     Info.setAllowsRegister();
232     Name = S.data() - 1;
233     return true;
234   }
235 
236   // \p Constraint will be left pointing at the last character of
237   // the constraint.  In practice, it won't be changed unless the
238   // constraint is longer than one character.
239   std::string convertConstraint(const char *&Constraint) const override {
240 
241     StringRef S(Constraint);
242     if (S == "DA" || S == "DB") {
243       return std::string("^") + std::string(Constraint++, 2);
244     }
245 
246     const char *Begin = Constraint;
247     TargetInfo::ConstraintInfo Info("", "");
248     if (validateAsmConstraint(Constraint, Info))
249       return std::string(Begin).substr(0, Constraint - Begin + 1);
250 
251     Constraint = Begin;
252     return std::string(1, *Constraint);
253   }
254 
  /// TargetInfo override taking the feature map to fill, the CPU name and the
  /// requested feature list. Defined out of line.
  bool
  initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
                 StringRef CPU,
                 const std::vector<std::string> &FeatureVec) const override;

  /// \returns the builtin descriptions for this target. Defined out of line.
  ArrayRef<Builtin::Info> getTargetBuiltins() const override;
261 
262   bool useFP16ConversionIntrinsics() const override { return false; }
263 
  /// TargetInfo override that receives the language options and a
  /// MacroBuilder. Defined out of line.
  void getTargetDefines(const LangOptions &Opts,
                        MacroBuilder &Builder) const override;
266 
267   BuiltinVaListKind getBuiltinVaListKind() const override {
268     return TargetInfo::CharPtrBuiltinVaList;
269   }
270 
271   bool isValidCPUName(StringRef Name) const override {
272     if (getTriple().getArch() == llvm::Triple::amdgcn)
273       return llvm::AMDGPU::parseArchAMDGCN(Name) != llvm::AMDGPU::GK_NONE;
274     return llvm::AMDGPU::parseArchR600(Name) != llvm::AMDGPU::GK_NONE;
275   }
276 
  /// TargetInfo override that receives the output vector \p Values. Defined
  /// out of line.
  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
278 
279   bool setCPU(const std::string &Name) override {
280     if (getTriple().getArch() == llvm::Triple::amdgcn) {
281       GPUKind = llvm::AMDGPU::parseArchAMDGCN(Name);
282       GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(GPUKind);
283     } else {
284       GPUKind = llvm::AMDGPU::parseArchR600(Name);
285       GPUFeatures = llvm::AMDGPU::getArchAttrR600(GPUKind);
286     }
287 
288     return GPUKind != llvm::AMDGPU::GK_NONE;
289   }
290 
291   void setSupportedOpenCLOpts() override {
292     auto &Opts = getSupportedOpenCLOpts();
293     Opts["cl_clang_storage_class_specifiers"] = true;
294     Opts["__cl_clang_variadic_functions"] = true;
295     Opts["__cl_clang_function_pointers"] = true;
296     Opts["__cl_clang_non_portable_kernel_param_types"] = true;
297     Opts["__cl_clang_bitfields"] = true;
298 
299     bool IsAMDGCN = isAMDGCN(getTriple());
300 
301     Opts["cl_khr_fp64"] = hasFP64();
302     Opts["__opencl_c_fp64"] = hasFP64();
303 
304     if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR) {
305       Opts["cl_khr_byte_addressable_store"] = true;
306       Opts["cl_khr_global_int32_base_atomics"] = true;
307       Opts["cl_khr_global_int32_extended_atomics"] = true;
308       Opts["cl_khr_local_int32_base_atomics"] = true;
309       Opts["cl_khr_local_int32_extended_atomics"] = true;
310     }
311 
312     if (IsAMDGCN) {
313       Opts["cl_khr_fp16"] = true;
314       Opts["cl_khr_int64_base_atomics"] = true;
315       Opts["cl_khr_int64_extended_atomics"] = true;
316       Opts["cl_khr_mipmap_image"] = true;
317       Opts["cl_khr_mipmap_image_writes"] = true;
318       Opts["cl_khr_subgroups"] = true;
319       Opts["cl_amd_media_ops"] = true;
320       Opts["cl_amd_media_ops2"] = true;
321 
322       Opts["__opencl_c_images"] = true;
323       Opts["__opencl_c_3d_image_writes"] = true;
324       Opts["cl_khr_3d_image_writes"] = true;
325     }
326   }
327 
328   LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override {
329     switch (TK) {
330     case OCLTK_Image:
331       return LangAS::opencl_constant;
332 
333     case OCLTK_ClkEvent:
334     case OCLTK_Queue:
335     case OCLTK_ReserveID:
336       return LangAS::opencl_global;
337 
338     default:
339       return TargetInfo::getOpenCLTypeAddrSpace(TK);
340     }
341   }
342 
343   LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const override {
344     switch (AS) {
345     case 0:
346       return LangAS::opencl_generic;
347     case 1:
348       return LangAS::opencl_global;
349     case 3:
350       return LangAS::opencl_local;
351     case 4:
352       return LangAS::opencl_constant;
353     case 5:
354       return LangAS::opencl_private;
355     default:
356       return getLangASFromTargetAS(AS);
357     }
358   }
359 
360   LangAS getCUDABuiltinAddressSpace(unsigned AS) const override {
361     switch (AS) {
362     case 0:
363       return LangAS::Default;
364     case 1:
365       return LangAS::cuda_device;
366     case 3:
367       return LangAS::cuda_shared;
368     case 4:
369       return LangAS::cuda_constant;
370     default:
371       return getLangASFromTargetAS(AS);
372     }
373   }
374 
375   std::optional<LangAS> getConstantAddressSpace() const override {
376     return getLangASFromTargetAS(llvm::AMDGPUAS::CONSTANT_ADDRESS);
377   }
378 
379   const llvm::omp::GV &getGridValue() const override {
380     switch (WavefrontSize) {
381     case 32:
382       return llvm::omp::getAMDGPUGridValues<32>();
383     case 64:
384       return llvm::omp::getAMDGPUGridValues<64>();
385     default:
386       llvm_unreachable("getGridValue not implemented for this wavesize");
387     }
388   }
389 
390   /// \returns Target specific vtbl ptr address space.
391   unsigned getVtblPtrAddressSpace() const override {
392     return static_cast<unsigned>(llvm::AMDGPUAS::CONSTANT_ADDRESS);
393   }
394 
395   /// \returns If a target requires an address within a target specific address
396   /// space \p AddressSpace to be converted in order to be used, then return the
397   /// corresponding target specific DWARF address space.
398   ///
399   /// \returns Otherwise return std::nullopt and no conversion will be emitted
400   /// in the DWARF.
401   std::optional<unsigned>
402   getDWARFAddressSpace(unsigned AddressSpace) const override {
403     const unsigned DWARF_Private = 1;
404     const unsigned DWARF_Local = 2;
405     if (AddressSpace == llvm::AMDGPUAS::PRIVATE_ADDRESS) {
406       return DWARF_Private;
407     } else if (AddressSpace == llvm::AMDGPUAS::LOCAL_ADDRESS) {
408       return DWARF_Local;
409     } else {
410       return std::nullopt;
411     }
412   }
413 
414   CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
415     switch (CC) {
416     default:
417       return CCCR_Warning;
418     case CC_C:
419     case CC_OpenCLKernel:
420     case CC_AMDGPUKernelCall:
421       return CCCR_OK;
422     }
423   }
424 
425   // In amdgcn target the null pointer in global, constant, and generic
426   // address space has value 0 but in private and local address space has
427   // value ~0.
428   uint64_t getNullPointerValue(LangAS AS) const override {
429     // FIXME: Also should handle region.
430     return (AS == LangAS::opencl_local || AS == LangAS::opencl_private ||
431             AS == LangAS::sycl_local || AS == LangAS::sycl_private)
432                ? ~0
433                : 0;
434   }
435 
  /// TargetInfo override that receives the auxiliary target \p Aux. Defined
  /// out of line.
  void setAuxTarget(const TargetInfo *Aux) override;
437 
438   bool hasBitIntType() const override { return true; }
439 
440   // Record offload arch features since they are needed for defining the
441   // pre-defined macros.
442   bool handleTargetFeatures(std::vector<std::string> &Features,
443                             DiagnosticsEngine &Diags) override {
444     auto TargetIDFeatures =
445         getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind));
446     for (const auto &F : Features) {
447       assert(F.front() == '+' || F.front() == '-');
448       if (F == "+wavefrontsize64")
449         WavefrontSize = 64;
450       else if (F == "+cumode")
451         CUMode = true;
452       else if (F == "-cumode")
453         CUMode = false;
454       else if (F == "+image-insts")
455         HasImage = true;
456       bool IsOn = F.front() == '+';
457       StringRef Name = StringRef(F).drop_front();
458       if (!llvm::is_contained(TargetIDFeatures, Name))
459         continue;
460       assert(!OffloadArchFeatures.contains(Name));
461       OffloadArchFeatures[Name] = IsOn;
462     }
463     return true;
464   }
465 
466   std::optional<std::string> getTargetID() const override {
467     if (!isAMDGCN(getTriple()))
468       return std::nullopt;
469     // When -target-cpu is not set, we assume generic code that it is valid
470     // for all GPU and use an empty string as target ID to represent that.
471     if (GPUKind == llvm::AMDGPU::GK_NONE)
472       return std::string("");
473     return getCanonicalTargetID(getArchNameAMDGCN(GPUKind),
474                                 OffloadArchFeatures);
475   }
476 
477   bool hasHIPImageSupport() const override { return HasImage; }
478 
479   std::pair<unsigned, unsigned> hardwareInterferenceSizes() const override {
480     // This is imprecise as the value can vary between 64, 128 (even 256!) bytes
481     // depending on the level of cache and the target architecture. We select
482     // the size that corresponds to the largest L1 cache line for all
483     // architectures.
484     return std::make_pair(128, 128);
485   }
486 };
487 
488 } // namespace targets
489 } // namespace clang
490 
491 #endif // LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
492