xref: /freebsd-src/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
15ffd83dbSDimitry Andric //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
25ffd83dbSDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric ///
95ffd83dbSDimitry Andric /// \file
105ffd83dbSDimitry Andric /// \brief Provides definitions for Target specific Grid Values
115ffd83dbSDimitry Andric ///
125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric 
14fe6060f1SDimitry Andric #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
15fe6060f1SDimitry Andric #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
165ffd83dbSDimitry Andric 
175ffd83dbSDimitry Andric namespace llvm {
185ffd83dbSDimitry Andric 
195ffd83dbSDimitry Andric namespace omp {
205ffd83dbSDimitry Andric 
215ffd83dbSDimitry Andric /// \brief Defines various target-specific GPU grid values that must be
225ffd83dbSDimitry Andric ///        consistent between host RTL (plugin), device RTL, and clang.
235ffd83dbSDimitry Andric ///        We can change grid values for a "fat" binary so that different
245ffd83dbSDimitry Andric ///        passes get the correct values when generating code for a
255ffd83dbSDimitry Andric ///        multi-target binary. Both amdgcn and nvptx values are stored in
265ffd83dbSDimitry Andric ///        this file. In the future, should there be differences between GPUs
275ffd83dbSDimitry Andric ///        of the same architecture, then simply make a different array and
285ffd83dbSDimitry Andric ///        use the new array name.
295ffd83dbSDimitry Andric ///
305ffd83dbSDimitry Andric /// Example usage in clang:
31e8d8bef9SDimitry Andric ///   const unsigned slot_size =
32*349cc55cSDimitry Andric ///   ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
335ffd83dbSDimitry Andric ///
345ffd83dbSDimitry Andric /// Example usage in libomptarget/deviceRTLs:
35e8d8bef9SDimitry Andric ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
365ffd83dbSDimitry Andric ///   #ifdef __AMDGPU__
37*349cc55cSDimitry Andric ///     #define GRIDVAL AMDGPUGridValues
385ffd83dbSDimitry Andric ///   #else
39*349cc55cSDimitry Andric ///     #define GRIDVAL NVPTXGridValues
405ffd83dbSDimitry Andric ///   #endif
415ffd83dbSDimitry Andric ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
42*349cc55cSDimitry Andric ///   llvm::omp::GRIDVAL().GV_Warp_Size
435ffd83dbSDimitry Andric ///
445ffd83dbSDimitry Andric /// Example usage in libomptarget hsa plugin:
45e8d8bef9SDimitry Andric ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
46*349cc55cSDimitry Andric ///   #define GRIDVAL AMDGPUGridValues
475ffd83dbSDimitry Andric ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
48*349cc55cSDimitry Andric ///   llvm::omp::GRIDVAL().GV_Warp_Size
495ffd83dbSDimitry Andric ///
505ffd83dbSDimitry Andric /// Example usage in libomptarget cuda plugin:
51e8d8bef9SDimitry Andric ///    #include "llvm/Frontend/OpenMP/OMPGridValues.h"
52*349cc55cSDimitry Andric ///    #define GRIDVAL NVPTXGridValues
535ffd83dbSDimitry Andric ///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
54*349cc55cSDimitry Andric ///    llvm::omp::GRIDVAL().GV_Warp_Size
555ffd83dbSDimitry Andric ///
56*349cc55cSDimitry Andric 
57*349cc55cSDimitry Andric struct GV {
585ffd83dbSDimitry Andric   /// The size reserved for data in a shared memory slot.
59*349cc55cSDimitry Andric   const unsigned GV_Slot_Size;
605ffd83dbSDimitry Andric   /// The default value of maximum number of threads in a worker warp.
61*349cc55cSDimitry Andric   const unsigned GV_Warp_Size;
62*349cc55cSDimitry Andric 
63*349cc55cSDimitry Andric   constexpr unsigned warpSlotSize() const {
64*349cc55cSDimitry Andric     return GV_Warp_Size * GV_Slot_Size;
65*349cc55cSDimitry Andric   }
66*349cc55cSDimitry Andric 
675ffd83dbSDimitry Andric   /// the maximum number of teams.
68*349cc55cSDimitry Andric   const unsigned GV_Max_Teams;
695ffd83dbSDimitry Andric   // An alternative to the heavy data sharing infrastructure that uses global
705ffd83dbSDimitry Andric   // memory is one that uses device __shared__ memory.  The amount of such space
715ffd83dbSDimitry Andric   // (in bytes) reserved by the OpenMP runtime is noted here.
72*349cc55cSDimitry Andric   const unsigned GV_SimpleBufferSize;
735ffd83dbSDimitry Andric   // The absolute maximum team size for a working group
74*349cc55cSDimitry Andric   const unsigned GV_Max_WG_Size;
755ffd83dbSDimitry Andric   // The default maximum team size for a working group
76*349cc55cSDimitry Andric   const unsigned GV_Default_WG_Size;
77*349cc55cSDimitry Andric 
78*349cc55cSDimitry Andric   constexpr unsigned maxWarpNumber() const {
79*349cc55cSDimitry Andric     return GV_Max_WG_Size / GV_Warp_Size;
80*349cc55cSDimitry Andric   }
815ffd83dbSDimitry Andric };
825ffd83dbSDimitry Andric 
835ffd83dbSDimitry Andric /// For AMDGPU GPUs
84*349cc55cSDimitry Andric static constexpr GV AMDGPUGridValues64 = {
855ffd83dbSDimitry Andric     256,  // GV_Slot_Size
865ffd83dbSDimitry Andric     64,   // GV_Warp_Size
875ffd83dbSDimitry Andric     128,  // GV_Max_Teams
885ffd83dbSDimitry Andric     896,  // GV_SimpleBufferSize
895ffd83dbSDimitry Andric     1024, // GV_Max_WG_Size,
90*349cc55cSDimitry Andric     256,  // GV_Default_WG_Size
915ffd83dbSDimitry Andric };
925ffd83dbSDimitry Andric 
93*349cc55cSDimitry Andric static constexpr GV AMDGPUGridValues32 = {
945ffd83dbSDimitry Andric     256,  // GV_Slot_Size
955ffd83dbSDimitry Andric     32,   // GV_Warp_Size
96*349cc55cSDimitry Andric     128,  // GV_Max_Teams
97*349cc55cSDimitry Andric     896,  // GV_SimpleBufferSize
98*349cc55cSDimitry Andric     1024, // GV_Max_WG_Size,
99*349cc55cSDimitry Andric     256,  // GV_Default_WG_Size
100*349cc55cSDimitry Andric };
101*349cc55cSDimitry Andric 
102*349cc55cSDimitry Andric template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
103*349cc55cSDimitry Andric   static_assert(wavesize == 32 || wavesize == 64, "");
104*349cc55cSDimitry Andric   return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
105*349cc55cSDimitry Andric }
106*349cc55cSDimitry Andric 
107*349cc55cSDimitry Andric /// For Nvidia GPUs
108*349cc55cSDimitry Andric static constexpr GV NVPTXGridValues = {
109*349cc55cSDimitry Andric     256,  // GV_Slot_Size
110*349cc55cSDimitry Andric     32,   // GV_Warp_Size
1115ffd83dbSDimitry Andric     1024, // GV_Max_Teams
1125ffd83dbSDimitry Andric     896,  // GV_SimpleBufferSize
1135ffd83dbSDimitry Andric     1024, // GV_Max_WG_Size
114*349cc55cSDimitry Andric     128,  // GV_Default_WG_Size
1155ffd83dbSDimitry Andric };
1165ffd83dbSDimitry Andric 
1175ffd83dbSDimitry Andric } // namespace omp
1185ffd83dbSDimitry Andric } // namespace llvm
1195ffd83dbSDimitry Andric 
120fe6060f1SDimitry Andric #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
121