xref: /llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h (revision 48d5ad93cd6921de498a00421d696dba33fac7e4)
14022bc2aSSaiyedul Islam //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
24022bc2aSSaiyedul Islam //
3c66e0910SJonChesterfield // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c66e0910SJonChesterfield // See https://llvm.org/LICENSE.txt for license information.
5c66e0910SJonChesterfield // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64022bc2aSSaiyedul Islam //
74022bc2aSSaiyedul Islam //===----------------------------------------------------------------------===//
84022bc2aSSaiyedul Islam ///
94022bc2aSSaiyedul Islam /// \file
104022bc2aSSaiyedul Islam /// \brief Provides definitions for Target specific Grid Values
114022bc2aSSaiyedul Islam ///
124022bc2aSSaiyedul Islam //===----------------------------------------------------------------------===//
134022bc2aSSaiyedul Islam 
14aa5c09beSKazu Hirata #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
15aa5c09beSKazu Hirata #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
164022bc2aSSaiyedul Islam 
174022bc2aSSaiyedul Islam namespace llvm {
184022bc2aSSaiyedul Islam 
194022bc2aSSaiyedul Islam namespace omp {
204022bc2aSSaiyedul Islam 
214022bc2aSSaiyedul Islam /// \brief Defines various target-specific GPU grid values that must be
224022bc2aSSaiyedul Islam ///        consistent between host RTL (plugin), device RTL, and clang.
234022bc2aSSaiyedul Islam ///        We can change grid values for a "fat" binary so that different
244022bc2aSSaiyedul Islam ///        passes get the correct values when generating code for a
254022bc2aSSaiyedul Islam ///        multi-target binary. Both amdgcn and nvptx values are stored in
264022bc2aSSaiyedul Islam ///        this file. In the future, should there be differences between GPUs
274022bc2aSSaiyedul Islam ///        of the same architecture, then simply make a different array and
284022bc2aSSaiyedul Islam ///        use the new array name.
294022bc2aSSaiyedul Islam ///
304022bc2aSSaiyedul Islam /// Example usage in clang:
31cb319b1bSSaiyedul Islam ///   const unsigned slot_size =
3277579b99SJon Chesterfield ///   ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
334022bc2aSSaiyedul Islam ///
344022bc2aSSaiyedul Islam /// Example usage in libomptarget/deviceRTLs:
35cb319b1bSSaiyedul Islam ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
364022bc2aSSaiyedul Islam ///   #ifdef __AMDGPU__
3777579b99SJon Chesterfield ///     #define GRIDVAL AMDGPUGridValues
384022bc2aSSaiyedul Islam ///   #else
3977579b99SJon Chesterfield ///     #define GRIDVAL NVPTXGridValues
404022bc2aSSaiyedul Islam ///   #endif
414022bc2aSSaiyedul Islam ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
4277579b99SJon Chesterfield ///   llvm::omp::GRIDVAL().GV_Warp_Size
434022bc2aSSaiyedul Islam ///
444022bc2aSSaiyedul Islam /// Example usage in libomptarget hsa plugin:
45cb319b1bSSaiyedul Islam ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
4677579b99SJon Chesterfield ///   #define GRIDVAL AMDGPUGridValues
474022bc2aSSaiyedul Islam ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
4877579b99SJon Chesterfield ///   llvm::omp::GRIDVAL().GV_Warp_Size
494022bc2aSSaiyedul Islam ///
504022bc2aSSaiyedul Islam /// Example usage in libomptarget cuda plugin:
51cb319b1bSSaiyedul Islam ///    #include "llvm/Frontend/OpenMP/OMPGridValues.h"
5277579b99SJon Chesterfield ///    #define GRIDVAL NVPTXGridValues
534022bc2aSSaiyedul Islam ///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
5477579b99SJon Chesterfield ///    llvm::omp::GRIDVAL().GV_Warp_Size
554022bc2aSSaiyedul Islam ///
5677579b99SJon Chesterfield 
5777579b99SJon Chesterfield struct GV {
584022bc2aSSaiyedul Islam   /// The size reserved for data in a shared memory slot.
5984690419SKevin Sala   unsigned GV_Slot_Size;
604022bc2aSSaiyedul Islam   /// The default value of maximum number of threads in a worker warp.
6184690419SKevin Sala   unsigned GV_Warp_Size;
62c2574e63SJon Chesterfield 
warpSlotSizeGV63c2574e63SJon Chesterfield   constexpr unsigned warpSlotSize() const {
64c2574e63SJon Chesterfield     return GV_Warp_Size * GV_Slot_Size;
65c2574e63SJon Chesterfield   }
66c2574e63SJon Chesterfield 
674022bc2aSSaiyedul Islam   /// the maximum number of teams.
6884690419SKevin Sala   unsigned GV_Max_Teams;
69*fb2c42dfSJohannes Doerfert   // The default number of teams in the absence of any other information.
70*fb2c42dfSJohannes Doerfert   unsigned GV_Default_Num_Teams;
71*fb2c42dfSJohannes Doerfert 
724022bc2aSSaiyedul Islam   // An alternative to the heavy data sharing infrastructure that uses global
734022bc2aSSaiyedul Islam   // memory is one that uses device __shared__ memory.  The amount of such space
744022bc2aSSaiyedul Islam   // (in bytes) reserved by the OpenMP runtime is noted here.
7584690419SKevin Sala   unsigned GV_SimpleBufferSize;
764022bc2aSSaiyedul Islam   // The absolute maximum team size for a working group
7784690419SKevin Sala   unsigned GV_Max_WG_Size;
784022bc2aSSaiyedul Islam   // The default maximum team size for a working group
7984690419SKevin Sala   unsigned GV_Default_WG_Size;
80c2574e63SJon Chesterfield 
maxWarpNumberGV81c2574e63SJon Chesterfield   constexpr unsigned maxWarpNumber() const {
82c2574e63SJon Chesterfield     return GV_Max_WG_Size / GV_Warp_Size;
83c2574e63SJon Chesterfield   }
844022bc2aSSaiyedul Islam };
854022bc2aSSaiyedul Islam 
864022bc2aSSaiyedul Islam /// For AMDGPU GPUs
8778f92c38SJon Chesterfield static constexpr GV AMDGPUGridValues64 = {
884022bc2aSSaiyedul Islam     256,       // GV_Slot_Size
894022bc2aSSaiyedul Islam     64,        // GV_Warp_Size
902e9c3fe6SKevin Sala     (1 << 16), // GV_Max_Teams
91*fb2c42dfSJohannes Doerfert     440,       // GV_Default_Num_Teams
924022bc2aSSaiyedul Islam     896,       // GV_SimpleBufferSize
934022bc2aSSaiyedul Islam     1024,      // GV_Max_WG_Size,
94c2574e63SJon Chesterfield     256,       // GV_Default_WG_Size
954022bc2aSSaiyedul Islam };
964022bc2aSSaiyedul Islam 
9778f92c38SJon Chesterfield static constexpr GV AMDGPUGridValues32 = {
9878f92c38SJon Chesterfield     256,       // GV_Slot_Size
9978f92c38SJon Chesterfield     32,        // GV_Warp_Size
1002e9c3fe6SKevin Sala     (1 << 16), // GV_Max_Teams
101*fb2c42dfSJohannes Doerfert     440,       // GV_Default_Num_Teams
10278f92c38SJon Chesterfield     896,       // GV_SimpleBufferSize
10378f92c38SJon Chesterfield     1024,      // GV_Max_WG_Size,
10478f92c38SJon Chesterfield     256,       // GV_Default_WG_Size
10578f92c38SJon Chesterfield };
10678f92c38SJon Chesterfield 
getAMDGPUGridValues()10778f92c38SJon Chesterfield template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
10844c734afSDan Palermo   static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
10978f92c38SJon Chesterfield   return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
11078f92c38SJon Chesterfield }
11178f92c38SJon Chesterfield 
1124022bc2aSSaiyedul Islam /// For Nvidia GPUs
11377579b99SJon Chesterfield static constexpr GV NVPTXGridValues = {
1144022bc2aSSaiyedul Islam     256,       // GV_Slot_Size
1154022bc2aSSaiyedul Islam     32,        // GV_Warp_Size
1162e9c3fe6SKevin Sala     (1 << 16), // GV_Max_Teams
117*fb2c42dfSJohannes Doerfert     3200,      // GV_Default_Num_Teams
1184022bc2aSSaiyedul Islam     896,       // GV_SimpleBufferSize
1194022bc2aSSaiyedul Islam     1024,      // GV_Max_WG_Size
120c2574e63SJon Chesterfield     128,       // GV_Default_WG_Size
1214022bc2aSSaiyedul Islam };
1224022bc2aSSaiyedul Islam 
1234022bc2aSSaiyedul Islam } // namespace omp
1244022bc2aSSaiyedul Islam } // namespace llvm
1254022bc2aSSaiyedul Islam 
126aa5c09beSKazu Hirata #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
127