xref: /freebsd-src/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
1*5ffd83dbSDimitry Andric //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
2*5ffd83dbSDimitry Andric //
3*5ffd83dbSDimitry Andric //                     The LLVM Compiler Infrastructure
4*5ffd83dbSDimitry Andric //
5*5ffd83dbSDimitry Andric // This file is distributed under the University of Illinois Open Source
6*5ffd83dbSDimitry Andric // License. See LICENSE.TXT for details.
7*5ffd83dbSDimitry Andric //
8*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
9*5ffd83dbSDimitry Andric ///
10*5ffd83dbSDimitry Andric /// \file
11*5ffd83dbSDimitry Andric /// \brief Provides definitions for Target specific Grid Values
12*5ffd83dbSDimitry Andric ///
13*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
14*5ffd83dbSDimitry Andric 
15*5ffd83dbSDimitry Andric #ifndef LLVM_OPENMP_GRIDVALUES_H
16*5ffd83dbSDimitry Andric #define LLVM_OPENMP_GRIDVALUES_H
17*5ffd83dbSDimitry Andric 
18*5ffd83dbSDimitry Andric namespace llvm {
19*5ffd83dbSDimitry Andric 
20*5ffd83dbSDimitry Andric namespace omp {
21*5ffd83dbSDimitry Andric 
22*5ffd83dbSDimitry Andric /// \brief Defines various target-specific GPU grid values that must be
23*5ffd83dbSDimitry Andric ///        consistent between host RTL (plugin), device RTL, and clang.
24*5ffd83dbSDimitry Andric ///        We can change grid values for a "fat" binary so that different
25*5ffd83dbSDimitry Andric ///        passes get the correct values when generating code for a
26*5ffd83dbSDimitry Andric ///        multi-target binary. Both amdgcn and nvptx values are stored in
27*5ffd83dbSDimitry Andric ///        this file. In the future, should there be differences between GPUs
28*5ffd83dbSDimitry Andric ///        of the same architecture, then simply make a different array and
29*5ffd83dbSDimitry Andric ///        use the new array name.
30*5ffd83dbSDimitry Andric ///
31*5ffd83dbSDimitry Andric /// Example usage in clang:
32*5ffd83dbSDimitry Andric ///   const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size);
33*5ffd83dbSDimitry Andric ///
34*5ffd83dbSDimitry Andric /// Example usage in libomptarget/deviceRTLs:
35*5ffd83dbSDimitry Andric ///   #include "OMPGridValues.h"
36*5ffd83dbSDimitry Andric ///   #ifdef __AMDGPU__
37*5ffd83dbSDimitry Andric ///     #define GRIDVAL AMDGPUGpuGridValues
38*5ffd83dbSDimitry Andric ///   #else
39*5ffd83dbSDimitry Andric ///     #define GRIDVAL NVPTXGpuGridValues
40*5ffd83dbSDimitry Andric ///   #endif
41*5ffd83dbSDimitry Andric ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
42*5ffd83dbSDimitry Andric ///   GRIDVAL[GV_Warp_Size]
43*5ffd83dbSDimitry Andric ///
44*5ffd83dbSDimitry Andric /// Example usage in libomptarget hsa plugin:
45*5ffd83dbSDimitry Andric ///   #include "OMPGridValues.h"
46*5ffd83dbSDimitry Andric ///   #define GRIDVAL AMDGPUGpuGridValues
47*5ffd83dbSDimitry Andric ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
48*5ffd83dbSDimitry Andric ///   GRIDVAL[GV_Warp_Size]
49*5ffd83dbSDimitry Andric ///
50*5ffd83dbSDimitry Andric /// Example usage in libomptarget cuda plugin:
51*5ffd83dbSDimitry Andric ///    #include "OMPGridValues.h"
52*5ffd83dbSDimitry Andric ///    #define GRIDVAL NVPTXGpuGridValues
53*5ffd83dbSDimitry Andric ///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
54*5ffd83dbSDimitry Andric ///    GRIDVAL[GV_Warp_Size]
55*5ffd83dbSDimitry Andric ///
56*5ffd83dbSDimitry Andric enum GVIDX {
57*5ffd83dbSDimitry Andric   /// The maximum number of workers in a kernel.
58*5ffd83dbSDimitry Andric   /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
59*5ffd83dbSDimitry Andric   GV_Threads,
60*5ffd83dbSDimitry Andric   /// The size reserved for data in a shared memory slot.
61*5ffd83dbSDimitry Andric   GV_Slot_Size,
62*5ffd83dbSDimitry Andric   /// The default value of maximum number of threads in a worker warp.
63*5ffd83dbSDimitry Andric   GV_Warp_Size,
64*5ffd83dbSDimitry Andric   /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
65*5ffd83dbSDimitry Andric   /// for NVPTX.
66*5ffd83dbSDimitry Andric   GV_Warp_Size_32,
67*5ffd83dbSDimitry Andric   /// The number of bits required to represent the max number of threads in warp
68*5ffd83dbSDimitry Andric   GV_Warp_Size_Log2,
69*5ffd83dbSDimitry Andric   /// GV_Warp_Size * GV_Slot_Size,
70*5ffd83dbSDimitry Andric   GV_Warp_Slot_Size,
71*5ffd83dbSDimitry Andric   /// the maximum number of teams.
72*5ffd83dbSDimitry Andric   GV_Max_Teams,
73*5ffd83dbSDimitry Andric   /// Global Memory Alignment
74*5ffd83dbSDimitry Andric   GV_Mem_Align,
75*5ffd83dbSDimitry Andric   /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
76*5ffd83dbSDimitry Andric   GV_Warp_Size_Log2_Mask,
77*5ffd83dbSDimitry Andric   // An alternative to the heavy data sharing infrastructure that uses global
78*5ffd83dbSDimitry Andric   // memory is one that uses device __shared__ memory.  The amount of such space
79*5ffd83dbSDimitry Andric   // (in bytes) reserved by the OpenMP runtime is noted here.
80*5ffd83dbSDimitry Andric   GV_SimpleBufferSize,
81*5ffd83dbSDimitry Andric   // The absolute maximum team size for a working group
82*5ffd83dbSDimitry Andric   GV_Max_WG_Size,
83*5ffd83dbSDimitry Andric   // The default maximum team size for a working group
84*5ffd83dbSDimitry Andric   GV_Default_WG_Size,
85*5ffd83dbSDimitry Andric   // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
86*5ffd83dbSDimitry Andric   GV_Max_Warp_Number,
87*5ffd83dbSDimitry Andric   /// The slot size that should be reserved for a working warp.
88*5ffd83dbSDimitry Andric   /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
89*5ffd83dbSDimitry Andric   GV_Warp_Size_Log2_MaskL
90*5ffd83dbSDimitry Andric };
91*5ffd83dbSDimitry Andric 
92*5ffd83dbSDimitry Andric /// For AMDGPU GPUs
93*5ffd83dbSDimitry Andric static constexpr unsigned AMDGPUGpuGridValues[] = {
94*5ffd83dbSDimitry Andric     448,       // GV_Threads
95*5ffd83dbSDimitry Andric     256,       // GV_Slot_Size
96*5ffd83dbSDimitry Andric     64,        // GV_Warp_Size
97*5ffd83dbSDimitry Andric     32,        // GV_Warp_Size_32
98*5ffd83dbSDimitry Andric     6,         // GV_Warp_Size_Log2
99*5ffd83dbSDimitry Andric     64 * 256,  // GV_Warp_Slot_Size
100*5ffd83dbSDimitry Andric     128,       // GV_Max_Teams
101*5ffd83dbSDimitry Andric     256,       // GV_Mem_Align
102*5ffd83dbSDimitry Andric     63,        // GV_Warp_Size_Log2_Mask
103*5ffd83dbSDimitry Andric     896,       // GV_SimpleBufferSize
104*5ffd83dbSDimitry Andric     1024,      // GV_Max_WG_Size,
105*5ffd83dbSDimitry Andric     256,       // GV_Defaut_WG_Size
106*5ffd83dbSDimitry Andric     1024 / 64, // GV_Max_WG_Size / GV_WarpSize
107*5ffd83dbSDimitry Andric     63         // GV_Warp_Size_Log2_MaskL
108*5ffd83dbSDimitry Andric };
109*5ffd83dbSDimitry Andric 
110*5ffd83dbSDimitry Andric /// For Nvidia GPUs
111*5ffd83dbSDimitry Andric static constexpr unsigned NVPTXGpuGridValues[] = {
112*5ffd83dbSDimitry Andric     992,               // GV_Threads
113*5ffd83dbSDimitry Andric     256,               // GV_Slot_Size
114*5ffd83dbSDimitry Andric     32,                // GV_Warp_Size
115*5ffd83dbSDimitry Andric     32,                // GV_Warp_Size_32
116*5ffd83dbSDimitry Andric     5,                 // GV_Warp_Size_Log2
117*5ffd83dbSDimitry Andric     32 * 256,          // GV_Warp_Slot_Size
118*5ffd83dbSDimitry Andric     1024,              // GV_Max_Teams
119*5ffd83dbSDimitry Andric     256,               // GV_Mem_Align
120*5ffd83dbSDimitry Andric     (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
121*5ffd83dbSDimitry Andric     896,               // GV_SimpleBufferSize
122*5ffd83dbSDimitry Andric     1024,              // GV_Max_WG_Size
123*5ffd83dbSDimitry Andric     128,               // GV_Defaut_WG_Size
124*5ffd83dbSDimitry Andric     1024 / 32,         // GV_Max_WG_Size / GV_WarpSize
125*5ffd83dbSDimitry Andric     31                 // GV_Warp_Size_Log2_MaskL
126*5ffd83dbSDimitry Andric };
127*5ffd83dbSDimitry Andric 
128*5ffd83dbSDimitry Andric } // namespace omp
129*5ffd83dbSDimitry Andric } // namespace llvm
130*5ffd83dbSDimitry Andric 
131*5ffd83dbSDimitry Andric #endif // LLVM_OPENMP_GRIDVALUES_H
132