xref: /freebsd-src/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
15ffd83dbSDimitry Andric //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
25ffd83dbSDimitry Andric //
3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric ///
95ffd83dbSDimitry Andric /// \file
105ffd83dbSDimitry Andric /// \brief Provides definitions for Target specific Grid Values
115ffd83dbSDimitry Andric ///
125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric 
145ffd83dbSDimitry Andric #ifndef LLVM_OPENMP_GRIDVALUES_H
155ffd83dbSDimitry Andric #define LLVM_OPENMP_GRIDVALUES_H
165ffd83dbSDimitry Andric 
175ffd83dbSDimitry Andric namespace llvm {
185ffd83dbSDimitry Andric 
195ffd83dbSDimitry Andric namespace omp {
205ffd83dbSDimitry Andric 
215ffd83dbSDimitry Andric /// \brief Defines various target-specific GPU grid values that must be
225ffd83dbSDimitry Andric ///        consistent between host RTL (plugin), device RTL, and clang.
235ffd83dbSDimitry Andric ///        We can change grid values for a "fat" binary so that different
245ffd83dbSDimitry Andric ///        passes get the correct values when generating code for a
255ffd83dbSDimitry Andric ///        multi-target binary. Both amdgcn and nvptx values are stored in
265ffd83dbSDimitry Andric ///        this file. In the future, should there be differences between GPUs
275ffd83dbSDimitry Andric ///        of the same architecture, then simply make a different array and
285ffd83dbSDimitry Andric ///        use the new array name.
295ffd83dbSDimitry Andric ///
305ffd83dbSDimitry Andric /// Example usage in clang:
31*e8d8bef9SDimitry Andric ///   const unsigned slot_size =
32*e8d8bef9SDimitry Andric ///   ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
335ffd83dbSDimitry Andric ///
345ffd83dbSDimitry Andric /// Example usage in libomptarget/deviceRTLs:
35*e8d8bef9SDimitry Andric ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
365ffd83dbSDimitry Andric ///   #ifdef __AMDGPU__
375ffd83dbSDimitry Andric ///     #define GRIDVAL AMDGPUGpuGridValues
385ffd83dbSDimitry Andric ///   #else
395ffd83dbSDimitry Andric ///     #define GRIDVAL NVPTXGpuGridValues
405ffd83dbSDimitry Andric ///   #endif
415ffd83dbSDimitry Andric ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
42*e8d8bef9SDimitry Andric ///   llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
435ffd83dbSDimitry Andric ///
445ffd83dbSDimitry Andric /// Example usage in libomptarget hsa plugin:
45*e8d8bef9SDimitry Andric ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
465ffd83dbSDimitry Andric ///   #define GRIDVAL AMDGPUGpuGridValues
475ffd83dbSDimitry Andric ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
48*e8d8bef9SDimitry Andric ///   llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
495ffd83dbSDimitry Andric ///
505ffd83dbSDimitry Andric /// Example usage in libomptarget cuda plugin:
51*e8d8bef9SDimitry Andric ///    #include "llvm/Frontend/OpenMP/OMPGridValues.h"
525ffd83dbSDimitry Andric ///    #define GRIDVAL NVPTXGpuGridValues
535ffd83dbSDimitry Andric ///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
54*e8d8bef9SDimitry Andric ///    llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
555ffd83dbSDimitry Andric ///
565ffd83dbSDimitry Andric enum GVIDX {
575ffd83dbSDimitry Andric   /// The maximum number of workers in a kernel.
585ffd83dbSDimitry Andric   /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
595ffd83dbSDimitry Andric   GV_Threads,
605ffd83dbSDimitry Andric   /// The size reserved for data in a shared memory slot.
615ffd83dbSDimitry Andric   GV_Slot_Size,
625ffd83dbSDimitry Andric   /// The default value of maximum number of threads in a worker warp.
635ffd83dbSDimitry Andric   GV_Warp_Size,
645ffd83dbSDimitry Andric   /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
655ffd83dbSDimitry Andric   /// for NVPTX.
665ffd83dbSDimitry Andric   GV_Warp_Size_32,
675ffd83dbSDimitry Andric   /// The number of bits required to represent the max number of threads in warp
685ffd83dbSDimitry Andric   GV_Warp_Size_Log2,
695ffd83dbSDimitry Andric   /// GV_Warp_Size * GV_Slot_Size,
705ffd83dbSDimitry Andric   GV_Warp_Slot_Size,
715ffd83dbSDimitry Andric   /// the maximum number of teams.
725ffd83dbSDimitry Andric   GV_Max_Teams,
735ffd83dbSDimitry Andric   /// Global Memory Alignment
745ffd83dbSDimitry Andric   GV_Mem_Align,
755ffd83dbSDimitry Andric   /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
765ffd83dbSDimitry Andric   GV_Warp_Size_Log2_Mask,
775ffd83dbSDimitry Andric   // An alternative to the heavy data sharing infrastructure that uses global
785ffd83dbSDimitry Andric   // memory is one that uses device __shared__ memory.  The amount of such space
795ffd83dbSDimitry Andric   // (in bytes) reserved by the OpenMP runtime is noted here.
805ffd83dbSDimitry Andric   GV_SimpleBufferSize,
815ffd83dbSDimitry Andric   // The absolute maximum team size for a working group
825ffd83dbSDimitry Andric   GV_Max_WG_Size,
835ffd83dbSDimitry Andric   // The default maximum team size for a working group
845ffd83dbSDimitry Andric   GV_Default_WG_Size,
855ffd83dbSDimitry Andric   // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
865ffd83dbSDimitry Andric   GV_Max_Warp_Number,
875ffd83dbSDimitry Andric   /// The slot size that should be reserved for a working warp.
885ffd83dbSDimitry Andric   /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
895ffd83dbSDimitry Andric   GV_Warp_Size_Log2_MaskL
905ffd83dbSDimitry Andric };
915ffd83dbSDimitry Andric 
925ffd83dbSDimitry Andric /// For AMDGPU GPUs
935ffd83dbSDimitry Andric static constexpr unsigned AMDGPUGpuGridValues[] = {
945ffd83dbSDimitry Andric     448,       // GV_Threads
955ffd83dbSDimitry Andric     256,       // GV_Slot_Size
965ffd83dbSDimitry Andric     64,        // GV_Warp_Size
975ffd83dbSDimitry Andric     32,        // GV_Warp_Size_32
985ffd83dbSDimitry Andric     6,         // GV_Warp_Size_Log2
995ffd83dbSDimitry Andric     64 * 256,  // GV_Warp_Slot_Size
1005ffd83dbSDimitry Andric     128,       // GV_Max_Teams
1015ffd83dbSDimitry Andric     256,       // GV_Mem_Align
1025ffd83dbSDimitry Andric     63,        // GV_Warp_Size_Log2_Mask
1035ffd83dbSDimitry Andric     896,       // GV_SimpleBufferSize
1045ffd83dbSDimitry Andric     1024,      // GV_Max_WG_Size,
1055ffd83dbSDimitry Andric     256,       // GV_Defaut_WG_Size
1065ffd83dbSDimitry Andric     1024 / 64, // GV_Max_WG_Size / GV_WarpSize
1075ffd83dbSDimitry Andric     63         // GV_Warp_Size_Log2_MaskL
1085ffd83dbSDimitry Andric };
1095ffd83dbSDimitry Andric 
1105ffd83dbSDimitry Andric /// For Nvidia GPUs
1115ffd83dbSDimitry Andric static constexpr unsigned NVPTXGpuGridValues[] = {
1125ffd83dbSDimitry Andric     992,               // GV_Threads
1135ffd83dbSDimitry Andric     256,               // GV_Slot_Size
1145ffd83dbSDimitry Andric     32,                // GV_Warp_Size
1155ffd83dbSDimitry Andric     32,                // GV_Warp_Size_32
1165ffd83dbSDimitry Andric     5,                 // GV_Warp_Size_Log2
1175ffd83dbSDimitry Andric     32 * 256,          // GV_Warp_Slot_Size
1185ffd83dbSDimitry Andric     1024,              // GV_Max_Teams
1195ffd83dbSDimitry Andric     256,               // GV_Mem_Align
1205ffd83dbSDimitry Andric     (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
1215ffd83dbSDimitry Andric     896,               // GV_SimpleBufferSize
1225ffd83dbSDimitry Andric     1024,              // GV_Max_WG_Size
1235ffd83dbSDimitry Andric     128,               // GV_Defaut_WG_Size
1245ffd83dbSDimitry Andric     1024 / 32,         // GV_Max_WG_Size / GV_WarpSize
1255ffd83dbSDimitry Andric     31                 // GV_Warp_Size_Log2_MaskL
1265ffd83dbSDimitry Andric };
1275ffd83dbSDimitry Andric 
1285ffd83dbSDimitry Andric } // namespace omp
1295ffd83dbSDimitry Andric } // namespace llvm
1305ffd83dbSDimitry Andric 
1315ffd83dbSDimitry Andric #endif // LLVM_OPENMP_GRIDVALUES_H
132