1*5ffd83dbSDimitry Andric //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// 2*5ffd83dbSDimitry Andric // 3*5ffd83dbSDimitry Andric // The LLVM Compiler Infrastructure 4*5ffd83dbSDimitry Andric // 5*5ffd83dbSDimitry Andric // This file is distributed under the University of Illinois Open Source 6*5ffd83dbSDimitry Andric // License. See LICENSE.TXT for details. 7*5ffd83dbSDimitry Andric // 8*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 9*5ffd83dbSDimitry Andric /// 10*5ffd83dbSDimitry Andric /// \file 11*5ffd83dbSDimitry Andric /// \brief Provides definitions for Target specific Grid Values 12*5ffd83dbSDimitry Andric /// 13*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 14*5ffd83dbSDimitry Andric 15*5ffd83dbSDimitry Andric #ifndef LLVM_OPENMP_GRIDVALUES_H 16*5ffd83dbSDimitry Andric #define LLVM_OPENMP_GRIDVALUES_H 17*5ffd83dbSDimitry Andric 18*5ffd83dbSDimitry Andric namespace llvm { 19*5ffd83dbSDimitry Andric 20*5ffd83dbSDimitry Andric namespace omp { 21*5ffd83dbSDimitry Andric 22*5ffd83dbSDimitry Andric /// \brief Defines various target-specific GPU grid values that must be 23*5ffd83dbSDimitry Andric /// consistent between host RTL (plugin), device RTL, and clang. 24*5ffd83dbSDimitry Andric /// We can change grid values for a "fat" binary so that different 25*5ffd83dbSDimitry Andric /// passes get the correct values when generating code for a 26*5ffd83dbSDimitry Andric /// multi-target binary. Both amdgcn and nvptx values are stored in 27*5ffd83dbSDimitry Andric /// this file. In the future, should there be differences between GPUs 28*5ffd83dbSDimitry Andric /// of the same architecture, then simply make a different array and 29*5ffd83dbSDimitry Andric /// use the new array name. 30*5ffd83dbSDimitry Andric /// 31*5ffd83dbSDimitry Andric /// Example usage in clang: 32*5ffd83dbSDimitry Andric /// const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size); 33*5ffd83dbSDimitry Andric /// 34*5ffd83dbSDimitry Andric /// Example usage in libomptarget/deviceRTLs: 35*5ffd83dbSDimitry Andric /// #include "OMPGridValues.h" 36*5ffd83dbSDimitry Andric /// #ifdef __AMDGPU__ 37*5ffd83dbSDimitry Andric /// #define GRIDVAL AMDGPUGpuGridValues 38*5ffd83dbSDimitry Andric /// #else 39*5ffd83dbSDimitry Andric /// #define GRIDVAL NVPTXGpuGridValues 40*5ffd83dbSDimitry Andric /// #endif 41*5ffd83dbSDimitry Andric /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. 42*5ffd83dbSDimitry Andric /// GRIDVAL[GV_Warp_Size] 43*5ffd83dbSDimitry Andric /// 44*5ffd83dbSDimitry Andric /// Example usage in libomptarget hsa plugin: 45*5ffd83dbSDimitry Andric /// #include "OMPGridValues.h" 46*5ffd83dbSDimitry Andric /// #define GRIDVAL AMDGPUGpuGridValues 47*5ffd83dbSDimitry Andric /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. 48*5ffd83dbSDimitry Andric /// GRIDVAL[GV_Warp_Size] 49*5ffd83dbSDimitry Andric /// 50*5ffd83dbSDimitry Andric /// Example usage in libomptarget cuda plugin: 51*5ffd83dbSDimitry Andric /// #include "OMPGridValues.h" 52*5ffd83dbSDimitry Andric /// #define GRIDVAL NVPTXGpuGridValues 53*5ffd83dbSDimitry Andric /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. 54*5ffd83dbSDimitry Andric /// GRIDVAL[GV_Warp_Size] 55*5ffd83dbSDimitry Andric /// 56*5ffd83dbSDimitry Andric enum GVIDX { 57*5ffd83dbSDimitry Andric /// The maximum number of workers in a kernel. 58*5ffd83dbSDimitry Andric /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z 59*5ffd83dbSDimitry Andric GV_Threads, 60*5ffd83dbSDimitry Andric /// The size reserved for data in a shared memory slot. 61*5ffd83dbSDimitry Andric GV_Slot_Size, 62*5ffd83dbSDimitry Andric /// The default value of maximum number of threads in a worker warp. 63*5ffd83dbSDimitry Andric GV_Warp_Size, 64*5ffd83dbSDimitry Andric /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size 65*5ffd83dbSDimitry Andric /// for NVPTX. 66*5ffd83dbSDimitry Andric GV_Warp_Size_32, 67*5ffd83dbSDimitry Andric /// The number of bits required to represent the max number of threads in warp 68*5ffd83dbSDimitry Andric GV_Warp_Size_Log2, 69*5ffd83dbSDimitry Andric /// GV_Warp_Size * GV_Slot_Size, 70*5ffd83dbSDimitry Andric GV_Warp_Slot_Size, 71*5ffd83dbSDimitry Andric /// the maximum number of teams. 72*5ffd83dbSDimitry Andric GV_Max_Teams, 73*5ffd83dbSDimitry Andric /// Global Memory Alignment 74*5ffd83dbSDimitry Andric GV_Mem_Align, 75*5ffd83dbSDimitry Andric /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) 76*5ffd83dbSDimitry Andric GV_Warp_Size_Log2_Mask, 77*5ffd83dbSDimitry Andric // An alternative to the heavy data sharing infrastructure that uses global 78*5ffd83dbSDimitry Andric // memory is one that uses device __shared__ memory. The amount of such space 79*5ffd83dbSDimitry Andric // (in bytes) reserved by the OpenMP runtime is noted here. 80*5ffd83dbSDimitry Andric GV_SimpleBufferSize, 81*5ffd83dbSDimitry Andric // The absolute maximum team size for a working group 82*5ffd83dbSDimitry Andric GV_Max_WG_Size, 83*5ffd83dbSDimitry Andric // The default maximum team size for a working group 84*5ffd83dbSDimitry Andric GV_Default_WG_Size, 85*5ffd83dbSDimitry Andric // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN. 86*5ffd83dbSDimitry Andric GV_Max_Warp_Number, 87*5ffd83dbSDimitry Andric /// The slot size that should be reserved for a working warp. 88*5ffd83dbSDimitry Andric /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) 89*5ffd83dbSDimitry Andric GV_Warp_Size_Log2_MaskL 90*5ffd83dbSDimitry Andric }; 91*5ffd83dbSDimitry Andric 92*5ffd83dbSDimitry Andric /// For AMDGPU GPUs 93*5ffd83dbSDimitry Andric static constexpr unsigned AMDGPUGpuGridValues[] = { 94*5ffd83dbSDimitry Andric 448, // GV_Threads 95*5ffd83dbSDimitry Andric 256, // GV_Slot_Size 96*5ffd83dbSDimitry Andric 64, // GV_Warp_Size 97*5ffd83dbSDimitry Andric 32, // GV_Warp_Size_32 98*5ffd83dbSDimitry Andric 6, // GV_Warp_Size_Log2 99*5ffd83dbSDimitry Andric 64 * 256, // GV_Warp_Slot_Size 100*5ffd83dbSDimitry Andric 128, // GV_Max_Teams 101*5ffd83dbSDimitry Andric 256, // GV_Mem_Align 102*5ffd83dbSDimitry Andric 63, // GV_Warp_Size_Log2_Mask 103*5ffd83dbSDimitry Andric 896, // GV_SimpleBufferSize 104*5ffd83dbSDimitry Andric 1024, // GV_Max_WG_Size, 105*5ffd83dbSDimitry Andric 256, // GV_Defaut_WG_Size 106*5ffd83dbSDimitry Andric 1024 / 64, // GV_Max_WG_Size / GV_WarpSize 107*5ffd83dbSDimitry Andric 63 // GV_Warp_Size_Log2_MaskL 108*5ffd83dbSDimitry Andric }; 109*5ffd83dbSDimitry Andric 110*5ffd83dbSDimitry Andric /// For Nvidia GPUs 111*5ffd83dbSDimitry Andric static constexpr unsigned NVPTXGpuGridValues[] = { 112*5ffd83dbSDimitry Andric 992, // GV_Threads 113*5ffd83dbSDimitry Andric 256, // GV_Slot_Size 114*5ffd83dbSDimitry Andric 32, // GV_Warp_Size 115*5ffd83dbSDimitry Andric 32, // GV_Warp_Size_32 116*5ffd83dbSDimitry Andric 5, // GV_Warp_Size_Log2 117*5ffd83dbSDimitry Andric 32 * 256, // GV_Warp_Slot_Size 118*5ffd83dbSDimitry Andric 1024, // GV_Max_Teams 119*5ffd83dbSDimitry Andric 256, // GV_Mem_Align 120*5ffd83dbSDimitry Andric (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask 121*5ffd83dbSDimitry Andric 896, // GV_SimpleBufferSize 122*5ffd83dbSDimitry Andric 1024, // GV_Max_WG_Size 123*5ffd83dbSDimitry Andric 128, // GV_Defaut_WG_Size 124*5ffd83dbSDimitry Andric 1024 / 32, // GV_Max_WG_Size / GV_WarpSize 125*5ffd83dbSDimitry Andric 31 // GV_Warp_Size_Log2_MaskL 126*5ffd83dbSDimitry Andric }; 127*5ffd83dbSDimitry Andric 128*5ffd83dbSDimitry Andric } // namespace omp 129*5ffd83dbSDimitry Andric } // namespace llvm 130*5ffd83dbSDimitry Andric 131*5ffd83dbSDimitry Andric #endif // LLVM_OPENMP_GRIDVALUES_H 132