15ffd83dbSDimitry Andric //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// 25ffd83dbSDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric /// 95ffd83dbSDimitry Andric /// \file 105ffd83dbSDimitry Andric /// \brief Provides definitions for Target specific Grid Values 115ffd83dbSDimitry Andric /// 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 14fe6060f1SDimitry Andric #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H 15fe6060f1SDimitry Andric #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H 165ffd83dbSDimitry Andric 175ffd83dbSDimitry Andric namespace llvm { 185ffd83dbSDimitry Andric 195ffd83dbSDimitry Andric namespace omp { 205ffd83dbSDimitry Andric 215ffd83dbSDimitry Andric /// \brief Defines various target-specific GPU grid values that must be 225ffd83dbSDimitry Andric /// consistent between host RTL (plugin), device RTL, and clang. 235ffd83dbSDimitry Andric /// We can change grid values for a "fat" binary so that different 245ffd83dbSDimitry Andric /// passes get the correct values when generating code for a 255ffd83dbSDimitry Andric /// multi-target binary. Both amdgcn and nvptx values are stored in 265ffd83dbSDimitry Andric /// this file. In the future, should there be differences between GPUs 275ffd83dbSDimitry Andric /// of the same architecture, then simply make a different array and 285ffd83dbSDimitry Andric /// use the new array name. 295ffd83dbSDimitry Andric /// 305ffd83dbSDimitry Andric /// Example usage in clang: 31e8d8bef9SDimitry Andric /// const unsigned slot_size = 32349cc55cSDimitry Andric /// ctx.GetTargetInfo().getGridValue().GV_Warp_Size; 335ffd83dbSDimitry Andric /// 345ffd83dbSDimitry Andric /// Example usage in libomptarget/deviceRTLs: 35e8d8bef9SDimitry Andric /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 365ffd83dbSDimitry Andric /// #ifdef __AMDGPU__ 37349cc55cSDimitry Andric /// #define GRIDVAL AMDGPUGridValues 385ffd83dbSDimitry Andric /// #else 39349cc55cSDimitry Andric /// #define GRIDVAL NVPTXGridValues 405ffd83dbSDimitry Andric /// #endif 415ffd83dbSDimitry Andric /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. 42349cc55cSDimitry Andric /// llvm::omp::GRIDVAL().GV_Warp_Size 435ffd83dbSDimitry Andric /// 445ffd83dbSDimitry Andric /// Example usage in libomptarget hsa plugin: 45e8d8bef9SDimitry Andric /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 46349cc55cSDimitry Andric /// #define GRIDVAL AMDGPUGridValues 475ffd83dbSDimitry Andric /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. 48349cc55cSDimitry Andric /// llvm::omp::GRIDVAL().GV_Warp_Size 495ffd83dbSDimitry Andric /// 505ffd83dbSDimitry Andric /// Example usage in libomptarget cuda plugin: 51e8d8bef9SDimitry Andric /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 52349cc55cSDimitry Andric /// #define GRIDVAL NVPTXGridValues 535ffd83dbSDimitry Andric /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. 54349cc55cSDimitry Andric /// llvm::omp::GRIDVAL().GV_Warp_Size 555ffd83dbSDimitry Andric /// 56349cc55cSDimitry Andric 57349cc55cSDimitry Andric struct GV { 585ffd83dbSDimitry Andric /// The size reserved for data in a shared memory slot. 59*bdd1243dSDimitry Andric unsigned GV_Slot_Size; 605ffd83dbSDimitry Andric /// The default value of maximum number of threads in a worker warp. 61*bdd1243dSDimitry Andric unsigned GV_Warp_Size; 62349cc55cSDimitry Andric 63349cc55cSDimitry Andric constexpr unsigned warpSlotSize() const { 64349cc55cSDimitry Andric return GV_Warp_Size * GV_Slot_Size; 65349cc55cSDimitry Andric } 66349cc55cSDimitry Andric 675ffd83dbSDimitry Andric /// the maximum number of teams. 68*bdd1243dSDimitry Andric unsigned GV_Max_Teams; 69*bdd1243dSDimitry Andric // The default number of teams in the absence of any other information. 70*bdd1243dSDimitry Andric unsigned GV_Default_Num_Teams; 71*bdd1243dSDimitry Andric 725ffd83dbSDimitry Andric // An alternative to the heavy data sharing infrastructure that uses global 735ffd83dbSDimitry Andric // memory is one that uses device __shared__ memory. The amount of such space 745ffd83dbSDimitry Andric // (in bytes) reserved by the OpenMP runtime is noted here. 75*bdd1243dSDimitry Andric unsigned GV_SimpleBufferSize; 765ffd83dbSDimitry Andric // The absolute maximum team size for a working group 77*bdd1243dSDimitry Andric unsigned GV_Max_WG_Size; 785ffd83dbSDimitry Andric // The default maximum team size for a working group 79*bdd1243dSDimitry Andric unsigned GV_Default_WG_Size; 80349cc55cSDimitry Andric 81349cc55cSDimitry Andric constexpr unsigned maxWarpNumber() const { 82349cc55cSDimitry Andric return GV_Max_WG_Size / GV_Warp_Size; 83349cc55cSDimitry Andric } 845ffd83dbSDimitry Andric }; 855ffd83dbSDimitry Andric 865ffd83dbSDimitry Andric /// For AMDGPU GPUs 87349cc55cSDimitry Andric static constexpr GV AMDGPUGridValues64 = { 885ffd83dbSDimitry Andric 256, // GV_Slot_Size 895ffd83dbSDimitry Andric 64, // GV_Warp_Size 90*bdd1243dSDimitry Andric (1 << 16), // GV_Max_Teams 91*bdd1243dSDimitry Andric 440, // GV_Default_Num_Teams 925ffd83dbSDimitry Andric 896, // GV_SimpleBufferSize 935ffd83dbSDimitry Andric 1024, // GV_Max_WG_Size, 94349cc55cSDimitry Andric 256, // GV_Default_WG_Size 955ffd83dbSDimitry Andric }; 965ffd83dbSDimitry Andric 97349cc55cSDimitry Andric static constexpr GV AMDGPUGridValues32 = { 985ffd83dbSDimitry Andric 256, // GV_Slot_Size 995ffd83dbSDimitry Andric 32, // GV_Warp_Size 100*bdd1243dSDimitry Andric (1 << 16), // GV_Max_Teams 101*bdd1243dSDimitry Andric 440, // GV_Default_Num_Teams 102349cc55cSDimitry Andric 896, // GV_SimpleBufferSize 103349cc55cSDimitry Andric 1024, // GV_Max_WG_Size, 104349cc55cSDimitry Andric 256, // GV_Default_WG_Size 105349cc55cSDimitry Andric }; 106349cc55cSDimitry Andric 107349cc55cSDimitry Andric template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() { 108*bdd1243dSDimitry Andric static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize"); 109349cc55cSDimitry Andric return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64; 110349cc55cSDimitry Andric } 111349cc55cSDimitry Andric 112349cc55cSDimitry Andric /// For Nvidia GPUs 113349cc55cSDimitry Andric static constexpr GV NVPTXGridValues = { 114349cc55cSDimitry Andric 256, // GV_Slot_Size 115349cc55cSDimitry Andric 32, // GV_Warp_Size 116*bdd1243dSDimitry Andric (1 << 16), // GV_Max_Teams 117*bdd1243dSDimitry Andric 3200, // GV_Default_Num_Teams 1185ffd83dbSDimitry Andric 896, // GV_SimpleBufferSize 1195ffd83dbSDimitry Andric 1024, // GV_Max_WG_Size 120349cc55cSDimitry Andric 128, // GV_Default_WG_Size 1215ffd83dbSDimitry Andric }; 1225ffd83dbSDimitry Andric 1235ffd83dbSDimitry Andric } // namespace omp 1245ffd83dbSDimitry Andric } // namespace llvm 1255ffd83dbSDimitry Andric 126fe6060f1SDimitry Andric #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H 127