15ffd83dbSDimitry Andric //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// 25ffd83dbSDimitry Andric // 3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric /// 95ffd83dbSDimitry Andric /// \file 105ffd83dbSDimitry Andric /// \brief Provides definitions for Target specific Grid Values 115ffd83dbSDimitry Andric /// 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 145ffd83dbSDimitry Andric #ifndef LLVM_OPENMP_GRIDVALUES_H 155ffd83dbSDimitry Andric #define LLVM_OPENMP_GRIDVALUES_H 165ffd83dbSDimitry Andric 175ffd83dbSDimitry Andric namespace llvm { 185ffd83dbSDimitry Andric 195ffd83dbSDimitry Andric namespace omp { 205ffd83dbSDimitry Andric 215ffd83dbSDimitry Andric /// \brief Defines various target-specific GPU grid values that must be 225ffd83dbSDimitry Andric /// consistent between host RTL (plugin), device RTL, and clang. 235ffd83dbSDimitry Andric /// We can change grid values for a "fat" binary so that different 245ffd83dbSDimitry Andric /// passes get the correct values when generating code for a 255ffd83dbSDimitry Andric /// multi-target binary. Both amdgcn and nvptx values are stored in 265ffd83dbSDimitry Andric /// this file. In the future, should there be differences between GPUs 275ffd83dbSDimitry Andric /// of the same architecture, then simply make a different array and 285ffd83dbSDimitry Andric /// use the new array name. 295ffd83dbSDimitry Andric /// 305ffd83dbSDimitry Andric /// Example usage in clang: 31*e8d8bef9SDimitry Andric /// const unsigned slot_size = 32*e8d8bef9SDimitry Andric /// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size); 335ffd83dbSDimitry Andric /// 345ffd83dbSDimitry Andric /// Example usage in libomptarget/deviceRTLs: 35*e8d8bef9SDimitry Andric /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 365ffd83dbSDimitry Andric /// #ifdef __AMDGPU__ 375ffd83dbSDimitry Andric /// #define GRIDVAL AMDGPUGpuGridValues 385ffd83dbSDimitry Andric /// #else 395ffd83dbSDimitry Andric /// #define GRIDVAL NVPTXGpuGridValues 405ffd83dbSDimitry Andric /// #endif 415ffd83dbSDimitry Andric /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. 42*e8d8bef9SDimitry Andric /// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] 435ffd83dbSDimitry Andric /// 445ffd83dbSDimitry Andric /// Example usage in libomptarget hsa plugin: 45*e8d8bef9SDimitry Andric /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 465ffd83dbSDimitry Andric /// #define GRIDVAL AMDGPUGpuGridValues 475ffd83dbSDimitry Andric /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. 48*e8d8bef9SDimitry Andric /// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] 495ffd83dbSDimitry Andric /// 505ffd83dbSDimitry Andric /// Example usage in libomptarget cuda plugin: 51*e8d8bef9SDimitry Andric /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" 525ffd83dbSDimitry Andric /// #define GRIDVAL NVPTXGpuGridValues 535ffd83dbSDimitry Andric /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. 54*e8d8bef9SDimitry Andric /// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] 555ffd83dbSDimitry Andric /// 565ffd83dbSDimitry Andric enum GVIDX { 575ffd83dbSDimitry Andric /// The maximum number of workers in a kernel. 585ffd83dbSDimitry Andric /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z 595ffd83dbSDimitry Andric GV_Threads, 605ffd83dbSDimitry Andric /// The size reserved for data in a shared memory slot. 615ffd83dbSDimitry Andric GV_Slot_Size, 625ffd83dbSDimitry Andric /// The default value of maximum number of threads in a worker warp. 635ffd83dbSDimitry Andric GV_Warp_Size, 645ffd83dbSDimitry Andric /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size 655ffd83dbSDimitry Andric /// for NVPTX. 665ffd83dbSDimitry Andric GV_Warp_Size_32, 675ffd83dbSDimitry Andric /// The number of bits required to represent the max number of threads in warp 685ffd83dbSDimitry Andric GV_Warp_Size_Log2, 695ffd83dbSDimitry Andric /// GV_Warp_Size * GV_Slot_Size, 705ffd83dbSDimitry Andric GV_Warp_Slot_Size, 715ffd83dbSDimitry Andric /// the maximum number of teams. 725ffd83dbSDimitry Andric GV_Max_Teams, 735ffd83dbSDimitry Andric /// Global Memory Alignment 745ffd83dbSDimitry Andric GV_Mem_Align, 755ffd83dbSDimitry Andric /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) 765ffd83dbSDimitry Andric GV_Warp_Size_Log2_Mask, 775ffd83dbSDimitry Andric // An alternative to the heavy data sharing infrastructure that uses global 785ffd83dbSDimitry Andric // memory is one that uses device __shared__ memory. The amount of such space 795ffd83dbSDimitry Andric // (in bytes) reserved by the OpenMP runtime is noted here. 805ffd83dbSDimitry Andric GV_SimpleBufferSize, 815ffd83dbSDimitry Andric // The absolute maximum team size for a working group 825ffd83dbSDimitry Andric GV_Max_WG_Size, 835ffd83dbSDimitry Andric // The default maximum team size for a working group 845ffd83dbSDimitry Andric GV_Default_WG_Size, 855ffd83dbSDimitry Andric // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN. 865ffd83dbSDimitry Andric GV_Max_Warp_Number, 875ffd83dbSDimitry Andric /// The slot size that should be reserved for a working warp. 885ffd83dbSDimitry Andric /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) 895ffd83dbSDimitry Andric GV_Warp_Size_Log2_MaskL 905ffd83dbSDimitry Andric }; 915ffd83dbSDimitry Andric 925ffd83dbSDimitry Andric /// For AMDGPU GPUs 935ffd83dbSDimitry Andric static constexpr unsigned AMDGPUGpuGridValues[] = { 945ffd83dbSDimitry Andric 448, // GV_Threads 955ffd83dbSDimitry Andric 256, // GV_Slot_Size 965ffd83dbSDimitry Andric 64, // GV_Warp_Size 975ffd83dbSDimitry Andric 32, // GV_Warp_Size_32 985ffd83dbSDimitry Andric 6, // GV_Warp_Size_Log2 995ffd83dbSDimitry Andric 64 * 256, // GV_Warp_Slot_Size 1005ffd83dbSDimitry Andric 128, // GV_Max_Teams 1015ffd83dbSDimitry Andric 256, // GV_Mem_Align 1025ffd83dbSDimitry Andric 63, // GV_Warp_Size_Log2_Mask 1035ffd83dbSDimitry Andric 896, // GV_SimpleBufferSize 1045ffd83dbSDimitry Andric 1024, // GV_Max_WG_Size, 1055ffd83dbSDimitry Andric 256, // GV_Defaut_WG_Size 1065ffd83dbSDimitry Andric 1024 / 64, // GV_Max_WG_Size / GV_WarpSize 1075ffd83dbSDimitry Andric 63 // GV_Warp_Size_Log2_MaskL 1085ffd83dbSDimitry Andric }; 1095ffd83dbSDimitry Andric 1105ffd83dbSDimitry Andric /// For Nvidia GPUs 1115ffd83dbSDimitry Andric static constexpr unsigned NVPTXGpuGridValues[] = { 1125ffd83dbSDimitry Andric 992, // GV_Threads 1135ffd83dbSDimitry Andric 256, // GV_Slot_Size 1145ffd83dbSDimitry Andric 32, // GV_Warp_Size 1155ffd83dbSDimitry Andric 32, // GV_Warp_Size_32 1165ffd83dbSDimitry Andric 5, // GV_Warp_Size_Log2 1175ffd83dbSDimitry Andric 32 * 256, // GV_Warp_Slot_Size 1185ffd83dbSDimitry Andric 1024, // GV_Max_Teams 1195ffd83dbSDimitry Andric 256, // GV_Mem_Align 1205ffd83dbSDimitry Andric (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask 1215ffd83dbSDimitry Andric 896, // GV_SimpleBufferSize 1225ffd83dbSDimitry Andric 1024, // GV_Max_WG_Size 1235ffd83dbSDimitry Andric 128, // GV_Defaut_WG_Size 1245ffd83dbSDimitry Andric 1024 / 32, // GV_Max_WG_Size / GV_WarpSize 1255ffd83dbSDimitry Andric 31 // GV_Warp_Size_Log2_MaskL 1265ffd83dbSDimitry Andric }; 1275ffd83dbSDimitry Andric 1285ffd83dbSDimitry Andric } // namespace omp 1295ffd83dbSDimitry Andric } // namespace llvm 1305ffd83dbSDimitry Andric 1315ffd83dbSDimitry Andric #endif // LLVM_OPENMP_GRIDVALUES_H 132