Frontend/OpenMP/OMPGridValues.h

//====--- OMPGridValues.h - Target-specific GPU grid values ---*- C++ -*-====//
5ffd83dbSDimitry Andric//
e8d8bef9SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
e8d8bef9SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
e8d8bef9SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5ffd83dbSDimitry Andric//
5ffd83dbSDimitry Andric//===----------------------------------------------------------------------===//
5ffd83dbSDimitry Andric///
5ffd83dbSDimitry Andric/// \file
5ffd83dbSDimitry Andric/// \brief Provides definitions for Target specific Grid Values
5ffd83dbSDimitry Andric///
5ffd83dbSDimitry Andric//===----------------------------------------------------------------------===//
5ffd83dbSDimitry Andric
fe6060f1SDimitry Andric#ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
fe6060f1SDimitry Andric#define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andricnamespace llvm {
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andricnamespace omp {
5ffd83dbSDimitry Andric
/// \brief Defines various target-specific GPU grid values that must be
///        consistent between host RTL (plugin), device RTL, and clang.
///        We can change grid values for a "fat" binary so that different
///        passes get the correct values when generating code for a
///        multi-target binary. Both amdgcn and nvptx values are stored in
///        this file. In the future, should there be differences between GPUs
///        of the same architecture, simply define another GV table and use
///        the new table's name.
///
/// Example usage in clang:
///   const unsigned warp_size =
///       ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #ifdef __AMDGPU__
///     #define GRIDVAL llvm::omp::getAMDGPUGridValues<64>() // or <32>
///   #else
///     #define GRIDVAL llvm::omp::NVPTXGridValues
///   #endif
///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
///   GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #define GRIDVAL llvm::omp::getAMDGPUGridValues<64>() // or <32>
///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
///   GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #define GRIDVAL llvm::omp::NVPTXGridValues
///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
///   GRIDVAL.GV_Warp_Size
///
349cc55cSDimitry Andric
struct GV {
  // Plain aggregate of per-target grid parameters. The tables in this file
  // brace-initialize it positionally, so the order of the data members below
  // must not change.

  /// Size reserved for data in a shared memory slot.
  unsigned GV_Slot_Size;
  /// Default value of the maximum number of threads in a worker warp.
  unsigned GV_Warp_Size;
  /// Maximum number of teams.
  unsigned GV_Max_Teams;
  /// Default number of teams in the absence of any other information.
  unsigned GV_Default_Num_Teams;
  /// Amount of device __shared__ memory (in bytes) reserved by the OpenMP
  /// runtime. This is the alternative to the heavy data sharing
  /// infrastructure that uses global memory.
  unsigned GV_SimpleBufferSize;
  /// Absolute maximum team size for a working group.
  unsigned GV_Max_WG_Size;
  /// Default maximum team size for a working group.
  unsigned GV_Default_WG_Size;

  /// Returns the product of the slot size and the warp size.
  constexpr unsigned warpSlotSize() const {
    return GV_Slot_Size * GV_Warp_Size;
  }

  /// Returns the maximum working-group size divided by the warp size, using
  /// unsigned integer division.
  constexpr unsigned maxWarpNumber() const {
    return GV_Max_WG_Size / GV_Warp_Size;
  }
};
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric/// For AMDGPU GPUs
349cc55cSDimitry Andricstatic constexpr GV AMDGPUGridValues64 = {
5ffd83dbSDimitry Andric    256,  // GV_Slot_Size
5ffd83dbSDimitry Andric    64,   // GV_Warp_Size
*bdd1243dSDimitry Andric    (1 << 16), // GV_Max_Teams
*bdd1243dSDimitry Andric    440,  // GV_Default_Num_Teams
5ffd83dbSDimitry Andric    896,  // GV_SimpleBufferSize
5ffd83dbSDimitry Andric    1024, // GV_Max_WG_Size,
349cc55cSDimitry Andric    256,  // GV_Default_WG_Size
5ffd83dbSDimitry Andric};
5ffd83dbSDimitry Andric
349cc55cSDimitry Andricstatic constexpr GV AMDGPUGridValues32 = {
5ffd83dbSDimitry Andric    256,  // GV_Slot_Size
5ffd83dbSDimitry Andric    32,   // GV_Warp_Size
*bdd1243dSDimitry Andric    (1 << 16), // GV_Max_Teams
*bdd1243dSDimitry Andric    440,  // GV_Default_Num_Teams
349cc55cSDimitry Andric    896,  // GV_SimpleBufferSize
349cc55cSDimitry Andric    1024, // GV_Max_WG_Size,
349cc55cSDimitry Andric    256,  // GV_Default_WG_Size
349cc55cSDimitry Andric};
349cc55cSDimitry Andric
349cc55cSDimitry Andrictemplate <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
*bdd1243dSDimitry Andric  static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
349cc55cSDimitry Andric  return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
349cc55cSDimitry Andric}
349cc55cSDimitry Andric
349cc55cSDimitry Andric/// For Nvidia GPUs
349cc55cSDimitry Andricstatic constexpr GV NVPTXGridValues = {
349cc55cSDimitry Andric    256,  // GV_Slot_Size
349cc55cSDimitry Andric    32,   // GV_Warp_Size
*bdd1243dSDimitry Andric    (1 << 16), // GV_Max_Teams
*bdd1243dSDimitry Andric    3200, // GV_Default_Num_Teams
5ffd83dbSDimitry Andric    896,  // GV_SimpleBufferSize
5ffd83dbSDimitry Andric    1024, // GV_Max_WG_Size
349cc55cSDimitry Andric    128,  // GV_Default_WG_Size
5ffd83dbSDimitry Andric};
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric} // namespace omp
5ffd83dbSDimitry Andric} // namespace llvm
5ffd83dbSDimitry Andric
fe6060f1SDimitry Andric#endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H