Frontend/OpenMP/OMPGridValues.h

//====--- OMPGridValues.h - Target-specific GPU grid values ---*- C++ -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Provides definitions for target-specific grid values.
///
//===----------------------------------------------------------------------===//
4022bc2aSSaiyedul Islam
aa5c09beSKazu Hirata#ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
aa5c09beSKazu Hirata#define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
4022bc2aSSaiyedul Islam
4022bc2aSSaiyedul Islamnamespace llvm {
4022bc2aSSaiyedul Islam
4022bc2aSSaiyedul Islamnamespace omp {
4022bc2aSSaiyedul Islam
/// \brief Defines various target-specific GPU grid values that must be
///        consistent between host RTL (plugin), device RTL, and clang.
///        We can change grid values for a "fat" binary so that different
///        passes get the correct values when generating code for a
///        multi-target binary. Both amdgcn and nvptx values are stored in
///        this file. In the future, should there be differences between GPUs
///        of the same architecture, simply define another GV constant and
///        use the new constant's name.
///
/// Example usage in clang:
///   const unsigned warp_size =
///       ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #ifdef __AMDGPU__
///     // Use getAMDGPUGridValues<32>() for targets with a wave size of 32.
///     #define GRIDVAL getAMDGPUGridValues<64>()
///   #else
///     #define GRIDVAL NVPTXGridValues
///   #endif
///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
///   llvm::omp::GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   // Use getAMDGPUGridValues<32>() for targets with a wave size of 32.
///   #define GRIDVAL getAMDGPUGridValues<64>()
///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
///   llvm::omp::GRIDVAL.GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
///   #define GRIDVAL NVPTXGridValues
///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
///   llvm::omp::GRIDVAL.GV_Warp_Size
///
77579b99SJon Chesterfield
struct GV {
  // NOTE: GV is an aggregate that the constants in this header initialize
  // positionally, so the declaration order of the data members below is part
  // of the contract. Do not reorder them or insert members in the middle.

  /// The size reserved for data in a shared memory slot.
  unsigned GV_Slot_Size;
  /// The default value of maximum number of threads in a worker warp.
  unsigned GV_Warp_Size;

  /// Returns the product GV_Warp_Size * GV_Slot_Size.
  constexpr unsigned warpSlotSize() const {
    return GV_Warp_Size * GV_Slot_Size;
  }

  /// The maximum number of teams.
  unsigned GV_Max_Teams;
  /// The default number of teams in the absence of any other information.
  unsigned GV_Default_Num_Teams;

  /// An alternative to the heavy data sharing infrastructure that uses global
  /// memory is one that uses device __shared__ memory.  The amount of such
  /// space (in bytes) reserved by the OpenMP runtime is noted here.
  unsigned GV_SimpleBufferSize;
  /// The absolute maximum team size for a working group.
  unsigned GV_Max_WG_Size;
  /// The default maximum team size for a working group.
  unsigned GV_Default_WG_Size;

  /// Returns GV_Max_WG_Size / GV_Warp_Size using unsigned integer division
  /// (any remainder is discarded). GV_Warp_Size must be non-zero.
  constexpr unsigned maxWarpNumber() const {
    return GV_Max_WG_Size / GV_Warp_Size;
  }
};
4022bc2aSSaiyedul Islam
4022bc2aSSaiyedul Islam/// For AMDGPU GPUs
78f92c38SJon Chesterfieldstatic constexpr GV AMDGPUGridValues64 = {
4022bc2aSSaiyedul Islam    256,       // GV_Slot_Size
4022bc2aSSaiyedul Islam    64,        // GV_Warp_Size
2e9c3fe6SKevin Sala    (1 << 16), // GV_Max_Teams
*fb2c42dfSJohannes Doerfert    440,       // GV_Default_Num_Teams
4022bc2aSSaiyedul Islam    896,       // GV_SimpleBufferSize
4022bc2aSSaiyedul Islam    1024,      // GV_Max_WG_Size,
c2574e63SJon Chesterfield    256,       // GV_Default_WG_Size
4022bc2aSSaiyedul Islam};
4022bc2aSSaiyedul Islam
78f92c38SJon Chesterfieldstatic constexpr GV AMDGPUGridValues32 = {
78f92c38SJon Chesterfield    256,       // GV_Slot_Size
78f92c38SJon Chesterfield    32,        // GV_Warp_Size
2e9c3fe6SKevin Sala    (1 << 16), // GV_Max_Teams
*fb2c42dfSJohannes Doerfert    440,       // GV_Default_Num_Teams
78f92c38SJon Chesterfield    896,       // GV_SimpleBufferSize
78f92c38SJon Chesterfield    1024,      // GV_Max_WG_Size,
78f92c38SJon Chesterfield    256,       // GV_Default_WG_Size
78f92c38SJon Chesterfield};
78f92c38SJon Chesterfield
78f92c38SJon Chesterfieldtemplate <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
44c734afSDan Palermo  static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
78f92c38SJon Chesterfield  return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
78f92c38SJon Chesterfield}
78f92c38SJon Chesterfield
4022bc2aSSaiyedul Islam/// For Nvidia GPUs
77579b99SJon Chesterfieldstatic constexpr GV NVPTXGridValues = {
4022bc2aSSaiyedul Islam    256,       // GV_Slot_Size
4022bc2aSSaiyedul Islam    32,        // GV_Warp_Size
2e9c3fe6SKevin Sala    (1 << 16), // GV_Max_Teams
*fb2c42dfSJohannes Doerfert    3200,      // GV_Default_Num_Teams
4022bc2aSSaiyedul Islam    896,       // GV_SimpleBufferSize
4022bc2aSSaiyedul Islam    1024,      // GV_Max_WG_Size
c2574e63SJon Chesterfield    128,       // GV_Default_WG_Size
4022bc2aSSaiyedul Islam};
4022bc2aSSaiyedul Islam
4022bc2aSSaiyedul Islam} // namespace omp
4022bc2aSSaiyedul Islam} // namespace llvm
4022bc2aSSaiyedul Islam
aa5c09beSKazu Hirata#endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H