15ffd83dbSDimitry Andric //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
25ffd83dbSDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric ///
95ffd83dbSDimitry Andric /// \file
105ffd83dbSDimitry Andric /// \brief Provides definitions for Target specific Grid Values
115ffd83dbSDimitry Andric ///
125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
135ffd83dbSDimitry Andric
14fe6060f1SDimitry Andric #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
15fe6060f1SDimitry Andric #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
165ffd83dbSDimitry Andric
175ffd83dbSDimitry Andric namespace llvm {
185ffd83dbSDimitry Andric
195ffd83dbSDimitry Andric namespace omp {
205ffd83dbSDimitry Andric
215ffd83dbSDimitry Andric /// \brief Defines various target-specific GPU grid values that must be
225ffd83dbSDimitry Andric /// consistent between host RTL (plugin), device RTL, and clang.
235ffd83dbSDimitry Andric /// We can change grid values for a "fat" binary so that different
245ffd83dbSDimitry Andric /// passes get the correct values when generating code for a
255ffd83dbSDimitry Andric /// multi-target binary. Both amdgcn and nvptx values are stored in
265ffd83dbSDimitry Andric /// this file. In the future, should there be differences between GPUs
275ffd83dbSDimitry Andric /// of the same architecture, then simply make a different array and
285ffd83dbSDimitry Andric /// use the new array name.
295ffd83dbSDimitry Andric ///
305ffd83dbSDimitry Andric /// Example usage in clang:
31e8d8bef9SDimitry Andric /// const unsigned slot_size =
32349cc55cSDimitry Andric /// ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
335ffd83dbSDimitry Andric ///
345ffd83dbSDimitry Andric /// Example usage in libomptarget/deviceRTLs:
35e8d8bef9SDimitry Andric /// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
365ffd83dbSDimitry Andric /// #ifdef __AMDGPU__
37349cc55cSDimitry Andric /// #define GRIDVAL AMDGPUGridValues
385ffd83dbSDimitry Andric /// #else
39349cc55cSDimitry Andric /// #define GRIDVAL NVPTXGridValues
405ffd83dbSDimitry Andric /// #endif
415ffd83dbSDimitry Andric /// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
42349cc55cSDimitry Andric /// llvm::omp::GRIDVAL().GV_Warp_Size
435ffd83dbSDimitry Andric ///
445ffd83dbSDimitry Andric /// Example usage in libomptarget hsa plugin:
45e8d8bef9SDimitry Andric /// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
46349cc55cSDimitry Andric /// #define GRIDVAL AMDGPUGridValues
475ffd83dbSDimitry Andric /// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
48349cc55cSDimitry Andric /// llvm::omp::GRIDVAL().GV_Warp_Size
495ffd83dbSDimitry Andric ///
505ffd83dbSDimitry Andric /// Example usage in libomptarget cuda plugin:
51e8d8bef9SDimitry Andric /// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
52349cc55cSDimitry Andric /// #define GRIDVAL NVPTXGridValues
535ffd83dbSDimitry Andric /// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
54349cc55cSDimitry Andric /// llvm::omp::GRIDVAL().GV_Warp_Size
555ffd83dbSDimitry Andric ///
56349cc55cSDimitry Andric
57349cc55cSDimitry Andric struct GV {
585ffd83dbSDimitry Andric /// The size reserved for data in a shared memory slot.
59*bdd1243dSDimitry Andric unsigned GV_Slot_Size;
605ffd83dbSDimitry Andric /// The default value of maximum number of threads in a worker warp.
61*bdd1243dSDimitry Andric unsigned GV_Warp_Size;
62349cc55cSDimitry Andric
warpSlotSizeGV63349cc55cSDimitry Andric constexpr unsigned warpSlotSize() const {
64349cc55cSDimitry Andric return GV_Warp_Size * GV_Slot_Size;
65349cc55cSDimitry Andric }
66349cc55cSDimitry Andric
675ffd83dbSDimitry Andric /// the maximum number of teams.
68*bdd1243dSDimitry Andric unsigned GV_Max_Teams;
69*bdd1243dSDimitry Andric // The default number of teams in the absence of any other information.
70*bdd1243dSDimitry Andric unsigned GV_Default_Num_Teams;
71*bdd1243dSDimitry Andric
725ffd83dbSDimitry Andric // An alternative to the heavy data sharing infrastructure that uses global
735ffd83dbSDimitry Andric // memory is one that uses device __shared__ memory. The amount of such space
745ffd83dbSDimitry Andric // (in bytes) reserved by the OpenMP runtime is noted here.
75*bdd1243dSDimitry Andric unsigned GV_SimpleBufferSize;
765ffd83dbSDimitry Andric // The absolute maximum team size for a working group
77*bdd1243dSDimitry Andric unsigned GV_Max_WG_Size;
785ffd83dbSDimitry Andric // The default maximum team size for a working group
79*bdd1243dSDimitry Andric unsigned GV_Default_WG_Size;
80349cc55cSDimitry Andric
maxWarpNumberGV81349cc55cSDimitry Andric constexpr unsigned maxWarpNumber() const {
82349cc55cSDimitry Andric return GV_Max_WG_Size / GV_Warp_Size;
83349cc55cSDimitry Andric }
845ffd83dbSDimitry Andric };
855ffd83dbSDimitry Andric
865ffd83dbSDimitry Andric /// For AMDGPU GPUs
87349cc55cSDimitry Andric static constexpr GV AMDGPUGridValues64 = {
885ffd83dbSDimitry Andric 256, // GV_Slot_Size
895ffd83dbSDimitry Andric 64, // GV_Warp_Size
90*bdd1243dSDimitry Andric (1 << 16), // GV_Max_Teams
91*bdd1243dSDimitry Andric 440, // GV_Default_Num_Teams
925ffd83dbSDimitry Andric 896, // GV_SimpleBufferSize
935ffd83dbSDimitry Andric 1024, // GV_Max_WG_Size,
94349cc55cSDimitry Andric 256, // GV_Default_WG_Size
955ffd83dbSDimitry Andric };
965ffd83dbSDimitry Andric
97349cc55cSDimitry Andric static constexpr GV AMDGPUGridValues32 = {
985ffd83dbSDimitry Andric 256, // GV_Slot_Size
995ffd83dbSDimitry Andric 32, // GV_Warp_Size
100*bdd1243dSDimitry Andric (1 << 16), // GV_Max_Teams
101*bdd1243dSDimitry Andric 440, // GV_Default_Num_Teams
102349cc55cSDimitry Andric 896, // GV_SimpleBufferSize
103349cc55cSDimitry Andric 1024, // GV_Max_WG_Size,
104349cc55cSDimitry Andric 256, // GV_Default_WG_Size
105349cc55cSDimitry Andric };
106349cc55cSDimitry Andric
getAMDGPUGridValues()107349cc55cSDimitry Andric template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
108*bdd1243dSDimitry Andric static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
109349cc55cSDimitry Andric return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
110349cc55cSDimitry Andric }
111349cc55cSDimitry Andric
112349cc55cSDimitry Andric /// For Nvidia GPUs
113349cc55cSDimitry Andric static constexpr GV NVPTXGridValues = {
114349cc55cSDimitry Andric 256, // GV_Slot_Size
115349cc55cSDimitry Andric 32, // GV_Warp_Size
116*bdd1243dSDimitry Andric (1 << 16), // GV_Max_Teams
117*bdd1243dSDimitry Andric 3200, // GV_Default_Num_Teams
1185ffd83dbSDimitry Andric 896, // GV_SimpleBufferSize
1195ffd83dbSDimitry Andric 1024, // GV_Max_WG_Size
120349cc55cSDimitry Andric 128, // GV_Default_WG_Size
1215ffd83dbSDimitry Andric };
1225ffd83dbSDimitry Andric
1235ffd83dbSDimitry Andric } // namespace omp
1245ffd83dbSDimitry Andric } // namespace llvm
1255ffd83dbSDimitry Andric
126fe6060f1SDimitry Andric #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
127