xref: /llvm-project/offload/DeviceRTL/src/Kernel.cpp (revision 08533a3ee8f3a09a59cf6ac3be59198b26b7f739)
1 //===--- Kernel.cpp - OpenMP device kernel interface -------------- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the kernel entry points for the device.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "Shared/Environment.h"
14 
15 #include "Allocator.h"
16 #include "Debug.h"
17 #include "DeviceTypes.h"
18 #include "Interface.h"
19 #include "Mapping.h"
20 #include "State.h"
21 #include "Synchronization.h"
22 #include "Workshare.h"
23 
24 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
25 
26 using namespace ompx;
27 
28 #pragma omp begin declare target device_type(nohost)
29 
30 static void
31 inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
32                     KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
33   // Order is important here.
34   synchronize::init(IsSPMD);
35   mapping::init(IsSPMD);
36   state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment);
37   allocator::init(IsSPMD, KernelEnvironment);
38   workshare::init(IsSPMD);
39 }
40 
41 /// Simple generic state machine for worker threads.
42 static void genericStateMachine(IdentTy *Ident) {
43   uint32_t TId = mapping::getThreadIdInBlock();
44 
45   do {
46     ParallelRegionFnTy WorkFn = nullptr;
47 
48     // Wait for the signal that we have a new work function.
49     synchronize::threads(atomic::seq_cst);
50 
51     // Retrieve the work function from the runtime.
52     bool IsActive = __kmpc_kernel_parallel(&WorkFn);
53 
54     // If there is nothing more to do, break out of the state machine by
55     // returning to the caller.
56     if (!WorkFn)
57       return;
58 
59     if (IsActive) {
60       ASSERT(!mapping::isSPMDMode(), nullptr);
61       ((void (*)(uint32_t, uint32_t))WorkFn)(0, TId);
62       __kmpc_kernel_end_parallel();
63     }
64 
65     synchronize::threads(atomic::seq_cst);
66 
67   } while (true);
68 }
69 
70 extern "C" {
71 
72 /// Initialization
73 ///
74 /// \param Ident               Source location identification, can be NULL.
75 ///
76 int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
77                            KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
78   ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
79   bool IsSPMD = Configuration.ExecMode &
80                 llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
81   bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
82   if (IsSPMD) {
83     inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
84                         KernelLaunchEnvironment);
85     synchronize::threadsAligned(atomic::relaxed);
86   } else {
87     inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
88                         KernelLaunchEnvironment);
89     // No need to wait since only the main threads will execute user
90     // code and workers will run into a barrier right away.
91   }
92 
93   if (IsSPMD) {
94     state::assumeInitialState(IsSPMD);
95 
96     // Synchronize to ensure the assertions above are in an aligned region.
97     // The barrier is eliminated later.
98     synchronize::threadsAligned(atomic::relaxed);
99     return -1;
100   }
101 
102   if (mapping::isInitialThreadInLevel0(IsSPMD))
103     return -1;
104 
105   // Enter the generic state machine if enabled and if this thread can possibly
106   // be an active worker thread.
107   //
108   // The latter check is important for NVIDIA Pascal (but not Volta) and AMD
109   // GPU.  In those cases, a single thread can apparently satisfy a barrier on
110   // behalf of all threads in the same warp.  Thus, it would not be safe for
111   // other threads in the main thread's warp to reach the first
112   // synchronize::threads call in genericStateMachine before the main thread
113   // reaches its corresponding synchronize::threads call: that would permit all
114   // active worker threads to proceed before the main thread has actually set
115   // state::ParallelRegionFn, and then they would immediately quit without
116   // doing any work.  mapping::getMaxTeamThreads() does not include any of the
117   // main thread's warp, so none of its threads can ever be active worker
118   // threads.
119   if (UseGenericStateMachine &&
120       mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD))
121     genericStateMachine(KernelEnvironment.Ident);
122 
123   return mapping::getThreadIdInBlock();
124 }
125 
126 /// De-Initialization
127 ///
128 /// In non-SPMD, this function releases the workers trapped in a state machine
129 /// and also any memory dynamically allocated by the runtime.
130 ///
131 /// \param Ident Source location identification, can be NULL.
132 ///
133 void __kmpc_target_deinit() {
134   bool IsSPMD = mapping::isSPMDMode();
135   if (IsSPMD)
136     return;
137 
138   if (mapping::isInitialThreadInLevel0(IsSPMD)) {
139     // Signal the workers to exit the state machine and exit the kernel.
140     state::ParallelRegionFn = nullptr;
141   } else if (!state::getKernelEnvironment()
142                   .Configuration.UseGenericStateMachine) {
143     // Retrieve the work function just to ensure we always call
144     // __kmpc_kernel_parallel even if a custom state machine is used.
145     // TODO: this is not super pretty. The problem is we create the call to
146     // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it
147     // is not there yet. Thus, we assume we never reach it from
148     // __kmpc_target_deinit. That allows us to remove the store in there to
149     // ParallelRegionFn, which leads to bad results later on.
150     ParallelRegionFnTy WorkFn = nullptr;
151     __kmpc_kernel_parallel(&WorkFn);
152     ASSERT(WorkFn == nullptr, nullptr);
153   }
154 }
155 
156 int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); }
157 }
158 
159 #pragma omp end declare target
160