xref: /llvm-project/offload/DeviceRTL/include/Synchronization.h (revision 3274bf6b4282a0dafd4b5a2efa09824e5ca417d0)
1330d8983SJohannes Doerfert //===- Synchronization.h - OpenMP synchronization utilities ------- C++ -*-===//
2330d8983SJohannes Doerfert //
3330d8983SJohannes Doerfert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4330d8983SJohannes Doerfert // See https://llvm.org/LICENSE.txt for license information.
5330d8983SJohannes Doerfert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6330d8983SJohannes Doerfert //
7330d8983SJohannes Doerfert //===----------------------------------------------------------------------===//
8330d8983SJohannes Doerfert //
9330d8983SJohannes Doerfert //
10330d8983SJohannes Doerfert //===----------------------------------------------------------------------===//
11330d8983SJohannes Doerfert 
12330d8983SJohannes Doerfert #ifndef OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
13330d8983SJohannes Doerfert #define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
14330d8983SJohannes Doerfert 
1508533a3eSJohannes Doerfert #include "DeviceTypes.h"
16b57c0bacSJoseph Huber #include "DeviceUtils.h"
17b57c0bacSJoseph Huber 
18b57c0bacSJoseph Huber #pragma omp begin declare target device_type(nohost)
19330d8983SJohannes Doerfert 
20330d8983SJohannes Doerfert namespace ompx {
21330d8983SJohannes Doerfert namespace atomic {
22330d8983SJohannes Doerfert 
/// Memory orderings accepted by the atomic, synchronization and fence
/// helpers. Each enumerator maps directly onto the compiler's `__ATOMIC_*`
/// constant of the same meaning so it can be handed to the atomic builtins
/// unchanged.
enum OrderingTy {
  relaxed = __ATOMIC_RELAXED,
  /// Correctly spelled name for acquire ordering.
  acquire = __ATOMIC_ACQUIRE,
  /// Historical misspelling of `acquire`; kept so existing callers continue
  /// to compile. Same value as `acquire`.
  aquire = __ATOMIC_ACQUIRE,
  release = __ATOMIC_RELEASE,
  acq_rel = __ATOMIC_ACQ_REL,
  seq_cst = __ATOMIC_SEQ_CST,
};
30330d8983SJohannes Doerfert 
31*3274bf6bSJoseph Huber enum MemScopeTy {
32f4ee5a67SJoseph Huber   system = __MEMORY_SCOPE_SYSTEM,
33*3274bf6bSJoseph Huber   device = __MEMORY_SCOPE_DEVICE,
34f4ee5a67SJoseph Huber   workgroup = __MEMORY_SCOPE_WRKGRP,
35f4ee5a67SJoseph Huber   wavefront = __MEMORY_SCOPE_WVFRNT,
36f4ee5a67SJoseph Huber   single = __MEMORY_SCOPE_SINGLE,
37f4ee5a67SJoseph Huber };
38f4ee5a67SJoseph Huber 
39330d8983SJohannes Doerfert /// Atomically increment \p *Addr and wrap at \p V with \p Ordering semantics.
40330d8983SJohannes Doerfert uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering,
41*3274bf6bSJoseph Huber              MemScopeTy MemScope = MemScopeTy::device);
42330d8983SJohannes Doerfert 
/// Atomically perform <op> on \p Val and \p *Address with \p Ordering
/// semantics. The result is stored in \p *Address.
///{
46330d8983SJohannes Doerfert 
47b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
48b57c0bacSJoseph Huber bool cas(Ty *Address, V ExpectedV, V DesiredV, atomic::OrderingTy OrderingSucc,
49*3274bf6bSJoseph Huber          atomic::OrderingTy OrderingFail,
50*3274bf6bSJoseph Huber          MemScopeTy MemScope = MemScopeTy::device) {
51b57c0bacSJoseph Huber   return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false,
52*3274bf6bSJoseph Huber                                           OrderingSucc, OrderingFail, MemScope);
53b57c0bacSJoseph Huber }
54330d8983SJohannes Doerfert 
55b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
56*3274bf6bSJoseph Huber V add(Ty *Address, V Val, atomic::OrderingTy Ordering,
57*3274bf6bSJoseph Huber       MemScopeTy MemScope = MemScopeTy::device) {
58*3274bf6bSJoseph Huber   return __scoped_atomic_fetch_add(Address, Val, Ordering, MemScope);
59b57c0bacSJoseph Huber }
60330d8983SJohannes Doerfert 
61b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
62*3274bf6bSJoseph Huber V load(Ty *Address, atomic::OrderingTy Ordering,
63*3274bf6bSJoseph Huber        MemScopeTy MemScope = MemScopeTy::device) {
64*3274bf6bSJoseph Huber   return __scoped_atomic_load_n(Address, Ordering, MemScope);
65b57c0bacSJoseph Huber }
66330d8983SJohannes Doerfert 
67b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
68*3274bf6bSJoseph Huber void store(Ty *Address, V Val, atomic::OrderingTy Ordering,
69*3274bf6bSJoseph Huber            MemScopeTy MemScope = MemScopeTy::device) {
70*3274bf6bSJoseph Huber   __scoped_atomic_store_n(Address, Val, Ordering, MemScope);
71b57c0bacSJoseph Huber }
72330d8983SJohannes Doerfert 
73b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
74*3274bf6bSJoseph Huber V mul(Ty *Address, V Val, atomic::OrderingTy Ordering,
75*3274bf6bSJoseph Huber       MemScopeTy MemScope = MemScopeTy::device) {
76b57c0bacSJoseph Huber   Ty TypedCurrentVal, TypedResultVal, TypedNewVal;
77b57c0bacSJoseph Huber   bool Success;
78b57c0bacSJoseph Huber   do {
79b57c0bacSJoseph Huber     TypedCurrentVal = atomic::load(Address, Ordering);
80b57c0bacSJoseph Huber     TypedNewVal = TypedCurrentVal * Val;
81b57c0bacSJoseph Huber     Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering,
82*3274bf6bSJoseph Huber                           atomic::relaxed, MemScope);
83b57c0bacSJoseph Huber   } while (!Success);
84b57c0bacSJoseph Huber   return TypedResultVal;
85b57c0bacSJoseph Huber }
86330d8983SJohannes Doerfert 
87b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
88b57c0bacSJoseph Huber utils::enable_if_t<!utils::is_floating_point_v<V>, V>
89*3274bf6bSJoseph Huber max(Ty *Address, V Val, atomic::OrderingTy Ordering,
90*3274bf6bSJoseph Huber     MemScopeTy MemScope = MemScopeTy::device) {
91*3274bf6bSJoseph Huber   return __scoped_atomic_fetch_max(Address, Val, Ordering, MemScope);
92b57c0bacSJoseph Huber }
93330d8983SJohannes Doerfert 
94b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
95b57c0bacSJoseph Huber utils::enable_if_t<utils::is_same_v<V, float>, V>
96*3274bf6bSJoseph Huber max(Ty *Address, V Val, atomic::OrderingTy Ordering,
97*3274bf6bSJoseph Huber     MemScopeTy MemScope = MemScopeTy::device) {
98b57c0bacSJoseph Huber   if (Val >= 0)
99*3274bf6bSJoseph Huber     return utils::bitCast<float>(max(
100*3274bf6bSJoseph Huber         (int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope));
101*3274bf6bSJoseph Huber   return utils::bitCast<float>(min(
102*3274bf6bSJoseph Huber       (uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope));
103b57c0bacSJoseph Huber }
104b57c0bacSJoseph Huber 
105b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
106b57c0bacSJoseph Huber utils::enable_if_t<utils::is_same_v<V, double>, V>
107*3274bf6bSJoseph Huber max(Ty *Address, V Val, atomic::OrderingTy Ordering,
108*3274bf6bSJoseph Huber     MemScopeTy MemScope = MemScopeTy::device) {
109b57c0bacSJoseph Huber   if (Val >= 0)
110*3274bf6bSJoseph Huber     return utils::bitCast<double>(max(
111*3274bf6bSJoseph Huber         (int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope));
112*3274bf6bSJoseph Huber   return utils::bitCast<double>(min(
113*3274bf6bSJoseph Huber       (uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope));
114b57c0bacSJoseph Huber }
115b57c0bacSJoseph Huber 
116b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
117b57c0bacSJoseph Huber utils::enable_if_t<!utils::is_floating_point_v<V>, V>
118*3274bf6bSJoseph Huber min(Ty *Address, V Val, atomic::OrderingTy Ordering,
119*3274bf6bSJoseph Huber     MemScopeTy MemScope = MemScopeTy::device) {
120*3274bf6bSJoseph Huber   return __scoped_atomic_fetch_min(Address, Val, Ordering, MemScope);
121b57c0bacSJoseph Huber }
122b57c0bacSJoseph Huber 
123b57c0bacSJoseph Huber // TODO: Implement this with __atomic_fetch_max and remove the duplication.
124b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
125b57c0bacSJoseph Huber utils::enable_if_t<utils::is_same_v<V, float>, V>
126*3274bf6bSJoseph Huber min(Ty *Address, V Val, atomic::OrderingTy Ordering,
127*3274bf6bSJoseph Huber     MemScopeTy MemScope = MemScopeTy::device) {
128b57c0bacSJoseph Huber   if (Val >= 0)
129*3274bf6bSJoseph Huber     return utils::bitCast<float>(min(
130*3274bf6bSJoseph Huber         (int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope));
131*3274bf6bSJoseph Huber   return utils::bitCast<float>(max(
132*3274bf6bSJoseph Huber       (uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope));
133b57c0bacSJoseph Huber }
134b57c0bacSJoseph Huber 
135b57c0bacSJoseph Huber // TODO: Implement this with __atomic_fetch_max and remove the duplication.
136b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
137b57c0bacSJoseph Huber utils::enable_if_t<utils::is_same_v<V, double>, V>
138*3274bf6bSJoseph Huber min(Ty *Address, utils::remove_addrspace_t<Ty> Val, atomic::OrderingTy Ordering,
139*3274bf6bSJoseph Huber     MemScopeTy MemScope = MemScopeTy::device) {
140b57c0bacSJoseph Huber   if (Val >= 0)
141*3274bf6bSJoseph Huber     return utils::bitCast<double>(min(
142*3274bf6bSJoseph Huber         (int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope));
143*3274bf6bSJoseph Huber   return utils::bitCast<double>(max(
144*3274bf6bSJoseph Huber       (uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope));
145b57c0bacSJoseph Huber }
146b57c0bacSJoseph Huber 
147b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
148*3274bf6bSJoseph Huber V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering,
149*3274bf6bSJoseph Huber          MemScopeTy MemScope = MemScopeTy::device) {
150*3274bf6bSJoseph Huber   return __scoped_atomic_fetch_or(Address, Val, Ordering, MemScope);
151b57c0bacSJoseph Huber }
152b57c0bacSJoseph Huber 
153b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
154*3274bf6bSJoseph Huber V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering,
155*3274bf6bSJoseph Huber           MemScopeTy MemScope = MemScopeTy::device) {
156*3274bf6bSJoseph Huber   return __scoped_atomic_fetch_and(Address, Val, Ordering, MemScope);
157b57c0bacSJoseph Huber }
158b57c0bacSJoseph Huber 
159b57c0bacSJoseph Huber template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
160*3274bf6bSJoseph Huber V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering,
161*3274bf6bSJoseph Huber           MemScopeTy MemScope = MemScopeTy::device) {
162*3274bf6bSJoseph Huber   return __scoped_atomic_fetch_xor(Address, Val, Ordering, MemScope);
163b57c0bacSJoseph Huber }
164b57c0bacSJoseph Huber 
165*3274bf6bSJoseph Huber static inline uint32_t
166*3274bf6bSJoseph Huber atomicExchange(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering,
167*3274bf6bSJoseph Huber                MemScopeTy MemScope = MemScopeTy::device) {
168b57c0bacSJoseph Huber   uint32_t R;
169*3274bf6bSJoseph Huber   __scoped_atomic_exchange(Address, &Val, &R, Ordering, MemScope);
170b57c0bacSJoseph Huber   return R;
171b57c0bacSJoseph Huber }
172330d8983SJohannes Doerfert 
173330d8983SJohannes Doerfert ///}
174330d8983SJohannes Doerfert 
175330d8983SJohannes Doerfert } // namespace atomic
176330d8983SJohannes Doerfert 
177330d8983SJohannes Doerfert namespace synchronize {
178330d8983SJohannes Doerfert 
179330d8983SJohannes Doerfert /// Initialize the synchronization machinery. Must be called by all threads.
180330d8983SJohannes Doerfert void init(bool IsSPMD);
181330d8983SJohannes Doerfert 
182330d8983SJohannes Doerfert /// Synchronize all threads in a warp identified by \p Mask.
183330d8983SJohannes Doerfert void warp(LaneMaskTy Mask);
184330d8983SJohannes Doerfert 
185330d8983SJohannes Doerfert /// Synchronize all threads in a block and perform a fence before and after the
186330d8983SJohannes Doerfert /// barrier according to \p Ordering. Note that the fence might be part of the
187330d8983SJohannes Doerfert /// barrier.
188330d8983SJohannes Doerfert void threads(atomic::OrderingTy Ordering);
189330d8983SJohannes Doerfert 
190330d8983SJohannes Doerfert /// Synchronizing threads is allowed even if they all hit different instances of
191330d8983SJohannes Doerfert /// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more
192330d8983SJohannes Doerfert /// restrictive in that it requires all threads to hit the same instance. The
193330d8983SJohannes Doerfert /// noinline is removed by the openmp-opt pass and helps to preserve the
194330d8983SJohannes Doerfert /// information till then.
195330d8983SJohannes Doerfert ///{
196330d8983SJohannes Doerfert 
197330d8983SJohannes Doerfert /// Synchronize all threads in a block, they are reaching the same instruction
198330d8983SJohannes Doerfert /// (hence all threads in the block are "aligned"). Also perform a fence before
199330d8983SJohannes Doerfert /// and after the barrier according to \p Ordering. Note that the
200330d8983SJohannes Doerfert /// fence might be part of the barrier if the target offers this.
201723a3e74SJoseph Huber [[gnu::noinline, omp::assume("ompx_aligned_barrier")]] void
20258af82b4SJoseph Huber threadsAligned(atomic::OrderingTy Ordering);
203330d8983SJohannes Doerfert 
204330d8983SJohannes Doerfert ///}
205330d8983SJohannes Doerfert 
206330d8983SJohannes Doerfert } // namespace synchronize
207330d8983SJohannes Doerfert 
208330d8983SJohannes Doerfert namespace fence {
209330d8983SJohannes Doerfert 
210330d8983SJohannes Doerfert /// Memory fence with \p Ordering semantics for the team.
211330d8983SJohannes Doerfert void team(atomic::OrderingTy Ordering);
212330d8983SJohannes Doerfert 
213330d8983SJohannes Doerfert /// Memory fence with \p Ordering semantics for the contention group.
214330d8983SJohannes Doerfert void kernel(atomic::OrderingTy Ordering);
215330d8983SJohannes Doerfert 
216330d8983SJohannes Doerfert /// Memory fence with \p Ordering semantics for the system.
217330d8983SJohannes Doerfert void system(atomic::OrderingTy Ordering);
218330d8983SJohannes Doerfert 
219330d8983SJohannes Doerfert } // namespace fence
220330d8983SJohannes Doerfert 
221330d8983SJohannes Doerfert } // namespace ompx
222330d8983SJohannes Doerfert 
223b57c0bacSJoseph Huber #pragma omp end declare target
224b57c0bacSJoseph Huber 
225330d8983SJohannes Doerfert #endif
226