// <barrier> -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
 */

#ifndef _GLIBCXX_BARRIER
#define _GLIBCXX_BARRIER 1

#pragma GCC system_header

#if __cplusplus > 201703L
#include <bits/atomic_base.h>
#if __cpp_lib_atomic_wait && __cpp_aligned_new
#include <bits/std_thread.h>
#include <bits/unique_ptr.h>

#include <array>

#define __cpp_lib_barrier 201907L

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  struct __empty_completion
  {
    _GLIBCXX_ALWAYS_INLINE void
    operator()() noexcept
    { }
  };
/*

The default implementation of __tree_barrier is a classic tree barrier.

It looks different from the pseudocode in the literature for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an extra round on top of the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoiding cache line thrashing
    by flattening the tree structure into cache-line-sized arrays that are
    indexed efficiently.

*/
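
/*
For exposition, a minimal usage sketch of the std::barrier interface defined
below.  The thread count of 4 and the lambda bodies are placeholder
assumptions, not part of this header:

    #include <barrier>
    #include <thread>
    #include <vector>

    int main()
    {
      // The completion runs exactly once per phase, after all 4 arrive.
      std::barrier sync_point(4, []() noexcept { });
      std::vector<std::jthread> workers;
      for (int i = 0; i < 4; ++i)
	workers.emplace_back([&sync_point] {
	  // ... per-phase work ...
	  sync_point.arrive_and_wait(); // block until every thread arrives
	});
    }
*/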

  enum class __barrier_phase_t : unsigned char { };

  template<typename _CompletionF>
    class __tree_barrier
    {
      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
      static constexpr auto __phase_alignment =
		      __atomic_phase_ref_t::required_alignment;

      using __tickets_t = std::array<__barrier_phase_t, 64>;
      struct alignas(64) /* naturally-align the heap state */ __state_t
      {
	alignas(__phase_alignment) __tickets_t __tickets;
      };

      ptrdiff_t _M_expected;
      unique_ptr<__state_t[]> _M_state;
      __atomic_base<ptrdiff_t> _M_expected_adjustment;
      _CompletionF _M_completion;

      alignas(__phase_alignment) __barrier_phase_t _M_phase;

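      // Phases advance by 2 per barrier cycle.  At each tree node, a ticket
      // moves from the old phase by a half step (+1) when the first of a
      // pair of threads arrives, and by a full step (+2) when the second
      // arrives; the thread completing the full step carries the arrival up
      // to the next round.  Returns true only for the single thread that
      // wins the final round.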
      bool
      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
      {
	const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
	const auto __half_step =
			   static_cast<__barrier_phase_t>(__old_phase_val + 1);
	const auto __full_step =
			   static_cast<__barrier_phase_t>(__old_phase_val + 2);

	size_t __current_expected = _M_expected;
	__current %= ((_M_expected + 1) >> 1);

	for (int __round = 0; ; ++__round)
	  {
	    if (__current_expected <= 1)
	      return true;
	    size_t const __end_node = ((__current_expected + 1) >> 1),
			 __last_node = __end_node - 1;
	    for ( ; ; ++__current)
	      {
		if (__current == __end_node)
		  __current = 0;
		auto __expect = __old_phase;
		__atomic_phase_ref_t __phase(_M_state[__current]
						.__tickets[__round]);
		if (__current == __last_node && (__current_expected & 1))
		  {
		    if (__phase.compare_exchange_strong(__expect, __full_step,
							memory_order_acq_rel))
		      break;     // I'm 1 in 1, go to next __round
		  }
		else if (__phase.compare_exchange_strong(__expect, __half_step,
							 memory_order_acq_rel))
		  {
		    return false; // I'm 1 in 2, done with arrival
		  }
		else if (__expect == __half_step)
		  {
		    if (__phase.compare_exchange_strong(__expect, __full_step,
							memory_order_acq_rel))
		      break;    // I'm 2 in 2, go to next __round
		  }
	      }
	    __current_expected = __last_node + 1;
	    __current >>= 1;
	  }
      }

    public:
      using arrival_token = __barrier_phase_t;

      static constexpr ptrdiff_t
      max() noexcept
      { return __PTRDIFF_MAX__; }

      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
	  : _M_expected(__expected), _M_expected_adjustment(0),
	    _M_completion(std::move(__completion)),
	    _M_phase(static_cast<__barrier_phase_t>(0))
      {
	size_t const __count = (_M_expected + 1) >> 1;

	_M_state = std::make_unique<__state_t[]>(__count);
      }

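      // Arrival slots are chosen by hashing the caller's thread id, since
      // std::barrier threads carry no indices.  The thread whose arrival
      // completes a cycle runs the completion, folds any arrive_and_drop()
      // adjustments into _M_expected, publishes the new phase, and wakes
      // all waiters.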
      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update)
      {
	std::hash<std::thread::id> __hasher;
	size_t __current = __hasher(std::this_thread::get_id());
	__atomic_phase_ref_t __phase(_M_phase);
	const auto __old_phase = __phase.load(memory_order_relaxed);
	const auto __cur = static_cast<unsigned char>(__old_phase);
	for (; __update; --__update)
	  {
	    if (_M_arrive(__old_phase, __current))
	      {
		_M_completion();
		_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
		_M_expected_adjustment.store(0, memory_order_relaxed);
		auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
		__phase.store(__new_phase, memory_order_release);
		__phase.notify_all();
	      }
	  }
	return __old_phase;
      }

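      // An arrival_token is just the phase value observed on arrival:
      // waiting blocks until _M_phase has advanced past that value.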
      void
      wait(arrival_token&& __old_phase) const
      {
	__atomic_phase_const_ref_t __phase(_M_phase);
	auto const __test_fn = [=]
	  {
	    return __phase.load(memory_order_acquire) != __old_phase;
	  };
	std::__atomic_wait_address(&_M_phase, __test_fn);
      }

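      // The drop takes effect at the end of the current cycle: the
      // decrement is recorded here and folded into _M_expected by the
      // thread that completes the phase, while this call still counts
      // as one arrival toward it.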
      void
      arrive_and_drop()
      {
	_M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
	(void)arrive(1);
      }
    };

  template<typename _CompletionF = __empty_completion>
    class barrier
    {
      // Note: we may introduce a "central" barrier algorithm at some point,
      // for more space-constrained targets.
      using __algorithm_t = __tree_barrier<_CompletionF>;
      __algorithm_t _M_b;

    public:
      class arrival_token final
      {
      public:
	arrival_token(arrival_token&&) = default;
	arrival_token& operator=(arrival_token&&) = default;
	~arrival_token() = default;

      private:
	friend class barrier;
	using __token = typename __algorithm_t::arrival_token;
	explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
	__token _M_tok;
      };

      static constexpr ptrdiff_t
      max() noexcept
      { return __algorithm_t::max(); }

      explicit
      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
      : _M_b(__count, std::move(__completion))
      { }

      barrier(barrier const&) = delete;
      barrier& operator=(barrier const&) = delete;

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update = 1)
      { return arrival_token{_M_b.arrive(__update)}; }

      void
      wait(arrival_token&& __phase) const
      { _M_b.wait(std::move(__phase._M_tok)); }

      void
      arrive_and_wait()
      { wait(arrive()); }

      void
      arrive_and_drop()
      { _M_b.arrive_and_drop(); }
    };

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
#endif // __cplusplus > 201703L
#endif // _GLIBCXX_BARRIER