xref: /netbsd-src/external/gpl3/gcc/dist/libstdc++-v3/include/std/barrier (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1// <barrier> -*- C++ -*-
2
3// Copyright (C) 2020-2022 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23// <http://www.gnu.org/licenses/>.
24
25// This implementation is based on libcxx/include/barrier
26//===-- barrier.h --------------------------------------------------===//
27//
28// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
29// See https://llvm.org/LICENSE.txt for license information.
30// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
31//
32//===---------------------------------------------------------------===//
33
34/** @file include/barrier
35 *  This is a Standard C++ Library header.
36 */
37
38#ifndef _GLIBCXX_BARRIER
39#define _GLIBCXX_BARRIER 1
40
41#pragma GCC system_header
42
43#if __cplusplus > 201703L
44#include <bits/atomic_base.h>
45#if __cpp_lib_atomic_wait && __cpp_aligned_new
46#include <bits/std_thread.h>
47#include <bits/unique_ptr.h>
48
49#include <array>
50
51#define __cpp_lib_barrier 201907L
52
53namespace std _GLIBCXX_VISIBILITY(default)
54{
55_GLIBCXX_BEGIN_NAMESPACE_VERSION
56
57  struct __empty_completion
58  {
59    _GLIBCXX_ALWAYS_INLINE void
60    operator()() noexcept
61    { }
62  };
63
64/*
65
66The default implementation of __tree_barrier is a classic tree barrier.
67
68It looks different from literature pseudocode for two main reasons:
69 1. Threads that call into std::barrier functions do not provide indices,
70    so a numbering step is added before the actual barrier algorithm,
71    appearing as an N+1 round to the N rounds of the tree barrier.
72 2. A great deal of attention has been paid to avoid cache line thrashing
73    by flattening the tree structure into cache-line sized arrays, that
74    are indexed in an efficient way.
75
76*/
77
78  enum class __barrier_phase_t : unsigned char { };
79
80  template<typename _CompletionF>
81    class __tree_barrier
82    {
83      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
84      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
85      static constexpr auto __phase_alignment =
86		      __atomic_phase_ref_t::required_alignment;
87
88      using __tickets_t = std::array<__barrier_phase_t, 64>;
89      struct alignas(64) /* naturally-align the heap state */ __state_t
90      {
91	alignas(__phase_alignment) __tickets_t __tickets;
92      };
93
94      ptrdiff_t _M_expected;
95      unique_ptr<__state_t[]> _M_state;
96      __atomic_base<ptrdiff_t> _M_expected_adjustment;
97      _CompletionF _M_completion;
98
99      alignas(__phase_alignment) __barrier_phase_t  _M_phase;
100
101      bool
102      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
103      {
104	const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
105	const auto __half_step =
106			   static_cast<__barrier_phase_t>(__old_phase_val + 1);
107	const auto __full_step =
108			   static_cast<__barrier_phase_t>(__old_phase_val + 2);
109
110	size_t __current_expected = _M_expected;
111	__current %= ((_M_expected + 1) >> 1);
112
113	for (int __round = 0; ; ++__round)
114	  {
115	    if (__current_expected <= 1)
116		return true;
117	    size_t const __end_node = ((__current_expected + 1) >> 1),
118			 __last_node = __end_node - 1;
119	    for ( ; ; ++__current)
120	      {
121		if (__current == __end_node)
122		  __current = 0;
123		auto __expect = __old_phase;
124		__atomic_phase_ref_t __phase(_M_state[__current]
125						.__tickets[__round]);
126		if (__current == __last_node && (__current_expected & 1))
127		  {
128		    if (__phase.compare_exchange_strong(__expect, __full_step,
129						        memory_order_acq_rel))
130		      break;     // I'm 1 in 1, go to next __round
131		  }
132		else if (__phase.compare_exchange_strong(__expect, __half_step,
133						         memory_order_acq_rel))
134		  {
135		    return false; // I'm 1 in 2, done with arrival
136		  }
137		else if (__expect == __half_step)
138		  {
139		    if (__phase.compare_exchange_strong(__expect, __full_step,
140						        memory_order_acq_rel))
141		      break;    // I'm 2 in 2, go to next __round
142		  }
143	      }
144	    __current_expected = __last_node + 1;
145	    __current >>= 1;
146	  }
147      }
148
149    public:
150      using arrival_token = __barrier_phase_t;
151
152      static constexpr ptrdiff_t
153      max() noexcept
154      { return __PTRDIFF_MAX__; }
155
156      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
157	  : _M_expected(__expected), _M_expected_adjustment(0),
158	    _M_completion(move(__completion)),
159	    _M_phase(static_cast<__barrier_phase_t>(0))
160      {
161	size_t const __count = (_M_expected + 1) >> 1;
162
163	_M_state = std::make_unique<__state_t[]>(__count);
164      }
165
166      [[nodiscard]] arrival_token
167      arrive(ptrdiff_t __update)
168      {
169	std::hash<std::thread::id> __hasher;
170	size_t __current = __hasher(std::this_thread::get_id());
171	__atomic_phase_ref_t __phase(_M_phase);
172	const auto __old_phase = __phase.load(memory_order_relaxed);
173	const auto __cur = static_cast<unsigned char>(__old_phase);
174	for(; __update; --__update)
175	  {
176	    if(_M_arrive(__old_phase, __current))
177	      {
178		_M_completion();
179		_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
180		_M_expected_adjustment.store(0, memory_order_relaxed);
181		auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
182		__phase.store(__new_phase, memory_order_release);
183		__phase.notify_all();
184	      }
185	  }
186	return __old_phase;
187      }
188
189      void
190      wait(arrival_token&& __old_phase) const
191      {
192	__atomic_phase_const_ref_t __phase(_M_phase);
193	auto const __test_fn = [=]
194	  {
195	    return __phase.load(memory_order_acquire) != __old_phase;
196	  };
197	std::__atomic_wait_address(&_M_phase, __test_fn);
198      }
199
200      void
201      arrive_and_drop()
202      {
203	_M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
204	(void)arrive(1);
205      }
206    };
207
208  template<typename _CompletionF = __empty_completion>
209    class barrier
210    {
211      // Note, we may introduce a "central" barrier algorithm at some point
212      // for more space constrained targets
213      using __algorithm_t = __tree_barrier<_CompletionF>;
214      __algorithm_t _M_b;
215
216    public:
217      class arrival_token final
218      {
219      public:
220	arrival_token(arrival_token&&) = default;
221	arrival_token& operator=(arrival_token&&) = default;
222	~arrival_token() = default;
223
224      private:
225	friend class barrier;
226	using __token = typename __algorithm_t::arrival_token;
227	explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
228	__token _M_tok;
229      };
230
231      static constexpr ptrdiff_t
232      max() noexcept
233      { return __algorithm_t::max(); }
234
235      explicit
236      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
237      : _M_b(__count, std::move(__completion))
238      { }
239
240      barrier(barrier const&) = delete;
241      barrier& operator=(barrier const&) = delete;
242
243      [[nodiscard]] arrival_token
244      arrive(ptrdiff_t __update = 1)
245      { return arrival_token{_M_b.arrive(__update)}; }
246
247      void
248      wait(arrival_token&& __phase) const
249      { _M_b.wait(std::move(__phase._M_tok)); }
250
251      void
252      arrive_and_wait()
253      { wait(arrive()); }
254
255      void
256      arrive_and_drop()
257      { _M_b.arrive_and_drop(); }
258    };
259
260_GLIBCXX_END_NAMESPACE_VERSION
261} // namespace
262#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
263#endif // __cplusplus > 201703L
264#endif // _GLIBCXX_BARRIER
265