xref: /netbsd-src/external/gpl3/gcc/dist/libgomp/config/gcn/bar.c (revision b2c35e17b976cf7ccd7250c86c6f5e95090ed636)
/* Copyright (C) 2015-2022 Free Software Foundation, Inc.
   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This is an AMD GCN specific implementation of a barrier synchronization
   mechanism for libgomp.  This type is private to the library.  The
   implementation uses atomic instructions and the s_barrier instruction.
   It uses MEMMODEL_RELAXED here because barriers are within workgroups
   and therefore don't need to flush caches.  */
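
/* A rough sketch of the state used below, inferred from this file (the
   authoritative definitions presumably live in the target's bar.h):
   bar->total is the number of participating threads, bar->awaited counts
   the threads that have not yet arrived, and bar->generation is advanced
   by BAR_INCR each time the barrier completes, with its low bits holding
   the BAR_TASK_PENDING, BAR_WAITING_FOR_TASK and BAR_CANCELLED flags.
   BAR_WAS_LAST is set in the state value handed back by
   gomp_barrier_wait_start to the thread that was last to arrive.  */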

#include <limits.h>
#include "libgomp.h"


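/* Complete a simple barrier begun by gomp_barrier_wait_start.  The last
   thread to arrive resets the arrival count and advances the generation;
   all threads then meet at the hardware s_barrier so the workgroup leaves
   the barrier together.  */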
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      bar->awaited = bar->total;
      __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
                        MEMMODEL_RELAXED);
    }
  if (bar->total > 1)
    asm ("s_barrier" ::: "memory");
}

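/* Arrive at the simple barrier and wait for all other threads.  */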
void
gomp_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

/* Like gomp_barrier_wait, except that if the encountering thread
   is not the last one to hit the barrier, it returns immediately.
   The intended usage is that a thread which intends to gomp_barrier_destroy
   this barrier calls gomp_barrier_wait, while all other threads
   call gomp_barrier_wait_last.  When gomp_barrier_wait returns,
   the barrier can be safely destroyed.  */
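
/* A minimal usage sketch of the contract described above (illustrative
   only; the surrounding setup and teardown are hypothetical):

     // Thread that owns the barrier's lifetime:
     gomp_barrier_wait (&bar);
     gomp_barrier_destroy (&bar);

     // Every other participant:
     gomp_barrier_wait_last (&bar);
*/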

void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
  /* Deferring to gomp_barrier_wait does not use the optimization opportunity
     allowed by the interface contract for all-but-last participants.  The
     original implementation in config/linux/bar.c handles this better.  */
  gomp_barrier_wait (bar);
}

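/* Wake threads waiting at the team barrier.  There is no futex to signal
   on this target; the waiters simply meet at another s_barrier, so COUNT
   is unused.  */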
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
  if (bar->total > 1)
    asm ("s_barrier" ::: "memory");
}

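/* Complete a team barrier begun by gomp_barrier_wait_start.  The last
   thread to arrive resets the arrival count, then either processes queued
   tasks or advances the generation and releases the team.  The remaining
   threads repeatedly meet at s_barrier, running pending tasks and
   re-checking the generation, until it has advanced; a bounded retry count
   turns a barrier that can never complete (e.g. a missing thread) into an
   abort rather than an infinite loop.  */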
void
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
        {
          gomp_barrier_handle_tasks (state);
          state &= ~BAR_WAS_LAST;
        }
      else
        {
          state &= ~BAR_CANCELLED;
          state += BAR_INCR - BAR_WAS_LAST;
          __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
          if (bar->total > 1)
            asm ("s_barrier" ::: "memory");
          return;
        }
    }

  generation = state;
  state &= ~BAR_CANCELLED;
  int retry = 100;
  do
    {
      if (retry-- == 0)
        {
          /* Barriers really shouldn't get out of sync, but if they do this
             loop would spin until they realigned.  If a thread has simply
             gone missing that will never happen, so give up and abort
             rather than hang forever.  */
          const char msg[] = ("Barrier sync failed (another thread died?);"
                              " aborting.");
          write (2, msg, sizeof (msg) - 1);
          abort ();
        }

      asm ("s_barrier" ::: "memory");
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
        {
          gomp_barrier_handle_tasks (state);
          gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
        }
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);
}

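/* Arrive at the team barrier and wait, running queued tasks as needed.  */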
void
gomp_team_barrier_wait (gomp_barrier_t *bar)
{
  gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

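/* Like gomp_team_barrier_wait, but entered via gomp_barrier_wait_final_start;
   the last thread to arrive also resets bar->awaited_final.  */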
void
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
{
  gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    bar->awaited_final = bar->total;
  gomp_team_barrier_wait_end (bar, state);
}

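/* Cancellable counterpart of gomp_team_barrier_wait_end.  Returns true if
   the barrier was cancelled (BAR_CANCELLED observed) before completing,
   false once all threads have arrived normally.  */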
bool
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
                                   gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      /* BAR_CANCELLED should never be set in STATE here, because
         cancellation means that at least one of the threads has been
         cancelled, so on a cancellable barrier we should never see
         all of the threads arrive.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
        {
          gomp_barrier_handle_tasks (state);
          state &= ~BAR_WAS_LAST;
        }
      else
        {
          state += BAR_INCR - BAR_WAS_LAST;
          __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
          if (bar->total > 1)
            asm ("s_barrier" ::: "memory");
          return false;
        }
    }

  if (__builtin_expect (state & BAR_CANCELLED, 0))
    return true;

  generation = state;
  int retry = 100;
  do
    {
      if (retry-- == 0)
        {
          /* Barriers really shouldn't get out of sync, but if they do this
             loop would spin until they realigned.  If a thread has simply
             gone missing that will never happen, so give up and abort
             rather than hang forever.  */
          const char msg[] = ("Barrier sync failed (another thread died?);"
                              " aborting.");
          write (2, msg, sizeof (msg) - 1);
          abort ();
        }

      if (bar->total > 1)
        asm ("s_barrier" ::: "memory");
      gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
      if (__builtin_expect (gen & BAR_CANCELLED, 0))
        return true;
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
        {
          gomp_barrier_handle_tasks (state);
          gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
        }
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);

  return false;
}

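/* Arrive at the cancellable team barrier; returns true if the barrier was
   cancelled rather than completed.  */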
bool
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
{
  return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
}

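/* Mark the team barrier as cancelled under the task lock, then wake the
   waiters so that threads blocked in gomp_team_barrier_wait_cancel can
   observe BAR_CANCELLED and return.  */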
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
  gomp_mutex_lock (&team->task_lock);
  if (team->barrier.generation & BAR_CANCELLED)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  team->barrier.generation |= BAR_CANCELLED;
  gomp_mutex_unlock (&team->task_lock);
  gomp_team_barrier_wake (&team->barrier, INT_MAX);
}