/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines to manage the work-share queue for a team
   of threads.  */

#include "libgomp.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>


/* Allocate a new work share structure, preferably from current team's
   free gomp_work_share cache.  */

static struct gomp_work_share *
alloc_work_share (struct gomp_team *team)
{
  struct gomp_work_share *ws;
  unsigned int i;

  /* This is called in a critical section.  */
  if (team->work_share_list_alloc != NULL)
    {
      ws = team->work_share_list_alloc;
      team->work_share_list_alloc = ws->next_free;
      return ws;
    }

#ifdef HAVE_SYNC_BUILTINS
  ws = team->work_share_list_free;
  /* We need atomic read from work_share_list_free,
     as free_work_share can be called concurrently.  */
  __asm ("" : "+r" (ws));

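  /* Harvest everything after the list head; the head itself stays on
     work_share_list_free because concurrent free_work_share calls
     compare-and-swap on that pointer, so leaving it in place lets us
     detach the tail without updating the head atomically.  */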
  if (ws && ws->next_free)
    {
      struct gomp_work_share *next = ws->next_free;
      ws->next_free = NULL;
      team->work_share_list_alloc = next->next_free;
      return next;
    }
#else
  gomp_mutex_lock (&team->work_share_list_free_lock);
  ws = team->work_share_list_free;
  if (ws)
    {
      team->work_share_list_alloc = ws->next_free;
      team->work_share_list_free = NULL;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
      return ws;
    }
  gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif

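  /* Both free lists are empty: allocate a new chunk, twice as large as
     the previous one.  The chunk is linked into the next_alloc chain
     rooted at team->work_shares[0] so it can be released when the team
     is destroyed; ws[0] is returned and ws[1] .. ws[chunk - 1] seed the
     allocation free list.  */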
  team->work_share_chunk *= 2;
  ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
  ws->next_alloc = team->work_shares[0].next_alloc;
  team->work_shares[0].next_alloc = ws;
  team->work_share_list_alloc = &ws[1];
  for (i = 1; i < team->work_share_chunk - 1; i++)
    ws[i].next_free = &ws[i + 1];
  ws[i].next_free = NULL;
  return ws;
}

/* Initialize an already allocated struct gomp_work_share.
   This shouldn't touch the next_alloc field.  */

void
gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
                      unsigned nthreads)
{
  gomp_mutex_init (&ws->lock);
  if (__builtin_expect (ordered, 0))
    {
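      /* Number of team ids that fit in the inline_ordered_team_ids
         buffer at the end of struct gomp_work_share; larger teams fall
         back to a separate heap allocation below.  */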
#define INLINE_ORDERED_TEAM_IDS_CNT \
  ((sizeof (struct gomp_work_share) \
    - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
   / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))

      if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)
        ws->ordered_team_ids
          = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
      else
        ws->ordered_team_ids = ws->inline_ordered_team_ids;
      memset (ws->ordered_team_ids, '\0',
              nthreads * sizeof (*ws->ordered_team_ids));
      ws->ordered_num_used = 0;
      ws->ordered_owner = -1;
      ws->ordered_cur = 0;
    }
  else
    ws->ordered_team_ids = NULL;
  gomp_ptrlock_init (&ws->next_ws, NULL);
  ws->threads_completed = 0;
}

/* Do any needed destruction of gomp_work_share fields before it
   is put back into free gomp_work_share cache or freed.  */

void
gomp_fini_work_share (struct gomp_work_share *ws)
{
  gomp_mutex_destroy (&ws->lock);
  if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
    free (ws->ordered_team_ids);
  gomp_ptrlock_destroy (&ws->next_ws);
}

/* Free a work share struct; if not orphaned, put it into the current
   team's free gomp_work_share cache.  */

static inline void
free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
{
  gomp_fini_work_share (ws);
  if (__builtin_expect (team == NULL, 0))
    free (ws);
  else
    {
      struct gomp_work_share *next_ws;
#ifdef HAVE_SYNC_BUILTINS
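      /* Lock-free push of WS onto the head of the team's free list;
         retry if another thread updated the head concurrently.  */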
      do
        {
          next_ws = team->work_share_list_free;
          ws->next_free = next_ws;
        }
      while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
                                            next_ws, ws));
#else
      gomp_mutex_lock (&team->work_share_list_free_lock);
      next_ws = team->work_share_list_free;
      ws->next_free = next_ws;
      team->work_share_list_free = ws;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif
    }
}

/* The current thread is ready to begin the next work sharing construct.
   In all cases, thr->ts.work_share is updated to point to the new
   structure.  In all cases the work_share lock is locked.  Return true
   if this was the first thread to reach this point.  */

bool
gomp_work_share_start (bool ordered)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      ws = gomp_malloc (sizeof (*ws));
      gomp_init_work_share (ws, ordered, 1);
      thr->ts.work_share = ws;
      return ws;
    }

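  /* thr->ts.work_share still points at the work share of the previous
     construct.  gomp_ptrlock_get on its next_ws pointer returns NULL to
     exactly one thread, which becomes responsible for allocating and
     initializing the next work share; the remaining threads wait inside
     gomp_ptrlock_get until the first thread publishes the new structure
     (see gomp_work_share_init_done).  */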
  ws = thr->ts.work_share;
  thr->ts.last_work_share = ws;
  ws = gomp_ptrlock_get (&ws->next_ws);
  if (ws == NULL)
    {
      /* This thread encountered a new ws first.  */
      struct gomp_work_share *ws = alloc_work_share (team);
      gomp_init_work_share (ws, ordered, team->nthreads);
      thr->ts.work_share = ws;
      return true;
    }
  else
    {
      thr->ts.work_share = ws;
      return false;
    }
}

/* The current thread is done with its current work sharing construct.
   This version does imply a barrier at the end of the work-share.  */

void
gomp_work_share_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, thr->ts.work_share);
      thr->ts.work_share = NULL;
      return;
    }

  bstate = gomp_barrier_wait_start (&team->barrier);

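  /* The last thread to arrive at the barrier retires the previous work
     share: by then every thread has moved on to the current one, so
     thr->ts.last_work_share can go back on the free list.  The current
     work share is remembered in team->work_shares_to_free so team
     teardown can finish cleaning it up later.  */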
  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
        {
          team->work_shares_to_free = thr->ts.work_share;
          free_work_share (team, thr->ts.last_work_share);
        }
    }

  gomp_team_barrier_wait_end (&team->barrier, bstate);
  thr->ts.last_work_share = NULL;
}

/* The current thread is done with its current work sharing construct.
   This version implies a cancellable barrier at the end of the work-share.  */

bool
gomp_work_share_end_cancel (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Cancellable work sharing constructs cannot be orphaned.  */
  bstate = gomp_barrier_wait_cancel_start (&team->barrier);

  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
        {
          team->work_shares_to_free = thr->ts.work_share;
          free_work_share (team, thr->ts.last_work_share);
        }
    }
  thr->ts.last_work_share = NULL;

  return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
}

/* The current thread is done with its current work sharing construct.
   This version does NOT imply a barrier at the end of the work-share.  */

void
gomp_work_share_end_nowait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned completed;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, ws);
      thr->ts.work_share = NULL;
      return;
    }

  if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
    return;

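  /* Without a barrier, count the threads that are done with the current
     work share; the thread that brings the count to team->nthreads knows
     every thread has moved past the previous work share and frees it.  */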
#ifdef HAVE_SYNC_BUILTINS
  completed = __sync_add_and_fetch (&ws->threads_completed, 1);
#else
  gomp_mutex_lock (&ws->lock);
  completed = ++ws->threads_completed;
  gomp_mutex_unlock (&ws->lock);
#endif

  if (completed == team->nthreads)
    {
      team->work_shares_to_free = thr->ts.work_share;
      free_work_share (team, thr->ts.last_work_share);
    }
  thr->ts.last_work_share = NULL;
}