/* Copyright (C) 2005 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or
   (at your option) any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
   more details.

   You should have received a copy of the GNU Lesser General Public License
   along with libgomp; see the file COPYING.LIB.  If not, write to the
   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1301, USA.  */

/* As a special exception, if you link this library with other files, some
   of which are compiled with GCC, to produce an executable, this library
   does not by itself cause the resulting executable to be covered by the
   GNU General Public License.  This exception does not however invalidate
   any other reasons why the executable file might be covered by the GNU
   General Public License.  */

/* This file contains routines to manage the work-share queue for a team
   of threads.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
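
/* Overview (informational): team->work_shares is a power-of-two sized
   circular buffer indexed by (generation & team->generation_mask).
   team->oldest_live_gen and team->num_live_gen delimit the window of
   generations still in use, and each thread tracks its own position
   in the stream via thr->ts.work_share_generation.  */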


/* Create a new work share structure.  */

struct gomp_work_share *
gomp_new_work_share (bool ordered, unsigned nthreads)
{
  struct gomp_work_share *ws;
  size_t size;

  size = sizeof (*ws);
  if (ordered)
    size += nthreads * sizeof (ws->ordered_team_ids[0]);

  ws = gomp_malloc_cleared (size);
  gomp_mutex_init (&ws->lock);
  ws->ordered_owner = -1;

  return ws;
}
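
/* Allocation layout (informational): with ORDERED set, the
   ordered_team_ids array lives in the extra space allocated directly
   after the structure itself, so the single allocation above is

     sizeof (struct gomp_work_share)
       + nthreads * sizeof (ws->ordered_team_ids[0])

   bytes; gomp_malloc_cleared returns it zero-filled, which also
   initializes threads_completed and the other bookkeeping fields.  */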


/* Free a work share structure.  */

static void
free_work_share (struct gomp_work_share *ws)
{
  gomp_mutex_destroy (&ws->lock);
  free (ws);
}


/* The current thread is ready to begin the next work sharing construct.
   In all cases, thr->ts.work_share is updated to point to the new
   structure.  In all cases the work_share lock is locked.  Return true
   if this was the first thread to reach this point.  */

bool
gomp_work_share_start (bool ordered)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws;
  unsigned ws_index, ws_gen;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      ws = gomp_new_work_share (ordered, 1);
      thr->ts.work_share = ws;
      thr->ts.static_trip = 0;
      gomp_mutex_lock (&ws->lock);
      return true;
    }

  gomp_mutex_lock (&team->work_share_lock);

  /* This thread is beginning its next generation.  */
  ws_gen = ++thr->ts.work_share_generation;

  /* If this next generation is not newer than any other generation in
     the team, then simply reference the existing construct.  */
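  /* (The unsigned subtraction below keeps this membership test correct
     even if the generation counter eventually wraps around.)  */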
  if (ws_gen - team->oldest_live_gen < team->num_live_gen)
    {
      ws_index = ws_gen & team->generation_mask;
      ws = team->work_shares[ws_index];
      thr->ts.work_share = ws;
      thr->ts.static_trip = 0;

      gomp_mutex_lock (&ws->lock);
      gomp_mutex_unlock (&team->work_share_lock);

      return false;
    }

  /* Resize the work shares queue if we've run out of space.  */
  if (team->num_live_gen++ == team->generation_mask)
    {
      team->work_shares = gomp_realloc (team->work_shares,
                                        2 * team->num_live_gen
                                        * sizeof (*team->work_shares));

      /* Unless oldest_live_gen is zero, the sequence of live elements
         wraps around the end of the array.  If we do nothing, we break
         lookup of the existing elements.  Fix that by unwrapping the
         data from the front to the end.  */
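      /* Worked example (illustrative): suppose generation_mask == 3,
         oldest_live_gen == 2, and generations 2, 3 and 4 are live in
         slots 2, 3 and 0 when generation 5 triggers this resize.
         After doubling, the new mask is 7: generations 2 and 3 stay
         in slots 2 and 3, but generation 4 must move from slot 0 to
         slot 4.  The memcpy below does exactly that by relocating the
         (oldest_live_gen & generation_mask) leading entries to the
         start of the new half; the stale entry copied from slot 1 is
         harmless and is overwritten when generation 5 lands in
         slot 5.  */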
      if (team->oldest_live_gen > 0)
        memcpy (team->work_shares + team->num_live_gen,
                team->work_shares,
                (team->oldest_live_gen & team->generation_mask)
                * sizeof (*team->work_shares));

      team->generation_mask = team->generation_mask * 2 + 1;
    }

  ws_index = ws_gen & team->generation_mask;
  ws = gomp_new_work_share (ordered, team->nthreads);
  thr->ts.work_share = ws;
  thr->ts.static_trip = 0;
  team->work_shares[ws_index] = ws;

  gomp_mutex_lock (&ws->lock);
  gomp_mutex_unlock (&team->work_share_lock);

  return true;
}
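
/* Caller's side of the protocol above (a sketch for orientation; the
   real callers are the construct entry points in loop.c, sections.c
   and single.c, and gomp_loop_init here stands in for whatever
   construct-specific initialization is performed while the lock is
   held):

     struct gomp_thread *thr = gomp_thread ();
     if (gomp_work_share_start (false))
       gomp_loop_init (thr->ts.work_share, start, end, incr, sched, chunk);
     gomp_mutex_unlock (&thr->ts.work_share->lock);

   Threads that lose the race return false but still contend for the
   same lock, so they observe fully initialized data once they acquire
   it.  */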


/* The current thread is done with its current work sharing construct.
   This version does imply a barrier at the end of the work-share.  */

void
gomp_work_share_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  bool last;

  thr->ts.work_share = NULL;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (ws);
      return;
    }

  last = gomp_barrier_wait_start (&team->barrier);

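  /* If we were the last thread to arrive at the barrier, every thread
     has finished the construct, so this is necessarily the only live
     generation; retire it on behalf of the whole team.  */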
  if (last)
    {
      unsigned ws_index;

      ws_index = thr->ts.work_share_generation & team->generation_mask;
      team->work_shares[ws_index] = NULL;
      team->oldest_live_gen++;
      team->num_live_gen = 0;

      free_work_share (ws);
    }

  gomp_barrier_wait_end (&team->barrier, last);
}


/* The current thread is done with its current work sharing construct.
   This version does NOT imply a barrier at the end of the work-share.  */

void
gomp_work_share_end_nowait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned completed;

  thr->ts.work_share = NULL;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (ws);
      return;
    }

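  /* Count this thread out; whichever thread brings the count to
     nthreads is the last one through and retires the work share
     below.  */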
#ifdef HAVE_SYNC_BUILTINS
  completed = __sync_add_and_fetch (&ws->threads_completed, 1);
#else
  gomp_mutex_lock (&ws->lock);
  completed = ++ws->threads_completed;
  gomp_mutex_unlock (&ws->lock);
#endif

  if (completed == team->nthreads)
    {
      unsigned ws_index;

      gomp_mutex_lock (&team->work_share_lock);

      ws_index = thr->ts.work_share_generation & team->generation_mask;
      team->work_shares[ws_index] = NULL;
      team->oldest_live_gen++;
      team->num_live_gen--;

      gomp_mutex_unlock (&team->work_share_lock);

      free_work_share (ws);
    }
}