1*629ff9f7SJohn Marino /* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc.
2*629ff9f7SJohn Marino Contributed by Richard Henderson <rth@redhat.com>.
3*629ff9f7SJohn Marino
4*629ff9f7SJohn Marino This file is part of the GNU OpenMP Library (libgomp).
5*629ff9f7SJohn Marino
6*629ff9f7SJohn Marino Libgomp is free software; you can redistribute it and/or modify it
7*629ff9f7SJohn Marino under the terms of the GNU General Public License as published by
8*629ff9f7SJohn Marino the Free Software Foundation; either version 3, or (at your option)
9*629ff9f7SJohn Marino any later version.
10*629ff9f7SJohn Marino
11*629ff9f7SJohn Marino Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12*629ff9f7SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13*629ff9f7SJohn Marino FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14*629ff9f7SJohn Marino more details.
15*629ff9f7SJohn Marino
16*629ff9f7SJohn Marino Under Section 7 of GPL version 3, you are granted additional
17*629ff9f7SJohn Marino permissions described in the GCC Runtime Library Exception, version
18*629ff9f7SJohn Marino 3.1, as published by the Free Software Foundation.
19*629ff9f7SJohn Marino
20*629ff9f7SJohn Marino You should have received a copy of the GNU General Public License and
21*629ff9f7SJohn Marino a copy of the GCC Runtime Library Exception along with this program;
22*629ff9f7SJohn Marino see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23*629ff9f7SJohn Marino <http://www.gnu.org/licenses/>. */
24*629ff9f7SJohn Marino
25*629ff9f7SJohn Marino /* This file contains routines for managing work-share iteration, both
26*629ff9f7SJohn Marino for loops and sections. */
27*629ff9f7SJohn Marino
28*629ff9f7SJohn Marino #include "libgomp.h"
29*629ff9f7SJohn Marino #include <stdlib.h>
30*629ff9f7SJohn Marino
31*629ff9f7SJohn Marino typedef unsigned long long gomp_ull;
32*629ff9f7SJohn Marino
33*629ff9f7SJohn Marino /* This function implements the STATIC scheduling method. The caller should
34*629ff9f7SJohn Marino iterate *pstart <= x < *pend. Return zero if there are more iterations
35*629ff9f7SJohn Marino to perform; nonzero if not. Return less than 0 if this thread had
36*629ff9f7SJohn Marino received the absolutely last iteration. */
37*629ff9f7SJohn Marino
int
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip == -1 records that this thread already received the
     absolutely last iterations on a previous call; nothing remains.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next_ull;
      *pend = ws->end_ull;
      thr->ts.static_trip = -1;
      /* Zero ("more work") unless the iteration range is empty.  */
      return ws->next_ull == ws->end_ull;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size_ull == 0)
    {
      gomp_ull n, q, i, t, s0, e0, s, e;

      /* Each thread gets exactly one slice, so any second call
	 reports "done".  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  */
      /* NOTE(review): nonzero ws->mode appears to mark a downward loop
	 whose negative increment is stored as an unsigned value, hence
	 the division by -ws->incr_ull (unsigned negation/wraparound is
	 deliberate) — confirm against the work-share init code.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
	 if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      t = n % nthreads;
      /* The first T threads each take Q+1 iterations; the rest take Q.
	 After this, T is the offset (in iterations) of this thread's
	 slice relative to an even Q-per-thread split.  */
      if (i < t)
	{
	  t = 0;
	  q++;
	}
      s0 = q * i + t;
      e0 = s0 + q;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;
      /* -1 marks the thread that received the very last iteration.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      gomp_ull n, s0, e0, i, c, s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;
      c = ws->chunk_size_ull;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  static_trip counts how many
	 chunks this thread has already taken.  */
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;

      /* -1 marks the final chunk; otherwise advance the trip count so
	 the next call skips ahead by nthreads chunks.  */
      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
142*629ff9f7SJohn Marino
143*629ff9f7SJohn Marino
144*629ff9f7SJohn Marino /* This function implements the DYNAMIC scheduling method. Arguments are
145*629ff9f7SJohn Marino as for gomp_iter_ull_static_next. This function must be called with
146*629ff9f7SJohn Marino ws->lock held. */
147*629ff9f7SJohn Marino
148*629ff9f7SJohn Marino bool
gomp_iter_ull_dynamic_next_locked(gomp_ull * pstart,gomp_ull * pend)149*629ff9f7SJohn Marino gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
150*629ff9f7SJohn Marino {
151*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
152*629ff9f7SJohn Marino struct gomp_work_share *ws = thr->ts.work_share;
153*629ff9f7SJohn Marino gomp_ull start, end, chunk, left;
154*629ff9f7SJohn Marino
155*629ff9f7SJohn Marino start = ws->next_ull;
156*629ff9f7SJohn Marino if (start == ws->end_ull)
157*629ff9f7SJohn Marino return false;
158*629ff9f7SJohn Marino
159*629ff9f7SJohn Marino chunk = ws->chunk_size_ull;
160*629ff9f7SJohn Marino left = ws->end_ull - start;
161*629ff9f7SJohn Marino if (__builtin_expect (ws->mode & 2, 0))
162*629ff9f7SJohn Marino {
163*629ff9f7SJohn Marino if (chunk < left)
164*629ff9f7SJohn Marino chunk = left;
165*629ff9f7SJohn Marino }
166*629ff9f7SJohn Marino else
167*629ff9f7SJohn Marino {
168*629ff9f7SJohn Marino if (chunk > left)
169*629ff9f7SJohn Marino chunk = left;
170*629ff9f7SJohn Marino }
171*629ff9f7SJohn Marino end = start + chunk;
172*629ff9f7SJohn Marino
173*629ff9f7SJohn Marino ws->next_ull = end;
174*629ff9f7SJohn Marino *pstart = start;
175*629ff9f7SJohn Marino *pend = end;
176*629ff9f7SJohn Marino return true;
177*629ff9f7SJohn Marino }
178*629ff9f7SJohn Marino
179*629ff9f7SJohn Marino
180*629ff9f7SJohn Marino #if defined HAVE_SYNC_BUILTINS && defined __LP64__
181*629ff9f7SJohn Marino /* Similar, but doesn't require the lock held, and uses compare-and-swap
182*629ff9f7SJohn Marino instead. Note that the only memory value that changes is ws->next_ull. */
183*629ff9f7SJohn Marino
bool
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, nend, chunk;

  /* end_ull and chunk_size_ull are not changed after the work share is
     set up; only next_ull is mutated concurrently (see comment above).  */
  end = ws->end_ull;
  chunk = ws->chunk_size_ull;

  /* NOTE(review): mode bit 0 appears to guarantee that next_ull can be
     advanced past end_ull without wrapping the unsigned range, which is
     what makes the cheap unconditional fetch-and-add below sound —
     confirm against the work-share initialization code.  */
  if (__builtin_expect (ws->mode & 1, 1))
    {
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
      /* Mode bit 1 distinguishes downward loops.  */
      if (__builtin_expect (ws->mode & 2, 0) == 0)
	{
	  /* Upward loop: claim [tmp, tmp + chunk), clipped to end.  */
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  /* Downward loop: values decrease toward end, so the
	     comparisons are reversed.  */
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: claim a chunk with compare-and-swap, clamping the chunk
     to what is left so next_ull never moves beyond end.  */
  start = ws->next_ull;
  while (1)
    {
      gomp_ull left = end - start;
      gomp_ull tmp;

      if (start == end)
	return false;

      if (__builtin_expect (ws->mode & 2, 0))
	{
	  /* Downward loop: "left" is negative in unsigned terms.  */
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Lost the race: another thread advanced next_ull.  Retry from
	 the value it installed.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
253*629ff9f7SJohn Marino #endif /* HAVE_SYNC_BUILTINS */
254*629ff9f7SJohn Marino
255*629ff9f7SJohn Marino
256*629ff9f7SJohn Marino /* This function implements the GUIDED scheduling method. Arguments are
257*629ff9f7SJohn Marino as for gomp_iter_ull_static_next. This function must be called with the
258*629ff9f7SJohn Marino work share lock held. */
259*629ff9f7SJohn Marino
260*629ff9f7SJohn Marino bool
gomp_iter_ull_guided_next_locked(gomp_ull * pstart,gomp_ull * pend)261*629ff9f7SJohn Marino gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
262*629ff9f7SJohn Marino {
263*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
264*629ff9f7SJohn Marino struct gomp_work_share *ws = thr->ts.work_share;
265*629ff9f7SJohn Marino struct gomp_team *team = thr->ts.team;
266*629ff9f7SJohn Marino gomp_ull nthreads = team ? team->nthreads : 1;
267*629ff9f7SJohn Marino gomp_ull n, q;
268*629ff9f7SJohn Marino gomp_ull start, end;
269*629ff9f7SJohn Marino
270*629ff9f7SJohn Marino if (ws->next_ull == ws->end_ull)
271*629ff9f7SJohn Marino return false;
272*629ff9f7SJohn Marino
273*629ff9f7SJohn Marino start = ws->next_ull;
274*629ff9f7SJohn Marino if (__builtin_expect (ws->mode, 0) == 0)
275*629ff9f7SJohn Marino n = (ws->end_ull - start) / ws->incr_ull;
276*629ff9f7SJohn Marino else
277*629ff9f7SJohn Marino n = (start - ws->end_ull) / -ws->incr_ull;
278*629ff9f7SJohn Marino q = (n + nthreads - 1) / nthreads;
279*629ff9f7SJohn Marino
280*629ff9f7SJohn Marino if (q < ws->chunk_size_ull)
281*629ff9f7SJohn Marino q = ws->chunk_size_ull;
282*629ff9f7SJohn Marino if (q <= n)
283*629ff9f7SJohn Marino end = start + q * ws->incr_ull;
284*629ff9f7SJohn Marino else
285*629ff9f7SJohn Marino end = ws->end_ull;
286*629ff9f7SJohn Marino
287*629ff9f7SJohn Marino ws->next_ull = end;
288*629ff9f7SJohn Marino *pstart = start;
289*629ff9f7SJohn Marino *pend = end;
290*629ff9f7SJohn Marino return true;
291*629ff9f7SJohn Marino }
292*629ff9f7SJohn Marino
293*629ff9f7SJohn Marino #if defined HAVE_SYNC_BUILTINS && defined __LP64__
294*629ff9f7SJohn Marino /* Similar, but doesn't require the lock held, and uses compare-and-swap
295*629ff9f7SJohn Marino instead. Note that the only memory value that changes is ws->next_ull. */
296*629ff9f7SJohn Marino
bool
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull start, end, nend, incr;
  gomp_ull chunk_size;

  /* Snapshot the work-share fields.  Only next_ull changes
     concurrently (see comment above); the others are fixed after
     initialization.  */
  start = ws->next_ull;
  end = ws->end_ull;
  incr = ws->incr_ull;
  chunk_size = ws->chunk_size_ull;

  while (1)
    {
      gomp_ull n, q;
      gomp_ull tmp;

      if (start == end)
	return false;

      /* Remaining trip count; nonzero mode marks a downward loop with
	 a negated (unsigned-wrapped) increment, as in the locked
	 variant.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (end - start) / incr;
      else
	n = (start - end) / -incr;
      /* Guided heuristic: take 1/nthreads of the remaining
	 iterations, rounded up, but at least chunk_size.  */
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Lost the race; recompute the share from the new position.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
344*629ff9f7SJohn Marino #endif /* HAVE_SYNC_BUILTINS */
345