/* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */
27*629ff9f7SJohn Marino
28*629ff9f7SJohn Marino #include "libgomp.h"
29*629ff9f7SJohn Marino #include <stdlib.h>
30*629ff9f7SJohn Marino
31*629ff9f7SJohn Marino
/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.  */
int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip == -1 records that this thread has already been handed
     the very last iterations of the loop; nothing further to give out.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      /* Single thread: hand out the whole remaining range in one trip.
	 Return nonzero (no iterations) iff the range is already empty.  */
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i, t;
      unsigned long s0, e0;
      long s, e;

      /* Each thread gets exactly one slice; a positive static_trip means
	 this thread already took its slice.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  The (incr - sign) bias
	 makes the division round the trip count up toward the direction
	 of the increment, for both positive and negative strides.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
	 if the loop began at zero and incremented by one.  The first
	 n % nthreads threads receive one extra iteration each.  */
      q = n / nthreads;
      t = n % nthreads;
      if (i < t)
	{
	  t = 0;
	  q++;
	}
      s0 = q * i + t;
      e0 = s0 + q;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      /* -1 marks the thread that received the final iterations.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      /* Total trip count, rounded toward the increment's direction
	 (same bias trick as the chunkless branch above).  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  static_trip counts how many
	 rounds this thread has already completed.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      /* NOTE(review): this catches running past the trip count; the
	 multiplication above could itself wrap for huge static_trip
	 values — presumably bounded by loop setup, confirm there.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      /* If this chunk reached the end of the range, remember that this
	 thread took the last iteration; otherwise advance to the next
	 round of chunks.  */
      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
140*629ff9f7SJohn Marino
141*629ff9f7SJohn Marino
/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with ws->lock
   held.  */
145*629ff9f7SJohn Marino
146*629ff9f7SJohn Marino bool
gomp_iter_dynamic_next_locked(long * pstart,long * pend)147*629ff9f7SJohn Marino gomp_iter_dynamic_next_locked (long *pstart, long *pend)
148*629ff9f7SJohn Marino {
149*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
150*629ff9f7SJohn Marino struct gomp_work_share *ws = thr->ts.work_share;
151*629ff9f7SJohn Marino long start, end, chunk, left;
152*629ff9f7SJohn Marino
153*629ff9f7SJohn Marino start = ws->next;
154*629ff9f7SJohn Marino if (start == ws->end)
155*629ff9f7SJohn Marino return false;
156*629ff9f7SJohn Marino
157*629ff9f7SJohn Marino chunk = ws->chunk_size;
158*629ff9f7SJohn Marino left = ws->end - start;
159*629ff9f7SJohn Marino if (ws->incr < 0)
160*629ff9f7SJohn Marino {
161*629ff9f7SJohn Marino if (chunk < left)
162*629ff9f7SJohn Marino chunk = left;
163*629ff9f7SJohn Marino }
164*629ff9f7SJohn Marino else
165*629ff9f7SJohn Marino {
166*629ff9f7SJohn Marino if (chunk > left)
167*629ff9f7SJohn Marino chunk = left;
168*629ff9f7SJohn Marino }
169*629ff9f7SJohn Marino end = start + chunk;
170*629ff9f7SJohn Marino
171*629ff9f7SJohn Marino ws->next = end;
172*629ff9f7SJohn Marino *pstart = start;
173*629ff9f7SJohn Marino *pend = end;
174*629ff9f7SJohn Marino return true;
175*629ff9f7SJohn Marino }
176*629ff9f7SJohn Marino
177*629ff9f7SJohn Marino
178*629ff9f7SJohn Marino #ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */
181*629ff9f7SJohn Marino
bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  /* end, incr and chunk_size are invariant for the lifetime of the
     work share; only ws->next is mutated concurrently.  */
  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size;

  if (__builtin_expect (ws->mode, 1))
    {
      /* Fast path: unconditionally advance ws->next by one chunk.
	 ws->next may overshoot end; threads that fetched a start at
	 or beyond end simply report exhaustion.  NOTE(review): this
	 path presumably requires the overshoot to be provably free of
	 signed wraparound — ws->mode is set by loop setup elsewhere;
	 confirm against the work-share initialization.  */
      long tmp = __sync_fetch_and_add (&ws->next, chunk);
      if (incr > 0)
	{
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;		/* Clamp the final partial chunk.  */
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  /* Mirror image for a downward-counting loop.  */
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: claim [start, start + chunk) with compare-and-swap,
     retrying from the observed value on contention.  */
  start = ws->next;
  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
	return false;

      /* Clamp the chunk to the remaining range; comparisons invert
	 for negative increments, where both values are negative.  */
      if (incr < 0)
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Lost the race: tmp holds the current ws->next; retry from it.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
252*629ff9f7SJohn Marino #endif /* HAVE_SYNC_BUILTINS */
253*629ff9f7SJohn Marino
254*629ff9f7SJohn Marino
/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with the
   work share lock held.  */
258*629ff9f7SJohn Marino
259*629ff9f7SJohn Marino bool
gomp_iter_guided_next_locked(long * pstart,long * pend)260*629ff9f7SJohn Marino gomp_iter_guided_next_locked (long *pstart, long *pend)
261*629ff9f7SJohn Marino {
262*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
263*629ff9f7SJohn Marino struct gomp_work_share *ws = thr->ts.work_share;
264*629ff9f7SJohn Marino struct gomp_team *team = thr->ts.team;
265*629ff9f7SJohn Marino unsigned long nthreads = team ? team->nthreads : 1;
266*629ff9f7SJohn Marino unsigned long n, q;
267*629ff9f7SJohn Marino long start, end;
268*629ff9f7SJohn Marino
269*629ff9f7SJohn Marino if (ws->next == ws->end)
270*629ff9f7SJohn Marino return false;
271*629ff9f7SJohn Marino
272*629ff9f7SJohn Marino start = ws->next;
273*629ff9f7SJohn Marino n = (ws->end - start) / ws->incr;
274*629ff9f7SJohn Marino q = (n + nthreads - 1) / nthreads;
275*629ff9f7SJohn Marino
276*629ff9f7SJohn Marino if (q < ws->chunk_size)
277*629ff9f7SJohn Marino q = ws->chunk_size;
278*629ff9f7SJohn Marino if (q <= n)
279*629ff9f7SJohn Marino end = start + q * ws->incr;
280*629ff9f7SJohn Marino else
281*629ff9f7SJohn Marino end = ws->end;
282*629ff9f7SJohn Marino
283*629ff9f7SJohn Marino ws->next = end;
284*629ff9f7SJohn Marino *pstart = start;
285*629ff9f7SJohn Marino *pend = end;
286*629ff9f7SJohn Marino return true;
287*629ff9f7SJohn Marino }
288*629ff9f7SJohn Marino
289*629ff9f7SJohn Marino #ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */
292*629ff9f7SJohn Marino
bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  /* Snapshot the fields; only ws->next changes concurrently, and the
     CAS below re-validates our view of it on every attempt.  */
  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
	return false;

      /* Guided schedule: propose (remaining / nthreads) iterations,
	 rounded up, but never less than chunk_size.  */
      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;	/* Chunk exceeds what's left: take everything.  */

      /* Claim [start, nend) only if ws->next is still the start we
	 computed from; otherwise retry from the value another thread
	 installed.  */
      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
337*629ff9f7SJohn Marino #endif /* HAVE_SYNC_BUILTINS */
338