xref: /dflybsd-src/contrib/gcc-4.7/libgomp/iter.c (revision 94b98a915ba84699b2a2adab9146fbc2cf617459)
1*629ff9f7SJohn Marino /* Copyright (C) 2005, 2008, 2009, 2011 Free Software Foundation, Inc.
2*629ff9f7SJohn Marino    Contributed by Richard Henderson <rth@redhat.com>.
3*629ff9f7SJohn Marino 
4*629ff9f7SJohn Marino    This file is part of the GNU OpenMP Library (libgomp).
5*629ff9f7SJohn Marino 
6*629ff9f7SJohn Marino    Libgomp is free software; you can redistribute it and/or modify it
7*629ff9f7SJohn Marino    under the terms of the GNU General Public License as published by
8*629ff9f7SJohn Marino    the Free Software Foundation; either version 3, or (at your option)
9*629ff9f7SJohn Marino    any later version.
10*629ff9f7SJohn Marino 
11*629ff9f7SJohn Marino    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12*629ff9f7SJohn Marino    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13*629ff9f7SJohn Marino    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14*629ff9f7SJohn Marino    more details.
15*629ff9f7SJohn Marino 
16*629ff9f7SJohn Marino    Under Section 7 of GPL version 3, you are granted additional
17*629ff9f7SJohn Marino    permissions described in the GCC Runtime Library Exception, version
18*629ff9f7SJohn Marino    3.1, as published by the Free Software Foundation.
19*629ff9f7SJohn Marino 
20*629ff9f7SJohn Marino    You should have received a copy of the GNU General Public License and
21*629ff9f7SJohn Marino    a copy of the GCC Runtime Library Exception along with this program;
22*629ff9f7SJohn Marino    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23*629ff9f7SJohn Marino    <http://www.gnu.org/licenses/>.  */
24*629ff9f7SJohn Marino 
25*629ff9f7SJohn Marino /* This file contains routines for managing work-share iteration, both
26*629ff9f7SJohn Marino    for loops and sections.  */
27*629ff9f7SJohn Marino 
28*629ff9f7SJohn Marino #include "libgomp.h"
29*629ff9f7SJohn Marino #include <stdlib.h>
30*629ff9f7SJohn Marino 
31*629ff9f7SJohn Marino 
32*629ff9f7SJohn Marino /* This function implements the STATIC scheduling method.  The caller should
33*629ff9f7SJohn Marino    iterate *pstart <= x < *pend.  Return zero if there are more iterations
34*629ff9f7SJohn Marino    to perform; nonzero if not.  Return less than 0 if this thread had
35*629ff9f7SJohn Marino    received the absolutely last iteration.  */
36*629ff9f7SJohn Marino 
37*629ff9f7SJohn Marino int
gomp_iter_static_next(long * pstart,long * pend)38*629ff9f7SJohn Marino gomp_iter_static_next (long *pstart, long *pend)
39*629ff9f7SJohn Marino {
40*629ff9f7SJohn Marino   struct gomp_thread *thr = gomp_thread ();
41*629ff9f7SJohn Marino   struct gomp_team *team = thr->ts.team;
42*629ff9f7SJohn Marino   struct gomp_work_share *ws = thr->ts.work_share;
43*629ff9f7SJohn Marino   unsigned long nthreads = team ? team->nthreads : 1;
44*629ff9f7SJohn Marino 
45*629ff9f7SJohn Marino   if (thr->ts.static_trip == -1)
46*629ff9f7SJohn Marino     return -1;
47*629ff9f7SJohn Marino 
48*629ff9f7SJohn Marino   /* Quick test for degenerate teams and orphaned constructs.  */
49*629ff9f7SJohn Marino   if (nthreads == 1)
50*629ff9f7SJohn Marino     {
51*629ff9f7SJohn Marino       *pstart = ws->next;
52*629ff9f7SJohn Marino       *pend = ws->end;
53*629ff9f7SJohn Marino       thr->ts.static_trip = -1;
54*629ff9f7SJohn Marino       return ws->next == ws->end;
55*629ff9f7SJohn Marino     }
56*629ff9f7SJohn Marino 
57*629ff9f7SJohn Marino   /* We interpret chunk_size zero as "unspecified", which means that we
58*629ff9f7SJohn Marino      should break up the iterations such that each thread makes only one
59*629ff9f7SJohn Marino      trip through the outer loop.  */
60*629ff9f7SJohn Marino   if (ws->chunk_size == 0)
61*629ff9f7SJohn Marino     {
62*629ff9f7SJohn Marino       unsigned long n, q, i, t;
63*629ff9f7SJohn Marino       unsigned long s0, e0;
64*629ff9f7SJohn Marino       long s, e;
65*629ff9f7SJohn Marino 
66*629ff9f7SJohn Marino       if (thr->ts.static_trip > 0)
67*629ff9f7SJohn Marino 	return 1;
68*629ff9f7SJohn Marino 
69*629ff9f7SJohn Marino       /* Compute the total number of iterations.  */
70*629ff9f7SJohn Marino       s = ws->incr + (ws->incr > 0 ? -1 : 1);
71*629ff9f7SJohn Marino       n = (ws->end - ws->next + s) / ws->incr;
72*629ff9f7SJohn Marino       i = thr->ts.team_id;
73*629ff9f7SJohn Marino 
74*629ff9f7SJohn Marino       /* Compute the "zero-based" start and end points.  That is, as
75*629ff9f7SJohn Marino          if the loop began at zero and incremented by one.  */
76*629ff9f7SJohn Marino       q = n / nthreads;
77*629ff9f7SJohn Marino       t = n % nthreads;
78*629ff9f7SJohn Marino       if (i < t)
79*629ff9f7SJohn Marino 	{
80*629ff9f7SJohn Marino 	  t = 0;
81*629ff9f7SJohn Marino 	  q++;
82*629ff9f7SJohn Marino 	}
83*629ff9f7SJohn Marino       s0 = q * i + t;
84*629ff9f7SJohn Marino       e0 = s0 + q;
85*629ff9f7SJohn Marino 
86*629ff9f7SJohn Marino       /* Notice when no iterations allocated for this thread.  */
87*629ff9f7SJohn Marino       if (s0 >= e0)
88*629ff9f7SJohn Marino 	{
89*629ff9f7SJohn Marino 	  thr->ts.static_trip = 1;
90*629ff9f7SJohn Marino 	  return 1;
91*629ff9f7SJohn Marino 	}
92*629ff9f7SJohn Marino 
93*629ff9f7SJohn Marino       /* Transform these to the actual start and end numbers.  */
94*629ff9f7SJohn Marino       s = (long)s0 * ws->incr + ws->next;
95*629ff9f7SJohn Marino       e = (long)e0 * ws->incr + ws->next;
96*629ff9f7SJohn Marino 
97*629ff9f7SJohn Marino       *pstart = s;
98*629ff9f7SJohn Marino       *pend = e;
99*629ff9f7SJohn Marino       thr->ts.static_trip = (e0 == n ? -1 : 1);
100*629ff9f7SJohn Marino       return 0;
101*629ff9f7SJohn Marino     }
102*629ff9f7SJohn Marino   else
103*629ff9f7SJohn Marino     {
104*629ff9f7SJohn Marino       unsigned long n, s0, e0, i, c;
105*629ff9f7SJohn Marino       long s, e;
106*629ff9f7SJohn Marino 
107*629ff9f7SJohn Marino       /* Otherwise, each thread gets exactly chunk_size iterations
108*629ff9f7SJohn Marino 	 (if available) each time through the loop.  */
109*629ff9f7SJohn Marino 
110*629ff9f7SJohn Marino       s = ws->incr + (ws->incr > 0 ? -1 : 1);
111*629ff9f7SJohn Marino       n = (ws->end - ws->next + s) / ws->incr;
112*629ff9f7SJohn Marino       i = thr->ts.team_id;
113*629ff9f7SJohn Marino       c = ws->chunk_size;
114*629ff9f7SJohn Marino 
115*629ff9f7SJohn Marino       /* Initial guess is a C sized chunk positioned nthreads iterations
116*629ff9f7SJohn Marino 	 in, offset by our thread number.  */
117*629ff9f7SJohn Marino       s0 = (thr->ts.static_trip * nthreads + i) * c;
118*629ff9f7SJohn Marino       e0 = s0 + c;
119*629ff9f7SJohn Marino 
120*629ff9f7SJohn Marino       /* Detect overflow.  */
121*629ff9f7SJohn Marino       if (s0 >= n)
122*629ff9f7SJohn Marino 	return 1;
123*629ff9f7SJohn Marino       if (e0 > n)
124*629ff9f7SJohn Marino 	e0 = n;
125*629ff9f7SJohn Marino 
126*629ff9f7SJohn Marino       /* Transform these to the actual start and end numbers.  */
127*629ff9f7SJohn Marino       s = (long)s0 * ws->incr + ws->next;
128*629ff9f7SJohn Marino       e = (long)e0 * ws->incr + ws->next;
129*629ff9f7SJohn Marino 
130*629ff9f7SJohn Marino       *pstart = s;
131*629ff9f7SJohn Marino       *pend = e;
132*629ff9f7SJohn Marino 
133*629ff9f7SJohn Marino       if (e0 == n)
134*629ff9f7SJohn Marino 	thr->ts.static_trip = -1;
135*629ff9f7SJohn Marino       else
136*629ff9f7SJohn Marino 	thr->ts.static_trip++;
137*629ff9f7SJohn Marino       return 0;
138*629ff9f7SJohn Marino     }
139*629ff9f7SJohn Marino }
140*629ff9f7SJohn Marino 
141*629ff9f7SJohn Marino 
142*629ff9f7SJohn Marino /* This function implements the DYNAMIC scheduling method.  Arguments are
143*629ff9f7SJohn Marino    as for gomp_iter_static_next.  This function must be called with ws->lock
144*629ff9f7SJohn Marino    held.  */
145*629ff9f7SJohn Marino 
146*629ff9f7SJohn Marino bool
gomp_iter_dynamic_next_locked(long * pstart,long * pend)147*629ff9f7SJohn Marino gomp_iter_dynamic_next_locked (long *pstart, long *pend)
148*629ff9f7SJohn Marino {
149*629ff9f7SJohn Marino   struct gomp_thread *thr = gomp_thread ();
150*629ff9f7SJohn Marino   struct gomp_work_share *ws = thr->ts.work_share;
151*629ff9f7SJohn Marino   long start, end, chunk, left;
152*629ff9f7SJohn Marino 
153*629ff9f7SJohn Marino   start = ws->next;
154*629ff9f7SJohn Marino   if (start == ws->end)
155*629ff9f7SJohn Marino     return false;
156*629ff9f7SJohn Marino 
157*629ff9f7SJohn Marino   chunk = ws->chunk_size;
158*629ff9f7SJohn Marino   left = ws->end - start;
159*629ff9f7SJohn Marino   if (ws->incr < 0)
160*629ff9f7SJohn Marino     {
161*629ff9f7SJohn Marino       if (chunk < left)
162*629ff9f7SJohn Marino 	chunk = left;
163*629ff9f7SJohn Marino     }
164*629ff9f7SJohn Marino   else
165*629ff9f7SJohn Marino     {
166*629ff9f7SJohn Marino       if (chunk > left)
167*629ff9f7SJohn Marino 	chunk = left;
168*629ff9f7SJohn Marino     }
169*629ff9f7SJohn Marino   end = start + chunk;
170*629ff9f7SJohn Marino 
171*629ff9f7SJohn Marino   ws->next = end;
172*629ff9f7SJohn Marino   *pstart = start;
173*629ff9f7SJohn Marino   *pend = end;
174*629ff9f7SJohn Marino   return true;
175*629ff9f7SJohn Marino }
176*629ff9f7SJohn Marino 
177*629ff9f7SJohn Marino 
178*629ff9f7SJohn Marino #ifdef HAVE_SYNC_BUILTINS
179*629ff9f7SJohn Marino /* Similar, but doesn't require the lock held, and uses compare-and-swap
180*629ff9f7SJohn Marino    instead.  Note that the only memory value that changes is ws->next.  */
181*629ff9f7SJohn Marino 
182*629ff9f7SJohn Marino bool
gomp_iter_dynamic_next(long * pstart,long * pend)183*629ff9f7SJohn Marino gomp_iter_dynamic_next (long *pstart, long *pend)
184*629ff9f7SJohn Marino {
185*629ff9f7SJohn Marino   struct gomp_thread *thr = gomp_thread ();
186*629ff9f7SJohn Marino   struct gomp_work_share *ws = thr->ts.work_share;
187*629ff9f7SJohn Marino   long start, end, nend, chunk, incr;
188*629ff9f7SJohn Marino 
189*629ff9f7SJohn Marino   end = ws->end;
190*629ff9f7SJohn Marino   incr = ws->incr;
191*629ff9f7SJohn Marino   chunk = ws->chunk_size;
192*629ff9f7SJohn Marino 
193*629ff9f7SJohn Marino   if (__builtin_expect (ws->mode, 1))
194*629ff9f7SJohn Marino     {
195*629ff9f7SJohn Marino       long tmp = __sync_fetch_and_add (&ws->next, chunk);
196*629ff9f7SJohn Marino       if (incr > 0)
197*629ff9f7SJohn Marino 	{
198*629ff9f7SJohn Marino 	  if (tmp >= end)
199*629ff9f7SJohn Marino 	    return false;
200*629ff9f7SJohn Marino 	  nend = tmp + chunk;
201*629ff9f7SJohn Marino 	  if (nend > end)
202*629ff9f7SJohn Marino 	    nend = end;
203*629ff9f7SJohn Marino 	  *pstart = tmp;
204*629ff9f7SJohn Marino 	  *pend = nend;
205*629ff9f7SJohn Marino 	  return true;
206*629ff9f7SJohn Marino 	}
207*629ff9f7SJohn Marino       else
208*629ff9f7SJohn Marino 	{
209*629ff9f7SJohn Marino 	  if (tmp <= end)
210*629ff9f7SJohn Marino 	    return false;
211*629ff9f7SJohn Marino 	  nend = tmp + chunk;
212*629ff9f7SJohn Marino 	  if (nend < end)
213*629ff9f7SJohn Marino 	    nend = end;
214*629ff9f7SJohn Marino 	  *pstart = tmp;
215*629ff9f7SJohn Marino 	  *pend = nend;
216*629ff9f7SJohn Marino 	  return true;
217*629ff9f7SJohn Marino 	}
218*629ff9f7SJohn Marino     }
219*629ff9f7SJohn Marino 
220*629ff9f7SJohn Marino   start = ws->next;
221*629ff9f7SJohn Marino   while (1)
222*629ff9f7SJohn Marino     {
223*629ff9f7SJohn Marino       long left = end - start;
224*629ff9f7SJohn Marino       long tmp;
225*629ff9f7SJohn Marino 
226*629ff9f7SJohn Marino       if (start == end)
227*629ff9f7SJohn Marino 	return false;
228*629ff9f7SJohn Marino 
229*629ff9f7SJohn Marino       if (incr < 0)
230*629ff9f7SJohn Marino 	{
231*629ff9f7SJohn Marino 	  if (chunk < left)
232*629ff9f7SJohn Marino 	    chunk = left;
233*629ff9f7SJohn Marino 	}
234*629ff9f7SJohn Marino       else
235*629ff9f7SJohn Marino 	{
236*629ff9f7SJohn Marino 	  if (chunk > left)
237*629ff9f7SJohn Marino 	    chunk = left;
238*629ff9f7SJohn Marino 	}
239*629ff9f7SJohn Marino       nend = start + chunk;
240*629ff9f7SJohn Marino 
241*629ff9f7SJohn Marino       tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
242*629ff9f7SJohn Marino       if (__builtin_expect (tmp == start, 1))
243*629ff9f7SJohn Marino 	break;
244*629ff9f7SJohn Marino 
245*629ff9f7SJohn Marino       start = tmp;
246*629ff9f7SJohn Marino     }
247*629ff9f7SJohn Marino 
248*629ff9f7SJohn Marino   *pstart = start;
249*629ff9f7SJohn Marino   *pend = nend;
250*629ff9f7SJohn Marino   return true;
251*629ff9f7SJohn Marino }
252*629ff9f7SJohn Marino #endif /* HAVE_SYNC_BUILTINS */
253*629ff9f7SJohn Marino 
254*629ff9f7SJohn Marino 
255*629ff9f7SJohn Marino /* This function implements the GUIDED scheduling method.  Arguments are
256*629ff9f7SJohn Marino    as for gomp_iter_static_next.  This function must be called with the
257*629ff9f7SJohn Marino    work share lock held.  */
258*629ff9f7SJohn Marino 
259*629ff9f7SJohn Marino bool
gomp_iter_guided_next_locked(long * pstart,long * pend)260*629ff9f7SJohn Marino gomp_iter_guided_next_locked (long *pstart, long *pend)
261*629ff9f7SJohn Marino {
262*629ff9f7SJohn Marino   struct gomp_thread *thr = gomp_thread ();
263*629ff9f7SJohn Marino   struct gomp_work_share *ws = thr->ts.work_share;
264*629ff9f7SJohn Marino   struct gomp_team *team = thr->ts.team;
265*629ff9f7SJohn Marino   unsigned long nthreads = team ? team->nthreads : 1;
266*629ff9f7SJohn Marino   unsigned long n, q;
267*629ff9f7SJohn Marino   long start, end;
268*629ff9f7SJohn Marino 
269*629ff9f7SJohn Marino   if (ws->next == ws->end)
270*629ff9f7SJohn Marino     return false;
271*629ff9f7SJohn Marino 
272*629ff9f7SJohn Marino   start = ws->next;
273*629ff9f7SJohn Marino   n = (ws->end - start) / ws->incr;
274*629ff9f7SJohn Marino   q = (n + nthreads - 1) / nthreads;
275*629ff9f7SJohn Marino 
276*629ff9f7SJohn Marino   if (q < ws->chunk_size)
277*629ff9f7SJohn Marino     q = ws->chunk_size;
278*629ff9f7SJohn Marino   if (q <= n)
279*629ff9f7SJohn Marino     end = start + q * ws->incr;
280*629ff9f7SJohn Marino   else
281*629ff9f7SJohn Marino     end = ws->end;
282*629ff9f7SJohn Marino 
283*629ff9f7SJohn Marino   ws->next = end;
284*629ff9f7SJohn Marino   *pstart = start;
285*629ff9f7SJohn Marino   *pend = end;
286*629ff9f7SJohn Marino   return true;
287*629ff9f7SJohn Marino }
288*629ff9f7SJohn Marino 
289*629ff9f7SJohn Marino #ifdef HAVE_SYNC_BUILTINS
290*629ff9f7SJohn Marino /* Similar, but doesn't require the lock held, and uses compare-and-swap
291*629ff9f7SJohn Marino    instead.  Note that the only memory value that changes is ws->next.  */
292*629ff9f7SJohn Marino 
293*629ff9f7SJohn Marino bool
gomp_iter_guided_next(long * pstart,long * pend)294*629ff9f7SJohn Marino gomp_iter_guided_next (long *pstart, long *pend)
295*629ff9f7SJohn Marino {
296*629ff9f7SJohn Marino   struct gomp_thread *thr = gomp_thread ();
297*629ff9f7SJohn Marino   struct gomp_work_share *ws = thr->ts.work_share;
298*629ff9f7SJohn Marino   struct gomp_team *team = thr->ts.team;
299*629ff9f7SJohn Marino   unsigned long nthreads = team ? team->nthreads : 1;
300*629ff9f7SJohn Marino   long start, end, nend, incr;
301*629ff9f7SJohn Marino   unsigned long chunk_size;
302*629ff9f7SJohn Marino 
303*629ff9f7SJohn Marino   start = ws->next;
304*629ff9f7SJohn Marino   end = ws->end;
305*629ff9f7SJohn Marino   incr = ws->incr;
306*629ff9f7SJohn Marino   chunk_size = ws->chunk_size;
307*629ff9f7SJohn Marino 
308*629ff9f7SJohn Marino   while (1)
309*629ff9f7SJohn Marino     {
310*629ff9f7SJohn Marino       unsigned long n, q;
311*629ff9f7SJohn Marino       long tmp;
312*629ff9f7SJohn Marino 
313*629ff9f7SJohn Marino       if (start == end)
314*629ff9f7SJohn Marino 	return false;
315*629ff9f7SJohn Marino 
316*629ff9f7SJohn Marino       n = (end - start) / incr;
317*629ff9f7SJohn Marino       q = (n + nthreads - 1) / nthreads;
318*629ff9f7SJohn Marino 
319*629ff9f7SJohn Marino       if (q < chunk_size)
320*629ff9f7SJohn Marino 	q = chunk_size;
321*629ff9f7SJohn Marino       if (__builtin_expect (q <= n, 1))
322*629ff9f7SJohn Marino 	nend = start + q * incr;
323*629ff9f7SJohn Marino       else
324*629ff9f7SJohn Marino 	nend = end;
325*629ff9f7SJohn Marino 
326*629ff9f7SJohn Marino       tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
327*629ff9f7SJohn Marino       if (__builtin_expect (tmp == start, 1))
328*629ff9f7SJohn Marino 	break;
329*629ff9f7SJohn Marino 
330*629ff9f7SJohn Marino       start = tmp;
331*629ff9f7SJohn Marino     }
332*629ff9f7SJohn Marino 
333*629ff9f7SJohn Marino   *pstart = start;
334*629ff9f7SJohn Marino   *pend = nend;
335*629ff9f7SJohn Marino   return true;
336*629ff9f7SJohn Marino }
337*629ff9f7SJohn Marino #endif /* HAVE_SYNC_BUILTINS */
338