/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */

#include "libgomp.h"
#include <stdlib.h>


/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than zero if this thread
   received the absolutely last iteration.  */

int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i;
      unsigned long s0, e0;
      long s, e;

      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  Adding INCR-1 (or
	 INCR+1 for a negative increment) before dividing rounds the
	 quotient up, i.e. n = ceil ((end - next) / incr).  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      q += (q * nthreads != n);
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
        e0 = n;

      /* Notice when no iterations are allocated to this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* On trip number static_trip, this thread takes the chunk
	 numbered static_trip * nthreads + i; that is, a C-sized chunk
	 offset by our thread number within the current round.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;

      /* Detect when we have run out of iterations (this also catches
	 unsigned wrap-around of S0).  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
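
/* Illustrative sketch, not part of libgomp: a loop driver might consume
   the static schedule above like this, assuming a positive increment
   (BODY is a hypothetical per-iteration function):

       long s, e;
       while (gomp_iter_static_next (&s, &e) == 0)
	 for (long x = s; x < e; x += ws->incr)
	   body (x);

   Worked example: next = 0, end = 10, incr = 1 (ten iterations),
   nthreads = 4, chunk_size = 0.  Then n = 10 and q = ceil (10/4) = 3,
   so thread I is handed [3*I, min (3*I + 3, 10)): thread 0 gets [0,3),
   thread 1 gets [3,6), thread 2 gets [6,9) and thread 3 gets [9,10).  */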


/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with ws->lock
   held.  */

bool
gomp_iter_dynamic_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, chunk, left;

  start = ws->next;
  if (start == ws->end)
    return false;

  chunk = ws->chunk_size;
  left = ws->end - start;
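  /* For dynamic scheduling, ws->chunk_size was pre-multiplied by the
     increment when the work share was initialized (see gomp_loop_init),
     so with a negative increment both CHUNK and LEFT are negative here
     and the clamping below is mirrored.  */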
  if (ws->incr < 0)
    {
      if (chunk < left)
	chunk = left;
    }
  else
    {
      if (chunk > left)
	chunk = left;
    }
  end = start + chunk;

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}
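
/* Illustrative sketch, not part of libgomp: callers of the _locked
   variant are expected to serialize access themselves, roughly:

       gomp_mutex_lock (&ws->lock);
       ret = gomp_iter_dynamic_next_locked (&s, &e);
       gomp_mutex_unlock (&ws->lock);

   which is how a loop driver would fall back when HAVE_SYNC_BUILTINS
   is not available.  */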


#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock to be held; it uses
   compare-and-swap instead.  Note that the only memory value that
   changes is ws->next.  */

bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size;

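  /* A nonzero ws->mode means the work share was set up so that an
     unconditional fetch-and-add of CHUNK cannot overflow or run outside
     representable bounds, allowing this cheap path in place of the
     compare-and-swap loop below.  */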
  if (__builtin_expect (ws->mode, 1))
    {
      long tmp = __sync_fetch_and_add (&ws->next, chunk);
      if (incr > 0)
	{
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  start = ws->next;
  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
	return false;

      if (incr < 0)
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
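
/* Illustrative sketch, not part of libgomp: the lock-free variant is
   driven the same way as the locked one, with no mutex involved:

       long s, e;
       while (gomp_iter_dynamic_next (&s, &e))
	 for (long x = s; incr > 0 ? x < e : x > e; x += incr)
	   body (x);

   The sign-dependent test matters: with a negative increment the chunk
   runs downward and E lies below S.  If two threads race in the
   compare-and-swap loop, the loser's __sync_val_compare_and_swap
   returns the winner's updated ws->next, and it simply retries from
   there.  */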


/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with the
   work share lock held.  */

bool
gomp_iter_guided_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  unsigned long n, q;
  long start, end;

  if (ws->next == ws->end)
    return false;

  start = ws->next;
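  /* Take 1/nthreads of the iterations still remaining, rounded up, but
     never less than the requested chunk size.  */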
  n = (ws->end - start) / ws->incr;
  q = (n + nthreads - 1) / nthreads;

  if (q < ws->chunk_size)
    q = ws->chunk_size;
  if (q <= n)
    end = start + q * ws->incr;
  else
    end = ws->end;

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}
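
/* Illustrative worked example, not part of libgomp: with 100 remaining
   iterations (incr = 1), nthreads = 4 and chunk_size = 7, successive
   grabs take ceil (100/4) = 25, ceil (75/4) = 19, ceil (56/4) = 14,
   shrinking geometrically until ceil (n/4) falls below 7; from then on
   each grab takes 7 iterations, or whatever remains at the end.  */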

#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock to be held; it uses
   compare-and-swap instead.  Note that the only memory value that
   changes is ws->next.  */

bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
	return false;

      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */