/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */
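
/* Each iteration function below hands the calling thread a half-open
   range [*pstart, *pend) of loop-variable values to execute.  The return
   value reports whether any work was assigned: gomp_iter_ull_static_next
   uses the int convention documented at its definition, the dynamic and
   guided variants return a bool.  */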

#include "libgomp.h"
#include <stdlib.h>

typedef unsigned long long gomp_ull;
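
/* Throughout this file, ws->mode is treated as a small flag word that was
   filled in when the ull work share was initialized (in loop_ull.c): the
   bit with value 2 marks a loop whose induction variable counts
   downwards, and the bit with value 1 marks a dynamically scheduled loop
   for which ws->next_ull can safely be pushed past ws->end_ull with a
   plain fetch-and-add, without risk of wraparound.  */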

/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread received
   the very last iteration.  */

int
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next_ull;
      *pend = ws->end_ull;
      thr->ts.static_trip = -1;
      return ws->next_ull == ws->end_ull;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size_ull == 0)
    {
      gomp_ull n, q, i, s0, e0, s, e;

      if (thr->ts.static_trip > 0)
        return 1;

      /* Compute the total number of iterations.  */
      if (__builtin_expect (ws->mode, 0) == 0)
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      q += (q * nthreads != n);
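      /* Q is now the per-thread block size: N / NTHREADS rounded up, so
         that NTHREADS blocks of Q iterations cover all N iterations.  */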
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
        e0 = n;

      /* Notice when no iterations are allocated to this thread.  */
      if (s0 >= e0)
        {
          thr->ts.static_trip = 1;
          return 1;
        }

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      gomp_ull n, s0, e0, i, c, s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
         (if available) each time through the loop.  */
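
      /* For example, with a chunk size C of 3, nthreads = 2 and n = 10
         zero-based iterations, thread 0 claims [0,3) and then [6,9),
         while thread 1 claims [3,6) and then the final piece [9,10);
         subsequent calls hand out no more work.  */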

      if (__builtin_expect (ws->mode, 0) == 0)
        n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
        n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;
      c = ws->chunk_size_ull;

      /* On trip number static_trip, thread I claims chunk number
         static_trip * nthreads + I, i.e. a C sized block that many chunks
         into the iteration space.  The guess is trimmed against N below.  */
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
        return 1;
      if (e0 > n)
        e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;

      if (e0 == n)
        thr->ts.static_trip = -1;
      else
        thr->ts.static_trip++;
      return 0;
    }
}


/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with
   ws->lock held.  */
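
/* Unlike the static and guided schedulers, neither dynamic function below
   multiplies by ws->incr_ull: for dynamic scheduling the work-share
   initialization code in loop_ull.c stores ws->chunk_size_ull already
   scaled by the increment, so ws->next_ull can be advanced by it
   directly.  */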

bool
gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, chunk, left;

  start = ws->next_ull;
  if (start == ws->end_ull)
    return false;

  chunk = ws->chunk_size_ull;
  left = ws->end_ull - start;
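  /* For a downward loop (ws->mode & 2), both CHUNK and LEFT hold the
     unsigned encodings of negative values, so the clamp that keeps the
     chunk within the remaining iteration space is inverted relative to
     the upward case.  */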
  if (__builtin_expect (ws->mode & 2, 0))
    {
      if (chunk < left)
        chunk = left;
    }
  else
    {
      if (chunk > left)
        chunk = left;
    }
  end = start + chunk;

  ws->next_ull = end;
  *pstart = start;
  *pend = end;
  return true;
}


#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */

bool
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, nend, chunk;

  end = ws->end_ull;
  chunk = ws->chunk_size_ull;

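  /* When the mode bit with value 1 is set, the work share was set up so
     that ws->next_ull can be pushed past ws->end_ull without wrapping
     around, so a single fetch-and-add claims a chunk; otherwise fall back
     to the compare-and-swap loop further below.  */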
  if (__builtin_expect (ws->mode & 1, 1))
    {
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
      if (__builtin_expect (ws->mode & 2, 0) == 0)
        {
          if (tmp >= end)
            return false;
          nend = tmp + chunk;
          if (nend > end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
      else
        {
          if (tmp <= end)
            return false;
          nend = tmp + chunk;
          if (nend < end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
    }

  start = ws->next_ull;
  while (1)
    {
      gomp_ull left = end - start;
      gomp_ull tmp;

      if (start == end)
        return false;

      if (__builtin_expect (ws->mode & 2, 0))
        {
          if (chunk < left)
            chunk = left;
        }
      else
        {
          if (chunk > left)
            chunk = left;
        }
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

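      /* Another thread updated ws->next_ull first; retry with the value
         that thread installed.  */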
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */


/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_ull_static_next.  This function must be called with the
   work share lock held.  */
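
/* Each call hands out roughly remaining / nthreads iterations, but never
   fewer than chunk_size of them (except for the last, possibly smaller,
   piece), so the chunks shrink as the loop nears completion.  */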

bool
gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull n, q;
  gomp_ull start, end;

  if (ws->next_ull == ws->end_ull)
    return false;

  start = ws->next_ull;
  if (__builtin_expect (ws->mode, 0) == 0)
    n = (ws->end_ull - start) / ws->incr_ull;
  else
    n = (start - ws->end_ull) / -ws->incr_ull;
  q = (n + nthreads - 1) / nthreads;

  if (q < ws->chunk_size_ull)
    q = ws->chunk_size_ull;
  if (q <= n)
    end = start + q * ws->incr_ull;
  else
    end = ws->end_ull;

  ws->next_ull = end;
  *pstart = start;
  *pend = end;
  return true;
}

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */

bool
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull start, end, nend, incr;
  gomp_ull chunk_size;

  start = ws->next_ull;
  end = ws->end_ull;
  incr = ws->incr_ull;
  chunk_size = ws->chunk_size_ull;

  while (1)
    {
      gomp_ull n, q;
      gomp_ull tmp;

      if (start == end)
        return false;

      if (__builtin_expect (ws->mode, 0) == 0)
        n = (end - start) / incr;
      else
        n = (start - end) / -incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
        q = chunk_size;
      if (__builtin_expect (q <= n, 1))
        nend = start + q * incr;
      else
        nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
342