/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include "libgomp.h"

/* Type of the iteration variable handled by every routine in this file.  */
typedef unsigned long long gomp_ull;

33*629ff9f7SJohn Marino /* Initialize the given work share construct from the given arguments. */
34*629ff9f7SJohn Marino
35*629ff9f7SJohn Marino static inline void
gomp_loop_ull_init(struct gomp_work_share * ws,bool up,gomp_ull start,gomp_ull end,gomp_ull incr,enum gomp_schedule_type sched,gomp_ull chunk_size)36*629ff9f7SJohn Marino gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
37*629ff9f7SJohn Marino gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
38*629ff9f7SJohn Marino gomp_ull chunk_size)
39*629ff9f7SJohn Marino {
40*629ff9f7SJohn Marino ws->sched = sched;
41*629ff9f7SJohn Marino ws->chunk_size_ull = chunk_size;
42*629ff9f7SJohn Marino /* Canonicalize loops that have zero iterations to ->next == ->end. */
43*629ff9f7SJohn Marino ws->end_ull = ((up && start > end) || (!up && start < end))
44*629ff9f7SJohn Marino ? start : end;
45*629ff9f7SJohn Marino ws->incr_ull = incr;
46*629ff9f7SJohn Marino ws->next_ull = start;
47*629ff9f7SJohn Marino ws->mode = 0;
48*629ff9f7SJohn Marino if (sched == GFS_DYNAMIC)
49*629ff9f7SJohn Marino {
50*629ff9f7SJohn Marino ws->chunk_size_ull *= incr;
51*629ff9f7SJohn Marino
52*629ff9f7SJohn Marino #if defined HAVE_SYNC_BUILTINS && defined __LP64__
53*629ff9f7SJohn Marino {
54*629ff9f7SJohn Marino /* For dynamic scheduling prepare things to make each iteration
55*629ff9f7SJohn Marino faster. */
56*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
57*629ff9f7SJohn Marino struct gomp_team *team = thr->ts.team;
58*629ff9f7SJohn Marino long nthreads = team ? team->nthreads : 1;
59*629ff9f7SJohn Marino
60*629ff9f7SJohn Marino if (__builtin_expect (up, 1))
61*629ff9f7SJohn Marino {
62*629ff9f7SJohn Marino /* Cheap overflow protection. */
63*629ff9f7SJohn Marino if (__builtin_expect ((nthreads | ws->chunk_size_ull)
64*629ff9f7SJohn Marino < 1ULL << (sizeof (gomp_ull)
65*629ff9f7SJohn Marino * __CHAR_BIT__ / 2 - 1), 1))
66*629ff9f7SJohn Marino ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
67*629ff9f7SJohn Marino - (nthreads + 1) * ws->chunk_size_ull);
68*629ff9f7SJohn Marino }
69*629ff9f7SJohn Marino /* Cheap overflow protection. */
70*629ff9f7SJohn Marino else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
71*629ff9f7SJohn Marino < 1ULL << (sizeof (gomp_ull)
72*629ff9f7SJohn Marino * __CHAR_BIT__ / 2 - 1), 1))
73*629ff9f7SJohn Marino ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
74*629ff9f7SJohn Marino - (__LONG_LONG_MAX__ * 2ULL + 1));
75*629ff9f7SJohn Marino }
76*629ff9f7SJohn Marino #endif
77*629ff9f7SJohn Marino }
78*629ff9f7SJohn Marino if (!up)
79*629ff9f7SJohn Marino ws->mode |= 2;
80*629ff9f7SJohn Marino }
81*629ff9f7SJohn Marino
/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */

98*629ff9f7SJohn Marino static bool
gomp_loop_ull_static_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)99*629ff9f7SJohn Marino gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
100*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
101*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
102*629ff9f7SJohn Marino {
103*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
104*629ff9f7SJohn Marino
105*629ff9f7SJohn Marino thr->ts.static_trip = 0;
106*629ff9f7SJohn Marino if (gomp_work_share_start (false))
107*629ff9f7SJohn Marino {
108*629ff9f7SJohn Marino gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
109*629ff9f7SJohn Marino GFS_STATIC, chunk_size);
110*629ff9f7SJohn Marino gomp_work_share_init_done ();
111*629ff9f7SJohn Marino }
112*629ff9f7SJohn Marino
113*629ff9f7SJohn Marino return !gomp_iter_ull_static_next (istart, iend);
114*629ff9f7SJohn Marino }
115*629ff9f7SJohn Marino
116*629ff9f7SJohn Marino static bool
gomp_loop_ull_dynamic_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)117*629ff9f7SJohn Marino gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
118*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
119*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
120*629ff9f7SJohn Marino {
121*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
122*629ff9f7SJohn Marino bool ret;
123*629ff9f7SJohn Marino
124*629ff9f7SJohn Marino if (gomp_work_share_start (false))
125*629ff9f7SJohn Marino {
126*629ff9f7SJohn Marino gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
127*629ff9f7SJohn Marino GFS_DYNAMIC, chunk_size);
128*629ff9f7SJohn Marino gomp_work_share_init_done ();
129*629ff9f7SJohn Marino }
130*629ff9f7SJohn Marino
131*629ff9f7SJohn Marino #if defined HAVE_SYNC_BUILTINS && defined __LP64__
132*629ff9f7SJohn Marino ret = gomp_iter_ull_dynamic_next (istart, iend);
133*629ff9f7SJohn Marino #else
134*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
135*629ff9f7SJohn Marino ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
136*629ff9f7SJohn Marino gomp_mutex_unlock (&thr->ts.work_share->lock);
137*629ff9f7SJohn Marino #endif
138*629ff9f7SJohn Marino
139*629ff9f7SJohn Marino return ret;
140*629ff9f7SJohn Marino }
141*629ff9f7SJohn Marino
142*629ff9f7SJohn Marino static bool
gomp_loop_ull_guided_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)143*629ff9f7SJohn Marino gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
144*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
145*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
146*629ff9f7SJohn Marino {
147*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
148*629ff9f7SJohn Marino bool ret;
149*629ff9f7SJohn Marino
150*629ff9f7SJohn Marino if (gomp_work_share_start (false))
151*629ff9f7SJohn Marino {
152*629ff9f7SJohn Marino gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
153*629ff9f7SJohn Marino GFS_GUIDED, chunk_size);
154*629ff9f7SJohn Marino gomp_work_share_init_done ();
155*629ff9f7SJohn Marino }
156*629ff9f7SJohn Marino
157*629ff9f7SJohn Marino #if defined HAVE_SYNC_BUILTINS && defined __LP64__
158*629ff9f7SJohn Marino ret = gomp_iter_ull_guided_next (istart, iend);
159*629ff9f7SJohn Marino #else
160*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
161*629ff9f7SJohn Marino ret = gomp_iter_ull_guided_next_locked (istart, iend);
162*629ff9f7SJohn Marino gomp_mutex_unlock (&thr->ts.work_share->lock);
163*629ff9f7SJohn Marino #endif
164*629ff9f7SJohn Marino
165*629ff9f7SJohn Marino return ret;
166*629ff9f7SJohn Marino }
167*629ff9f7SJohn Marino
168*629ff9f7SJohn Marino bool
GOMP_loop_ull_runtime_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull * istart,gomp_ull * iend)169*629ff9f7SJohn Marino GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
170*629ff9f7SJohn Marino gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
171*629ff9f7SJohn Marino {
172*629ff9f7SJohn Marino struct gomp_task_icv *icv = gomp_icv (false);
173*629ff9f7SJohn Marino switch (icv->run_sched_var)
174*629ff9f7SJohn Marino {
175*629ff9f7SJohn Marino case GFS_STATIC:
176*629ff9f7SJohn Marino return gomp_loop_ull_static_start (up, start, end, incr,
177*629ff9f7SJohn Marino icv->run_sched_modifier,
178*629ff9f7SJohn Marino istart, iend);
179*629ff9f7SJohn Marino case GFS_DYNAMIC:
180*629ff9f7SJohn Marino return gomp_loop_ull_dynamic_start (up, start, end, incr,
181*629ff9f7SJohn Marino icv->run_sched_modifier,
182*629ff9f7SJohn Marino istart, iend);
183*629ff9f7SJohn Marino case GFS_GUIDED:
184*629ff9f7SJohn Marino return gomp_loop_ull_guided_start (up, start, end, incr,
185*629ff9f7SJohn Marino icv->run_sched_modifier,
186*629ff9f7SJohn Marino istart, iend);
187*629ff9f7SJohn Marino case GFS_AUTO:
188*629ff9f7SJohn Marino /* For now map to schedule(static), later on we could play with feedback
189*629ff9f7SJohn Marino driven choice. */
190*629ff9f7SJohn Marino return gomp_loop_ull_static_start (up, start, end, incr,
191*629ff9f7SJohn Marino 0, istart, iend);
192*629ff9f7SJohn Marino default:
193*629ff9f7SJohn Marino abort ();
194*629ff9f7SJohn Marino }
195*629ff9f7SJohn Marino }
196*629ff9f7SJohn Marino
/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */

200*629ff9f7SJohn Marino static bool
gomp_loop_ull_ordered_static_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)201*629ff9f7SJohn Marino gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
202*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
203*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
204*629ff9f7SJohn Marino {
205*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
206*629ff9f7SJohn Marino
207*629ff9f7SJohn Marino thr->ts.static_trip = 0;
208*629ff9f7SJohn Marino if (gomp_work_share_start (true))
209*629ff9f7SJohn Marino {
210*629ff9f7SJohn Marino gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
211*629ff9f7SJohn Marino GFS_STATIC, chunk_size);
212*629ff9f7SJohn Marino gomp_ordered_static_init ();
213*629ff9f7SJohn Marino gomp_work_share_init_done ();
214*629ff9f7SJohn Marino }
215*629ff9f7SJohn Marino
216*629ff9f7SJohn Marino return !gomp_iter_ull_static_next (istart, iend);
217*629ff9f7SJohn Marino }
218*629ff9f7SJohn Marino
219*629ff9f7SJohn Marino static bool
gomp_loop_ull_ordered_dynamic_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)220*629ff9f7SJohn Marino gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
221*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
222*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
223*629ff9f7SJohn Marino {
224*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
225*629ff9f7SJohn Marino bool ret;
226*629ff9f7SJohn Marino
227*629ff9f7SJohn Marino if (gomp_work_share_start (true))
228*629ff9f7SJohn Marino {
229*629ff9f7SJohn Marino gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
230*629ff9f7SJohn Marino GFS_DYNAMIC, chunk_size);
231*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
232*629ff9f7SJohn Marino gomp_work_share_init_done ();
233*629ff9f7SJohn Marino }
234*629ff9f7SJohn Marino else
235*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
236*629ff9f7SJohn Marino
237*629ff9f7SJohn Marino ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
238*629ff9f7SJohn Marino if (ret)
239*629ff9f7SJohn Marino gomp_ordered_first ();
240*629ff9f7SJohn Marino gomp_mutex_unlock (&thr->ts.work_share->lock);
241*629ff9f7SJohn Marino
242*629ff9f7SJohn Marino return ret;
243*629ff9f7SJohn Marino }
244*629ff9f7SJohn Marino
245*629ff9f7SJohn Marino static bool
gomp_loop_ull_ordered_guided_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)246*629ff9f7SJohn Marino gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
247*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
248*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
249*629ff9f7SJohn Marino {
250*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
251*629ff9f7SJohn Marino bool ret;
252*629ff9f7SJohn Marino
253*629ff9f7SJohn Marino if (gomp_work_share_start (true))
254*629ff9f7SJohn Marino {
255*629ff9f7SJohn Marino gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
256*629ff9f7SJohn Marino GFS_GUIDED, chunk_size);
257*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
258*629ff9f7SJohn Marino gomp_work_share_init_done ();
259*629ff9f7SJohn Marino }
260*629ff9f7SJohn Marino else
261*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
262*629ff9f7SJohn Marino
263*629ff9f7SJohn Marino ret = gomp_iter_ull_guided_next_locked (istart, iend);
264*629ff9f7SJohn Marino if (ret)
265*629ff9f7SJohn Marino gomp_ordered_first ();
266*629ff9f7SJohn Marino gomp_mutex_unlock (&thr->ts.work_share->lock);
267*629ff9f7SJohn Marino
268*629ff9f7SJohn Marino return ret;
269*629ff9f7SJohn Marino }
270*629ff9f7SJohn Marino
271*629ff9f7SJohn Marino bool
GOMP_loop_ull_ordered_runtime_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull * istart,gomp_ull * iend)272*629ff9f7SJohn Marino GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
273*629ff9f7SJohn Marino gomp_ull incr, gomp_ull *istart,
274*629ff9f7SJohn Marino gomp_ull *iend)
275*629ff9f7SJohn Marino {
276*629ff9f7SJohn Marino struct gomp_task_icv *icv = gomp_icv (false);
277*629ff9f7SJohn Marino switch (icv->run_sched_var)
278*629ff9f7SJohn Marino {
279*629ff9f7SJohn Marino case GFS_STATIC:
280*629ff9f7SJohn Marino return gomp_loop_ull_ordered_static_start (up, start, end, incr,
281*629ff9f7SJohn Marino icv->run_sched_modifier,
282*629ff9f7SJohn Marino istart, iend);
283*629ff9f7SJohn Marino case GFS_DYNAMIC:
284*629ff9f7SJohn Marino return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
285*629ff9f7SJohn Marino icv->run_sched_modifier,
286*629ff9f7SJohn Marino istart, iend);
287*629ff9f7SJohn Marino case GFS_GUIDED:
288*629ff9f7SJohn Marino return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
289*629ff9f7SJohn Marino icv->run_sched_modifier,
290*629ff9f7SJohn Marino istart, iend);
291*629ff9f7SJohn Marino case GFS_AUTO:
292*629ff9f7SJohn Marino /* For now map to schedule(static), later on we could play with feedback
293*629ff9f7SJohn Marino driven choice. */
294*629ff9f7SJohn Marino return gomp_loop_ull_ordered_static_start (up, start, end, incr,
295*629ff9f7SJohn Marino 0, istart, iend);
296*629ff9f7SJohn Marino default:
297*629ff9f7SJohn Marino abort ();
298*629ff9f7SJohn Marino }
299*629ff9f7SJohn Marino }
300*629ff9f7SJohn Marino
/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel.  In which case, this
   may be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

311*629ff9f7SJohn Marino static bool
gomp_loop_ull_static_next(gomp_ull * istart,gomp_ull * iend)312*629ff9f7SJohn Marino gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
313*629ff9f7SJohn Marino {
314*629ff9f7SJohn Marino return !gomp_iter_ull_static_next (istart, iend);
315*629ff9f7SJohn Marino }
316*629ff9f7SJohn Marino
317*629ff9f7SJohn Marino static bool
gomp_loop_ull_dynamic_next(gomp_ull * istart,gomp_ull * iend)318*629ff9f7SJohn Marino gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
319*629ff9f7SJohn Marino {
320*629ff9f7SJohn Marino bool ret;
321*629ff9f7SJohn Marino
322*629ff9f7SJohn Marino #if defined HAVE_SYNC_BUILTINS && defined __LP64__
323*629ff9f7SJohn Marino ret = gomp_iter_ull_dynamic_next (istart, iend);
324*629ff9f7SJohn Marino #else
325*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
326*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
327*629ff9f7SJohn Marino ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
328*629ff9f7SJohn Marino gomp_mutex_unlock (&thr->ts.work_share->lock);
329*629ff9f7SJohn Marino #endif
330*629ff9f7SJohn Marino
331*629ff9f7SJohn Marino return ret;
332*629ff9f7SJohn Marino }
333*629ff9f7SJohn Marino
334*629ff9f7SJohn Marino static bool
gomp_loop_ull_guided_next(gomp_ull * istart,gomp_ull * iend)335*629ff9f7SJohn Marino gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
336*629ff9f7SJohn Marino {
337*629ff9f7SJohn Marino bool ret;
338*629ff9f7SJohn Marino
339*629ff9f7SJohn Marino #if defined HAVE_SYNC_BUILTINS && defined __LP64__
340*629ff9f7SJohn Marino ret = gomp_iter_ull_guided_next (istart, iend);
341*629ff9f7SJohn Marino #else
342*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
343*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
344*629ff9f7SJohn Marino ret = gomp_iter_ull_guided_next_locked (istart, iend);
345*629ff9f7SJohn Marino gomp_mutex_unlock (&thr->ts.work_share->lock);
346*629ff9f7SJohn Marino #endif
347*629ff9f7SJohn Marino
348*629ff9f7SJohn Marino return ret;
349*629ff9f7SJohn Marino }
350*629ff9f7SJohn Marino
351*629ff9f7SJohn Marino bool
GOMP_loop_ull_runtime_next(gomp_ull * istart,gomp_ull * iend)352*629ff9f7SJohn Marino GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
353*629ff9f7SJohn Marino {
354*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
355*629ff9f7SJohn Marino
356*629ff9f7SJohn Marino switch (thr->ts.work_share->sched)
357*629ff9f7SJohn Marino {
358*629ff9f7SJohn Marino case GFS_STATIC:
359*629ff9f7SJohn Marino case GFS_AUTO:
360*629ff9f7SJohn Marino return gomp_loop_ull_static_next (istart, iend);
361*629ff9f7SJohn Marino case GFS_DYNAMIC:
362*629ff9f7SJohn Marino return gomp_loop_ull_dynamic_next (istart, iend);
363*629ff9f7SJohn Marino case GFS_GUIDED:
364*629ff9f7SJohn Marino return gomp_loop_ull_guided_next (istart, iend);
365*629ff9f7SJohn Marino default:
366*629ff9f7SJohn Marino abort ();
367*629ff9f7SJohn Marino }
368*629ff9f7SJohn Marino }
369*629ff9f7SJohn Marino
/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

377*629ff9f7SJohn Marino static bool
gomp_loop_ull_ordered_static_next(gomp_ull * istart,gomp_ull * iend)378*629ff9f7SJohn Marino gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
379*629ff9f7SJohn Marino {
380*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
381*629ff9f7SJohn Marino int test;
382*629ff9f7SJohn Marino
383*629ff9f7SJohn Marino gomp_ordered_sync ();
384*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
385*629ff9f7SJohn Marino test = gomp_iter_ull_static_next (istart, iend);
386*629ff9f7SJohn Marino if (test >= 0)
387*629ff9f7SJohn Marino gomp_ordered_static_next ();
388*629ff9f7SJohn Marino gomp_mutex_unlock (&thr->ts.work_share->lock);
389*629ff9f7SJohn Marino
390*629ff9f7SJohn Marino return test == 0;
391*629ff9f7SJohn Marino }
392*629ff9f7SJohn Marino
393*629ff9f7SJohn Marino static bool
gomp_loop_ull_ordered_dynamic_next(gomp_ull * istart,gomp_ull * iend)394*629ff9f7SJohn Marino gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
395*629ff9f7SJohn Marino {
396*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
397*629ff9f7SJohn Marino bool ret;
398*629ff9f7SJohn Marino
399*629ff9f7SJohn Marino gomp_ordered_sync ();
400*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
401*629ff9f7SJohn Marino ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
402*629ff9f7SJohn Marino if (ret)
403*629ff9f7SJohn Marino gomp_ordered_next ();
404*629ff9f7SJohn Marino else
405*629ff9f7SJohn Marino gomp_ordered_last ();
406*629ff9f7SJohn Marino gomp_mutex_unlock (&thr->ts.work_share->lock);
407*629ff9f7SJohn Marino
408*629ff9f7SJohn Marino return ret;
409*629ff9f7SJohn Marino }
410*629ff9f7SJohn Marino
411*629ff9f7SJohn Marino static bool
gomp_loop_ull_ordered_guided_next(gomp_ull * istart,gomp_ull * iend)412*629ff9f7SJohn Marino gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
413*629ff9f7SJohn Marino {
414*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
415*629ff9f7SJohn Marino bool ret;
416*629ff9f7SJohn Marino
417*629ff9f7SJohn Marino gomp_ordered_sync ();
418*629ff9f7SJohn Marino gomp_mutex_lock (&thr->ts.work_share->lock);
419*629ff9f7SJohn Marino ret = gomp_iter_ull_guided_next_locked (istart, iend);
420*629ff9f7SJohn Marino if (ret)
421*629ff9f7SJohn Marino gomp_ordered_next ();
422*629ff9f7SJohn Marino else
423*629ff9f7SJohn Marino gomp_ordered_last ();
424*629ff9f7SJohn Marino gomp_mutex_unlock (&thr->ts.work_share->lock);
425*629ff9f7SJohn Marino
426*629ff9f7SJohn Marino return ret;
427*629ff9f7SJohn Marino }
428*629ff9f7SJohn Marino
429*629ff9f7SJohn Marino bool
GOMP_loop_ull_ordered_runtime_next(gomp_ull * istart,gomp_ull * iend)430*629ff9f7SJohn Marino GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
431*629ff9f7SJohn Marino {
432*629ff9f7SJohn Marino struct gomp_thread *thr = gomp_thread ();
433*629ff9f7SJohn Marino
434*629ff9f7SJohn Marino switch (thr->ts.work_share->sched)
435*629ff9f7SJohn Marino {
436*629ff9f7SJohn Marino case GFS_STATIC:
437*629ff9f7SJohn Marino case GFS_AUTO:
438*629ff9f7SJohn Marino return gomp_loop_ull_ordered_static_next (istart, iend);
439*629ff9f7SJohn Marino case GFS_DYNAMIC:
440*629ff9f7SJohn Marino return gomp_loop_ull_ordered_dynamic_next (istart, iend);
441*629ff9f7SJohn Marino case GFS_GUIDED:
442*629ff9f7SJohn Marino return gomp_loop_ull_ordered_guided_next (istart, iend);
443*629ff9f7SJohn Marino default:
444*629ff9f7SJohn Marino abort ();
445*629ff9f7SJohn Marino }
446*629ff9f7SJohn Marino }
447*629ff9f7SJohn Marino
/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

453*629ff9f7SJohn Marino #ifdef HAVE_ATTRIBUTE_ALIAS
454*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
455*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_static_start")));
456*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
457*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_dynamic_start")));
458*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
459*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_guided_start")));
460*629ff9f7SJohn Marino
461*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
462*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
463*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
464*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
465*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
466*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_ordered_guided_start")));
467*629ff9f7SJohn Marino
468*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
469*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_static_next")));
470*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
471*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_dynamic_next")));
472*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
473*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_guided_next")));
474*629ff9f7SJohn Marino
475*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
476*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
477*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
478*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
479*629ff9f7SJohn Marino extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
480*629ff9f7SJohn Marino __attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
481*629ff9f7SJohn Marino #else
482*629ff9f7SJohn Marino bool
GOMP_loop_ull_static_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)483*629ff9f7SJohn Marino GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
484*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
485*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
486*629ff9f7SJohn Marino {
487*629ff9f7SJohn Marino return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
488*629ff9f7SJohn Marino iend);
489*629ff9f7SJohn Marino }
490*629ff9f7SJohn Marino
491*629ff9f7SJohn Marino bool
GOMP_loop_ull_dynamic_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)492*629ff9f7SJohn Marino GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
493*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
494*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
495*629ff9f7SJohn Marino {
496*629ff9f7SJohn Marino return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
497*629ff9f7SJohn Marino iend);
498*629ff9f7SJohn Marino }
499*629ff9f7SJohn Marino
500*629ff9f7SJohn Marino bool
GOMP_loop_ull_guided_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)501*629ff9f7SJohn Marino GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
502*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
503*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
504*629ff9f7SJohn Marino {
505*629ff9f7SJohn Marino return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
506*629ff9f7SJohn Marino iend);
507*629ff9f7SJohn Marino }
508*629ff9f7SJohn Marino
509*629ff9f7SJohn Marino bool
GOMP_loop_ull_ordered_static_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)510*629ff9f7SJohn Marino GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
511*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
512*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
513*629ff9f7SJohn Marino {
514*629ff9f7SJohn Marino return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
515*629ff9f7SJohn Marino istart, iend);
516*629ff9f7SJohn Marino }
517*629ff9f7SJohn Marino
518*629ff9f7SJohn Marino bool
GOMP_loop_ull_ordered_dynamic_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)519*629ff9f7SJohn Marino GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
520*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
521*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
522*629ff9f7SJohn Marino {
523*629ff9f7SJohn Marino return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
524*629ff9f7SJohn Marino istart, iend);
525*629ff9f7SJohn Marino }
526*629ff9f7SJohn Marino
527*629ff9f7SJohn Marino bool
GOMP_loop_ull_ordered_guided_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)528*629ff9f7SJohn Marino GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
529*629ff9f7SJohn Marino gomp_ull incr, gomp_ull chunk_size,
530*629ff9f7SJohn Marino gomp_ull *istart, gomp_ull *iend)
531*629ff9f7SJohn Marino {
532*629ff9f7SJohn Marino return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
533*629ff9f7SJohn Marino istart, iend);
534*629ff9f7SJohn Marino }
535*629ff9f7SJohn Marino
536*629ff9f7SJohn Marino bool
GOMP_loop_ull_static_next(gomp_ull * istart,gomp_ull * iend)537*629ff9f7SJohn Marino GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
538*629ff9f7SJohn Marino {
539*629ff9f7SJohn Marino return gomp_loop_ull_static_next (istart, iend);
540*629ff9f7SJohn Marino }
541*629ff9f7SJohn Marino
542*629ff9f7SJohn Marino bool
GOMP_loop_ull_dynamic_next(gomp_ull * istart,gomp_ull * iend)543*629ff9f7SJohn Marino GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
544*629ff9f7SJohn Marino {
545*629ff9f7SJohn Marino return gomp_loop_ull_dynamic_next (istart, iend);
546*629ff9f7SJohn Marino }
547*629ff9f7SJohn Marino
548*629ff9f7SJohn Marino bool
GOMP_loop_ull_guided_next(gomp_ull * istart,gomp_ull * iend)549*629ff9f7SJohn Marino GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
550*629ff9f7SJohn Marino {
551*629ff9f7SJohn Marino return gomp_loop_ull_guided_next (istart, iend);
552*629ff9f7SJohn Marino }
553*629ff9f7SJohn Marino
554*629ff9f7SJohn Marino bool
GOMP_loop_ull_ordered_static_next(gomp_ull * istart,gomp_ull * iend)555*629ff9f7SJohn Marino GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
556*629ff9f7SJohn Marino {
557*629ff9f7SJohn Marino return gomp_loop_ull_ordered_static_next (istart, iend);
558*629ff9f7SJohn Marino }
559*629ff9f7SJohn Marino
560*629ff9f7SJohn Marino bool
GOMP_loop_ull_ordered_dynamic_next(gomp_ull * istart,gomp_ull * iend)561*629ff9f7SJohn Marino GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
562*629ff9f7SJohn Marino {
563*629ff9f7SJohn Marino return gomp_loop_ull_ordered_dynamic_next (istart, iend);
564*629ff9f7SJohn Marino }
565*629ff9f7SJohn Marino
566*629ff9f7SJohn Marino bool
GOMP_loop_ull_ordered_guided_next(gomp_ull * istart,gomp_ull * iend)567*629ff9f7SJohn Marino GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
568*629ff9f7SJohn Marino {
569*629ff9f7SJohn Marino return gomp_loop_ull_ordered_guided_next (istart, iend);
570*629ff9f7SJohn Marino }
571*629ff9f7SJohn Marino #endif
572