xref: /netbsd-src/external/gpl3/gcc/dist/libgomp/loop_ull.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1*b1e83836Smrg /* Copyright (C) 2005-2022 Free Software Foundation, Inc.
24fee23f9Smrg    Contributed by Richard Henderson <rth@redhat.com>.
34fee23f9Smrg 
44d5abbe8Smrg    This file is part of the GNU Offloading and Multi Processing Library
54d5abbe8Smrg    (libgomp).
64fee23f9Smrg 
74fee23f9Smrg    Libgomp is free software; you can redistribute it and/or modify it
84fee23f9Smrg    under the terms of the GNU General Public License as published by
94fee23f9Smrg    the Free Software Foundation; either version 3, or (at your option)
104fee23f9Smrg    any later version.
114fee23f9Smrg 
124fee23f9Smrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
134fee23f9Smrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
144fee23f9Smrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
154fee23f9Smrg    more details.
164fee23f9Smrg 
174fee23f9Smrg    Under Section 7 of GPL version 3, you are granted additional
184fee23f9Smrg    permissions described in the GCC Runtime Library Exception, version
194fee23f9Smrg    3.1, as published by the Free Software Foundation.
204fee23f9Smrg 
214fee23f9Smrg    You should have received a copy of the GNU General Public License and
224fee23f9Smrg    a copy of the GCC Runtime Library Exception along with this program;
234fee23f9Smrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
244fee23f9Smrg    <http://www.gnu.org/licenses/>.  */
254fee23f9Smrg 
264fee23f9Smrg /* This file handles the LOOP (FOR/DO) construct.  */
274fee23f9Smrg 
284fee23f9Smrg #include <limits.h>
294fee23f9Smrg #include <stdlib.h>
30181254a7Smrg #include <string.h>
314fee23f9Smrg #include "libgomp.h"
324fee23f9Smrg 
33181254a7Smrg ialias (GOMP_loop_ull_runtime_next)
34181254a7Smrg ialias_redirect (GOMP_taskgroup_reduction_register)
35181254a7Smrg 
364fee23f9Smrg typedef unsigned long long gomp_ull;
374fee23f9Smrg 
384fee23f9Smrg /* Initialize the given work share construct from the given arguments.  */
394fee23f9Smrg 
404fee23f9Smrg static inline void
gomp_loop_ull_init(struct gomp_work_share * ws,bool up,gomp_ull start,gomp_ull end,gomp_ull incr,enum gomp_schedule_type sched,gomp_ull chunk_size)414fee23f9Smrg gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
424fee23f9Smrg 		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
434fee23f9Smrg 		    gomp_ull chunk_size)
444fee23f9Smrg {
454fee23f9Smrg   ws->sched = sched;
464fee23f9Smrg   ws->chunk_size_ull = chunk_size;
474fee23f9Smrg   /* Canonicalize loops that have zero iterations to ->next == ->end.  */
484fee23f9Smrg   ws->end_ull = ((up && start > end) || (!up && start < end))
494fee23f9Smrg 		? start : end;
504fee23f9Smrg   ws->incr_ull = incr;
514fee23f9Smrg   ws->next_ull = start;
524fee23f9Smrg   ws->mode = 0;
534fee23f9Smrg   if (sched == GFS_DYNAMIC)
544fee23f9Smrg     {
554fee23f9Smrg       ws->chunk_size_ull *= incr;
564fee23f9Smrg 
574fee23f9Smrg #if defined HAVE_SYNC_BUILTINS && defined __LP64__
584fee23f9Smrg       {
594fee23f9Smrg 	/* For dynamic scheduling prepare things to make each iteration
604fee23f9Smrg 	   faster.  */
614fee23f9Smrg 	struct gomp_thread *thr = gomp_thread ();
624fee23f9Smrg 	struct gomp_team *team = thr->ts.team;
634fee23f9Smrg 	long nthreads = team ? team->nthreads : 1;
644fee23f9Smrg 
654fee23f9Smrg 	if (__builtin_expect (up, 1))
664fee23f9Smrg 	  {
674fee23f9Smrg 	    /* Cheap overflow protection.  */
684fee23f9Smrg 	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
694fee23f9Smrg 				  < 1ULL << (sizeof (gomp_ull)
704fee23f9Smrg 					     * __CHAR_BIT__ / 2 - 1), 1))
714fee23f9Smrg 	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
724fee23f9Smrg 					- (nthreads + 1) * ws->chunk_size_ull);
734fee23f9Smrg 	  }
744fee23f9Smrg 	/* Cheap overflow protection.  */
754fee23f9Smrg 	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
764fee23f9Smrg 				   < 1ULL << (sizeof (gomp_ull)
774fee23f9Smrg 					      * __CHAR_BIT__ / 2 - 1), 1))
784fee23f9Smrg 	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
794fee23f9Smrg 				    - (__LONG_LONG_MAX__ * 2ULL + 1));
804fee23f9Smrg       }
814fee23f9Smrg #endif
824fee23f9Smrg     }
834fee23f9Smrg   if (!up)
844fee23f9Smrg     ws->mode |= 2;
854fee23f9Smrg }
864fee23f9Smrg 
874fee23f9Smrg /* The *_start routines are called when first encountering a loop construct
884fee23f9Smrg    that is not bound directly to a parallel construct.  The first thread
894fee23f9Smrg    that arrives will create the work-share construct; subsequent threads
904fee23f9Smrg    will see the construct exists and allocate work from it.
914fee23f9Smrg 
924fee23f9Smrg    START, END, INCR are the bounds of the loop; due to the restrictions of
934fee23f9Smrg    OpenMP, these values must be the same in every thread.  This is not
944fee23f9Smrg    verified (nor is it entirely verifiable, since START is not necessarily
954fee23f9Smrg    retained intact in the work-share data structure).  CHUNK_SIZE is the
964fee23f9Smrg    scheduling parameter; again this must be identical in all threads.
974fee23f9Smrg 
984fee23f9Smrg    Returns true if there's any work for this thread to perform.  If so,
994fee23f9Smrg    *ISTART and *IEND are filled with the bounds of the iteration block
1004fee23f9Smrg    allocated to this thread.  Returns false if all work was assigned to
1014fee23f9Smrg    other threads prior to this thread's arrival.  */
1024fee23f9Smrg 
1034fee23f9Smrg static bool
gomp_loop_ull_static_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)1044fee23f9Smrg gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
1054fee23f9Smrg 			    gomp_ull incr, gomp_ull chunk_size,
1064fee23f9Smrg 			    gomp_ull *istart, gomp_ull *iend)
1074fee23f9Smrg {
1084fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
1094fee23f9Smrg 
1104fee23f9Smrg   thr->ts.static_trip = 0;
111181254a7Smrg   if (gomp_work_share_start (0))
1124fee23f9Smrg     {
1134fee23f9Smrg       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
1144fee23f9Smrg 			  GFS_STATIC, chunk_size);
1154fee23f9Smrg       gomp_work_share_init_done ();
1164fee23f9Smrg     }
1174fee23f9Smrg 
1184fee23f9Smrg   return !gomp_iter_ull_static_next (istart, iend);
1194fee23f9Smrg }
1204fee23f9Smrg 
1214fee23f9Smrg static bool
gomp_loop_ull_dynamic_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)1224fee23f9Smrg gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
1234fee23f9Smrg 			     gomp_ull incr, gomp_ull chunk_size,
1244fee23f9Smrg 			     gomp_ull *istart, gomp_ull *iend)
1254fee23f9Smrg {
1264fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
1274fee23f9Smrg   bool ret;
1284fee23f9Smrg 
129181254a7Smrg   if (gomp_work_share_start (0))
1304fee23f9Smrg     {
1314fee23f9Smrg       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
1324fee23f9Smrg 			  GFS_DYNAMIC, chunk_size);
1334fee23f9Smrg       gomp_work_share_init_done ();
1344fee23f9Smrg     }
1354fee23f9Smrg 
1364fee23f9Smrg #if defined HAVE_SYNC_BUILTINS && defined __LP64__
1374fee23f9Smrg   ret = gomp_iter_ull_dynamic_next (istart, iend);
1384fee23f9Smrg #else
1394fee23f9Smrg   gomp_mutex_lock (&thr->ts.work_share->lock);
1404fee23f9Smrg   ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
1414fee23f9Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
1424fee23f9Smrg #endif
1434fee23f9Smrg 
1444fee23f9Smrg   return ret;
1454fee23f9Smrg }
1464fee23f9Smrg 
1474fee23f9Smrg static bool
gomp_loop_ull_guided_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)1484fee23f9Smrg gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
1494fee23f9Smrg 			    gomp_ull incr, gomp_ull chunk_size,
1504fee23f9Smrg 			    gomp_ull *istart, gomp_ull *iend)
1514fee23f9Smrg {
1524fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
1534fee23f9Smrg   bool ret;
1544fee23f9Smrg 
155181254a7Smrg   if (gomp_work_share_start (0))
1564fee23f9Smrg     {
1574fee23f9Smrg       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
1584fee23f9Smrg 			  GFS_GUIDED, chunk_size);
1594fee23f9Smrg       gomp_work_share_init_done ();
1604fee23f9Smrg     }
1614fee23f9Smrg 
1624fee23f9Smrg #if defined HAVE_SYNC_BUILTINS && defined __LP64__
1634fee23f9Smrg   ret = gomp_iter_ull_guided_next (istart, iend);
1644fee23f9Smrg #else
1654fee23f9Smrg   gomp_mutex_lock (&thr->ts.work_share->lock);
1664fee23f9Smrg   ret = gomp_iter_ull_guided_next_locked (istart, iend);
1674fee23f9Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
1684fee23f9Smrg #endif
1694fee23f9Smrg 
1704fee23f9Smrg   return ret;
1714fee23f9Smrg }
1724fee23f9Smrg 
1734fee23f9Smrg bool
GOMP_loop_ull_runtime_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull * istart,gomp_ull * iend)1744fee23f9Smrg GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
1754fee23f9Smrg 			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
1764fee23f9Smrg {
1774fee23f9Smrg   struct gomp_task_icv *icv = gomp_icv (false);
178181254a7Smrg   switch (icv->run_sched_var & ~GFS_MONOTONIC)
1794fee23f9Smrg     {
1804fee23f9Smrg     case GFS_STATIC:
1814fee23f9Smrg       return gomp_loop_ull_static_start (up, start, end, incr,
182f9a78e0eSmrg 					 icv->run_sched_chunk_size,
1834fee23f9Smrg 					 istart, iend);
1844fee23f9Smrg     case GFS_DYNAMIC:
1854fee23f9Smrg       return gomp_loop_ull_dynamic_start (up, start, end, incr,
186f9a78e0eSmrg 					  icv->run_sched_chunk_size,
1874fee23f9Smrg 					  istart, iend);
1884fee23f9Smrg     case GFS_GUIDED:
1894fee23f9Smrg       return gomp_loop_ull_guided_start (up, start, end, incr,
190f9a78e0eSmrg 					 icv->run_sched_chunk_size,
1914fee23f9Smrg 					 istart, iend);
1924fee23f9Smrg     case GFS_AUTO:
1934fee23f9Smrg       /* For now map to schedule(static), later on we could play with feedback
1944fee23f9Smrg 	 driven choice.  */
1954fee23f9Smrg       return gomp_loop_ull_static_start (up, start, end, incr,
1964fee23f9Smrg 					 0, istart, iend);
1974fee23f9Smrg     default:
1984fee23f9Smrg       abort ();
1994fee23f9Smrg     }
2004fee23f9Smrg }
2014fee23f9Smrg 
202181254a7Smrg static long
gomp_adjust_sched(long sched,gomp_ull * chunk_size)203181254a7Smrg gomp_adjust_sched (long sched, gomp_ull *chunk_size)
204181254a7Smrg {
205181254a7Smrg   sched &= ~GFS_MONOTONIC;
206181254a7Smrg   switch (sched)
207181254a7Smrg     {
208181254a7Smrg     case GFS_STATIC:
209181254a7Smrg     case GFS_DYNAMIC:
210181254a7Smrg     case GFS_GUIDED:
211181254a7Smrg       return sched;
212181254a7Smrg     /* GFS_RUNTIME is used for runtime schedule without monotonic
213181254a7Smrg        or nonmonotonic modifiers on the clause.
214181254a7Smrg        GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
215181254a7Smrg        modifier.  */
216181254a7Smrg     case GFS_RUNTIME:
217181254a7Smrg     /* GFS_AUTO is used for runtime schedule with nonmonotonic
218181254a7Smrg        modifier.  */
219181254a7Smrg     case GFS_AUTO:
220181254a7Smrg       {
221181254a7Smrg 	struct gomp_task_icv *icv = gomp_icv (false);
222181254a7Smrg 	sched = icv->run_sched_var & ~GFS_MONOTONIC;
223181254a7Smrg 	switch (sched)
224181254a7Smrg 	  {
225181254a7Smrg 	  case GFS_STATIC:
226181254a7Smrg 	  case GFS_DYNAMIC:
227181254a7Smrg 	  case GFS_GUIDED:
228181254a7Smrg 	    *chunk_size = icv->run_sched_chunk_size;
229181254a7Smrg 	    break;
230181254a7Smrg 	  case GFS_AUTO:
231181254a7Smrg 	    sched = GFS_STATIC;
232181254a7Smrg 	    *chunk_size = 0;
233181254a7Smrg 	    break;
234181254a7Smrg 	  default:
235181254a7Smrg 	    abort ();
236181254a7Smrg 	  }
237181254a7Smrg 	return sched;
238181254a7Smrg       }
239181254a7Smrg     default:
240181254a7Smrg       abort ();
241181254a7Smrg     }
242181254a7Smrg }
243181254a7Smrg 
244181254a7Smrg bool
GOMP_loop_ull_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,long sched,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend,uintptr_t * reductions,void ** mem)245181254a7Smrg GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
246181254a7Smrg 		     gomp_ull incr, long sched, gomp_ull chunk_size,
247181254a7Smrg 		     gomp_ull *istart, gomp_ull *iend,
248181254a7Smrg 		     uintptr_t *reductions, void **mem)
249181254a7Smrg {
250181254a7Smrg   struct gomp_thread *thr = gomp_thread ();
251181254a7Smrg 
252181254a7Smrg   thr->ts.static_trip = 0;
253181254a7Smrg   if (reductions)
254181254a7Smrg     gomp_workshare_taskgroup_start ();
255181254a7Smrg   if (gomp_work_share_start (0))
256181254a7Smrg     {
257181254a7Smrg       sched = gomp_adjust_sched (sched, &chunk_size);
258181254a7Smrg       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
259181254a7Smrg       			  sched, chunk_size);
260181254a7Smrg       if (reductions)
261181254a7Smrg 	{
262181254a7Smrg 	  GOMP_taskgroup_reduction_register (reductions);
263181254a7Smrg 	  thr->task->taskgroup->workshare = true;
264181254a7Smrg 	  thr->ts.work_share->task_reductions = reductions;
265181254a7Smrg 	}
266181254a7Smrg       if (mem)
267181254a7Smrg 	{
268181254a7Smrg 	  uintptr_t size = (uintptr_t) *mem;
269181254a7Smrg #define INLINE_ORDERED_TEAM_IDS_OFF \
270181254a7Smrg   ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
271181254a7Smrg     + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
272*b1e83836Smrg 	  if (sizeof (struct gomp_work_share)
273*b1e83836Smrg 	      <= INLINE_ORDERED_TEAM_IDS_OFF
274*b1e83836Smrg 	      || __alignof__ (struct gomp_work_share) < __alignof__ (long long)
275*b1e83836Smrg 	      || size > (sizeof (struct gomp_work_share)
276181254a7Smrg 			- INLINE_ORDERED_TEAM_IDS_OFF))
277181254a7Smrg 	    *mem
278181254a7Smrg 	      = (void *) (thr->ts.work_share->ordered_team_ids
279181254a7Smrg 			  = gomp_malloc_cleared (size));
280181254a7Smrg 	  else
281181254a7Smrg 	    *mem = memset (((char *) thr->ts.work_share)
282181254a7Smrg 			   + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
283181254a7Smrg 	}
284181254a7Smrg       gomp_work_share_init_done ();
285181254a7Smrg     }
286181254a7Smrg   else
287181254a7Smrg     {
288181254a7Smrg       if (reductions)
289181254a7Smrg 	{
290181254a7Smrg 	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
291181254a7Smrg 	  gomp_workshare_task_reduction_register (reductions,
292181254a7Smrg 						  first_reductions);
293181254a7Smrg 	}
294181254a7Smrg       if (mem)
295181254a7Smrg 	{
296181254a7Smrg 	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
297181254a7Smrg 	       & (__alignof__ (long long) - 1)) == 0)
298181254a7Smrg 	    *mem = (void *) thr->ts.work_share->ordered_team_ids;
299181254a7Smrg 	  else
300181254a7Smrg 	    {
301181254a7Smrg 	      uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
302181254a7Smrg 	      p += __alignof__ (long long) - 1;
303181254a7Smrg 	      p &= ~(__alignof__ (long long) - 1);
304181254a7Smrg 	      *mem = (void *) p;
305181254a7Smrg 	    }
306181254a7Smrg 	}
307181254a7Smrg     }
308181254a7Smrg 
309181254a7Smrg   return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
310181254a7Smrg }
311181254a7Smrg 
3124fee23f9Smrg /* The *_ordered_*_start routines are similar.  The only difference is that
3134fee23f9Smrg    this work-share construct is initialized to expect an ORDERED section.  */
3144fee23f9Smrg 
3154fee23f9Smrg static bool
gomp_loop_ull_ordered_static_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)3164fee23f9Smrg gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
3174fee23f9Smrg 				    gomp_ull incr, gomp_ull chunk_size,
3184fee23f9Smrg 				    gomp_ull *istart, gomp_ull *iend)
3194fee23f9Smrg {
3204fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
3214fee23f9Smrg 
3224fee23f9Smrg   thr->ts.static_trip = 0;
323181254a7Smrg   if (gomp_work_share_start (1))
3244fee23f9Smrg     {
3254fee23f9Smrg       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
3264fee23f9Smrg 			  GFS_STATIC, chunk_size);
3274fee23f9Smrg       gomp_ordered_static_init ();
3284fee23f9Smrg       gomp_work_share_init_done ();
3294fee23f9Smrg     }
3304fee23f9Smrg 
3314fee23f9Smrg   return !gomp_iter_ull_static_next (istart, iend);
3324fee23f9Smrg }
3334fee23f9Smrg 
3344fee23f9Smrg static bool
gomp_loop_ull_ordered_dynamic_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)3354fee23f9Smrg gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
3364fee23f9Smrg 				     gomp_ull incr, gomp_ull chunk_size,
3374fee23f9Smrg 				     gomp_ull *istart, gomp_ull *iend)
3384fee23f9Smrg {
3394fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
3404fee23f9Smrg   bool ret;
3414fee23f9Smrg 
342181254a7Smrg   if (gomp_work_share_start (1))
3434fee23f9Smrg     {
3444fee23f9Smrg       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
3454fee23f9Smrg 			  GFS_DYNAMIC, chunk_size);
3464fee23f9Smrg       gomp_mutex_lock (&thr->ts.work_share->lock);
3474fee23f9Smrg       gomp_work_share_init_done ();
3484fee23f9Smrg     }
3494fee23f9Smrg   else
3504fee23f9Smrg     gomp_mutex_lock (&thr->ts.work_share->lock);
3514fee23f9Smrg 
3524fee23f9Smrg   ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
3534fee23f9Smrg   if (ret)
3544fee23f9Smrg     gomp_ordered_first ();
3554fee23f9Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
3564fee23f9Smrg 
3574fee23f9Smrg   return ret;
3584fee23f9Smrg }
3594fee23f9Smrg 
3604fee23f9Smrg static bool
gomp_loop_ull_ordered_guided_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)3614fee23f9Smrg gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
3624fee23f9Smrg 				    gomp_ull incr, gomp_ull chunk_size,
3634fee23f9Smrg 				    gomp_ull *istart, gomp_ull *iend)
3644fee23f9Smrg {
3654fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
3664fee23f9Smrg   bool ret;
3674fee23f9Smrg 
368181254a7Smrg   if (gomp_work_share_start (1))
3694fee23f9Smrg     {
3704fee23f9Smrg       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
3714fee23f9Smrg 			  GFS_GUIDED, chunk_size);
3724fee23f9Smrg       gomp_mutex_lock (&thr->ts.work_share->lock);
3734fee23f9Smrg       gomp_work_share_init_done ();
3744fee23f9Smrg     }
3754fee23f9Smrg   else
3764fee23f9Smrg     gomp_mutex_lock (&thr->ts.work_share->lock);
3774fee23f9Smrg 
3784fee23f9Smrg   ret = gomp_iter_ull_guided_next_locked (istart, iend);
3794fee23f9Smrg   if (ret)
3804fee23f9Smrg     gomp_ordered_first ();
3814fee23f9Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
3824fee23f9Smrg 
3834fee23f9Smrg   return ret;
3844fee23f9Smrg }
3854fee23f9Smrg 
3864fee23f9Smrg bool
GOMP_loop_ull_ordered_runtime_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull * istart,gomp_ull * iend)3874fee23f9Smrg GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
3884fee23f9Smrg 				     gomp_ull incr, gomp_ull *istart,
3894fee23f9Smrg 				     gomp_ull *iend)
3904fee23f9Smrg {
3914fee23f9Smrg   struct gomp_task_icv *icv = gomp_icv (false);
392181254a7Smrg   switch (icv->run_sched_var & ~GFS_MONOTONIC)
3934fee23f9Smrg     {
3944fee23f9Smrg     case GFS_STATIC:
3954fee23f9Smrg       return gomp_loop_ull_ordered_static_start (up, start, end, incr,
396f9a78e0eSmrg 						 icv->run_sched_chunk_size,
3974fee23f9Smrg 						 istart, iend);
3984fee23f9Smrg     case GFS_DYNAMIC:
3994fee23f9Smrg       return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
400f9a78e0eSmrg 						  icv->run_sched_chunk_size,
4014fee23f9Smrg 						  istart, iend);
4024fee23f9Smrg     case GFS_GUIDED:
4034fee23f9Smrg       return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
404f9a78e0eSmrg 						 icv->run_sched_chunk_size,
4054fee23f9Smrg 						 istart, iend);
4064fee23f9Smrg     case GFS_AUTO:
4074fee23f9Smrg       /* For now map to schedule(static), later on we could play with feedback
4084fee23f9Smrg 	 driven choice.  */
4094fee23f9Smrg       return gomp_loop_ull_ordered_static_start (up, start, end, incr,
4104fee23f9Smrg 						 0, istart, iend);
4114fee23f9Smrg     default:
4124fee23f9Smrg       abort ();
4134fee23f9Smrg     }
4144fee23f9Smrg }
4154fee23f9Smrg 
416181254a7Smrg bool
GOMP_loop_ull_ordered_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,long sched,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend,uintptr_t * reductions,void ** mem)417181254a7Smrg GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
418181254a7Smrg 			     gomp_ull incr, long sched, gomp_ull chunk_size,
419181254a7Smrg 			     gomp_ull *istart, gomp_ull *iend,
420181254a7Smrg 			     uintptr_t *reductions, void **mem)
421181254a7Smrg {
422181254a7Smrg   struct gomp_thread *thr = gomp_thread ();
423181254a7Smrg   size_t ordered = 1;
424181254a7Smrg   bool ret;
425181254a7Smrg 
426181254a7Smrg   thr->ts.static_trip = 0;
427181254a7Smrg   if (reductions)
428181254a7Smrg     gomp_workshare_taskgroup_start ();
429181254a7Smrg   if (mem)
430181254a7Smrg     ordered += (uintptr_t) *mem;
431181254a7Smrg   if (gomp_work_share_start (ordered))
432181254a7Smrg     {
433181254a7Smrg       sched = gomp_adjust_sched (sched, &chunk_size);
434181254a7Smrg       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
435181254a7Smrg 			  sched, chunk_size);
436181254a7Smrg       if (reductions)
437181254a7Smrg 	{
438181254a7Smrg 	  GOMP_taskgroup_reduction_register (reductions);
439181254a7Smrg 	  thr->task->taskgroup->workshare = true;
440181254a7Smrg 	  thr->ts.work_share->task_reductions = reductions;
441181254a7Smrg 	}
442181254a7Smrg       if (sched == GFS_STATIC)
443181254a7Smrg 	gomp_ordered_static_init ();
444181254a7Smrg       else
445181254a7Smrg 	gomp_mutex_lock (&thr->ts.work_share->lock);
446181254a7Smrg       gomp_work_share_init_done ();
447181254a7Smrg     }
448181254a7Smrg   else
449181254a7Smrg     {
450181254a7Smrg       if (reductions)
451181254a7Smrg 	{
452181254a7Smrg 	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
453181254a7Smrg 	  gomp_workshare_task_reduction_register (reductions,
454181254a7Smrg 						  first_reductions);
455181254a7Smrg 	}
456181254a7Smrg       sched = thr->ts.work_share->sched;
457181254a7Smrg       if (sched != GFS_STATIC)
458181254a7Smrg 	gomp_mutex_lock (&thr->ts.work_share->lock);
459181254a7Smrg     }
460181254a7Smrg 
461181254a7Smrg   if (mem)
462181254a7Smrg     {
463181254a7Smrg       uintptr_t p
464181254a7Smrg 	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
465181254a7Smrg 		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
466181254a7Smrg       p += __alignof__ (long long) - 1;
467181254a7Smrg       p &= ~(__alignof__ (long long) - 1);
468181254a7Smrg       *mem = (void *) p;
469181254a7Smrg     }
470181254a7Smrg 
471181254a7Smrg   switch (sched)
472181254a7Smrg     {
473181254a7Smrg     case GFS_STATIC:
474181254a7Smrg     case GFS_AUTO:
475181254a7Smrg       return !gomp_iter_ull_static_next (istart, iend);
476181254a7Smrg     case GFS_DYNAMIC:
477181254a7Smrg       ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
478181254a7Smrg       break;
479181254a7Smrg     case GFS_GUIDED:
480181254a7Smrg       ret = gomp_iter_ull_guided_next_locked (istart, iend);
481181254a7Smrg       break;
482181254a7Smrg     default:
483181254a7Smrg       abort ();
484181254a7Smrg     }
485181254a7Smrg 
486181254a7Smrg   if (ret)
487181254a7Smrg     gomp_ordered_first ();
488181254a7Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
489181254a7Smrg   return ret;
490181254a7Smrg }
491181254a7Smrg 
492f9a78e0eSmrg /* The *_doacross_*_start routines are similar.  The only difference is that
493f9a78e0eSmrg    this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
494f9a78e0eSmrg    section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
495f9a78e0eSmrg    and other COUNTS array elements tell the library number of iterations
496f9a78e0eSmrg    in the ordered inner loops.  */
497f9a78e0eSmrg 
498f9a78e0eSmrg static bool
gomp_loop_ull_doacross_static_start(unsigned ncounts,gomp_ull * counts,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)499f9a78e0eSmrg gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
500f9a78e0eSmrg 				     gomp_ull chunk_size, gomp_ull *istart,
501f9a78e0eSmrg 				     gomp_ull *iend)
502f9a78e0eSmrg {
503f9a78e0eSmrg   struct gomp_thread *thr = gomp_thread ();
504f9a78e0eSmrg 
505f9a78e0eSmrg   thr->ts.static_trip = 0;
506181254a7Smrg   if (gomp_work_share_start (0))
507f9a78e0eSmrg     {
508f9a78e0eSmrg       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
509f9a78e0eSmrg 			  GFS_STATIC, chunk_size);
510181254a7Smrg       gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
511f9a78e0eSmrg       gomp_work_share_init_done ();
512f9a78e0eSmrg     }
513f9a78e0eSmrg 
514f9a78e0eSmrg   return !gomp_iter_ull_static_next (istart, iend);
515f9a78e0eSmrg }
516f9a78e0eSmrg 
517f9a78e0eSmrg static bool
gomp_loop_ull_doacross_dynamic_start(unsigned ncounts,gomp_ull * counts,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)518f9a78e0eSmrg gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
519f9a78e0eSmrg 				      gomp_ull chunk_size, gomp_ull *istart,
520f9a78e0eSmrg 				      gomp_ull *iend)
521f9a78e0eSmrg {
522f9a78e0eSmrg   struct gomp_thread *thr = gomp_thread ();
523f9a78e0eSmrg   bool ret;
524f9a78e0eSmrg 
525181254a7Smrg   if (gomp_work_share_start (0))
526f9a78e0eSmrg     {
527f9a78e0eSmrg       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
528f9a78e0eSmrg 			  GFS_DYNAMIC, chunk_size);
529181254a7Smrg       gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
530f9a78e0eSmrg       gomp_work_share_init_done ();
531f9a78e0eSmrg     }
532f9a78e0eSmrg 
533f9a78e0eSmrg #if defined HAVE_SYNC_BUILTINS && defined __LP64__
534f9a78e0eSmrg   ret = gomp_iter_ull_dynamic_next (istart, iend);
535f9a78e0eSmrg #else
536f9a78e0eSmrg   gomp_mutex_lock (&thr->ts.work_share->lock);
537f9a78e0eSmrg   ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
538f9a78e0eSmrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
539f9a78e0eSmrg #endif
540f9a78e0eSmrg 
541f9a78e0eSmrg   return ret;
542f9a78e0eSmrg }
543f9a78e0eSmrg 
544f9a78e0eSmrg static bool
gomp_loop_ull_doacross_guided_start(unsigned ncounts,gomp_ull * counts,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)545f9a78e0eSmrg gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
546f9a78e0eSmrg 				     gomp_ull chunk_size, gomp_ull *istart,
547f9a78e0eSmrg 				     gomp_ull *iend)
548f9a78e0eSmrg {
549f9a78e0eSmrg   struct gomp_thread *thr = gomp_thread ();
550f9a78e0eSmrg   bool ret;
551f9a78e0eSmrg 
552181254a7Smrg   if (gomp_work_share_start (0))
553f9a78e0eSmrg     {
554f9a78e0eSmrg       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
555f9a78e0eSmrg 			  GFS_GUIDED, chunk_size);
556181254a7Smrg       gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
557f9a78e0eSmrg       gomp_work_share_init_done ();
558f9a78e0eSmrg     }
559f9a78e0eSmrg 
560f9a78e0eSmrg #if defined HAVE_SYNC_BUILTINS && defined __LP64__
561f9a78e0eSmrg   ret = gomp_iter_ull_guided_next (istart, iend);
562f9a78e0eSmrg #else
563f9a78e0eSmrg   gomp_mutex_lock (&thr->ts.work_share->lock);
564f9a78e0eSmrg   ret = gomp_iter_ull_guided_next_locked (istart, iend);
565f9a78e0eSmrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
566f9a78e0eSmrg #endif
567f9a78e0eSmrg 
568f9a78e0eSmrg   return ret;
569f9a78e0eSmrg }
570f9a78e0eSmrg 
571f9a78e0eSmrg bool
GOMP_loop_ull_doacross_runtime_start(unsigned ncounts,gomp_ull * counts,gomp_ull * istart,gomp_ull * iend)572f9a78e0eSmrg GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
573f9a78e0eSmrg 				      gomp_ull *istart, gomp_ull *iend)
574f9a78e0eSmrg {
575f9a78e0eSmrg   struct gomp_task_icv *icv = gomp_icv (false);
576181254a7Smrg   switch (icv->run_sched_var & ~GFS_MONOTONIC)
577f9a78e0eSmrg     {
578f9a78e0eSmrg     case GFS_STATIC:
579f9a78e0eSmrg       return gomp_loop_ull_doacross_static_start (ncounts, counts,
580f9a78e0eSmrg 						  icv->run_sched_chunk_size,
581f9a78e0eSmrg 						  istart, iend);
582f9a78e0eSmrg     case GFS_DYNAMIC:
583f9a78e0eSmrg       return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
584f9a78e0eSmrg 						   icv->run_sched_chunk_size,
585f9a78e0eSmrg 						   istart, iend);
586f9a78e0eSmrg     case GFS_GUIDED:
587f9a78e0eSmrg       return gomp_loop_ull_doacross_guided_start (ncounts, counts,
588f9a78e0eSmrg 						  icv->run_sched_chunk_size,
589f9a78e0eSmrg 						  istart, iend);
590f9a78e0eSmrg     case GFS_AUTO:
591f9a78e0eSmrg       /* For now map to schedule(static), later on we could play with feedback
592f9a78e0eSmrg 	 driven choice.  */
593f9a78e0eSmrg       return gomp_loop_ull_doacross_static_start (ncounts, counts,
594f9a78e0eSmrg 						  0, istart, iend);
595f9a78e0eSmrg     default:
596f9a78e0eSmrg       abort ();
597f9a78e0eSmrg     }
598f9a78e0eSmrg }
599f9a78e0eSmrg 
600181254a7Smrg bool
GOMP_loop_ull_doacross_start(unsigned ncounts,gomp_ull * counts,long sched,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend,uintptr_t * reductions,void ** mem)601181254a7Smrg GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
602181254a7Smrg 			      long sched, gomp_ull chunk_size,
603181254a7Smrg 			      gomp_ull *istart, gomp_ull *iend,
604181254a7Smrg 			      uintptr_t *reductions, void **mem)
605181254a7Smrg {
606181254a7Smrg   struct gomp_thread *thr = gomp_thread ();
607181254a7Smrg 
608181254a7Smrg   thr->ts.static_trip = 0;
609181254a7Smrg   if (reductions)
610181254a7Smrg     gomp_workshare_taskgroup_start ();
611181254a7Smrg   if (gomp_work_share_start (0))
612181254a7Smrg     {
613181254a7Smrg       size_t extra = 0;
614181254a7Smrg       if (mem)
615181254a7Smrg 	extra = (uintptr_t) *mem;
616181254a7Smrg       sched = gomp_adjust_sched (sched, &chunk_size);
617181254a7Smrg       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
618181254a7Smrg 			  sched, chunk_size);
619181254a7Smrg       gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
620181254a7Smrg       if (reductions)
621181254a7Smrg 	{
622181254a7Smrg 	  GOMP_taskgroup_reduction_register (reductions);
623181254a7Smrg 	  thr->task->taskgroup->workshare = true;
624181254a7Smrg 	  thr->ts.work_share->task_reductions = reductions;
625181254a7Smrg 	}
626181254a7Smrg       gomp_work_share_init_done ();
627181254a7Smrg     }
628181254a7Smrg   else
629181254a7Smrg     {
630181254a7Smrg       if (reductions)
631181254a7Smrg 	{
632181254a7Smrg 	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
633181254a7Smrg 	  gomp_workshare_task_reduction_register (reductions,
634181254a7Smrg 						  first_reductions);
635181254a7Smrg 	}
636181254a7Smrg       sched = thr->ts.work_share->sched;
637181254a7Smrg     }
638181254a7Smrg 
639181254a7Smrg   if (mem)
640181254a7Smrg     *mem = thr->ts.work_share->doacross->extra;
641181254a7Smrg 
642181254a7Smrg   return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
643181254a7Smrg }
644181254a7Smrg 
6454fee23f9Smrg /* The *_next routines are called when the thread completes processing of
6464fee23f9Smrg    the iteration block currently assigned to it.  If the work-share
6474fee23f9Smrg    construct is bound directly to a parallel construct, then the iteration
6484fee23f9Smrg    bounds may have been set up before the parallel.  In which case, this
6494fee23f9Smrg    may be the first iteration for the thread.
6504fee23f9Smrg 
6514fee23f9Smrg    Returns true if there is work remaining to be performed; *ISTART and
6524fee23f9Smrg    *IEND are filled with a new iteration block.  Returns false if all work
6534fee23f9Smrg    has been assigned.  */
6544fee23f9Smrg 
6554fee23f9Smrg static bool
gomp_loop_ull_static_next(gomp_ull * istart,gomp_ull * iend)6564fee23f9Smrg gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
6574fee23f9Smrg {
6584fee23f9Smrg   return !gomp_iter_ull_static_next (istart, iend);
6594fee23f9Smrg }
6604fee23f9Smrg 
6614fee23f9Smrg static bool
gomp_loop_ull_dynamic_next(gomp_ull * istart,gomp_ull * iend)6624fee23f9Smrg gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
6634fee23f9Smrg {
6644fee23f9Smrg   bool ret;
6654fee23f9Smrg 
6664fee23f9Smrg #if defined HAVE_SYNC_BUILTINS && defined __LP64__
6674fee23f9Smrg   ret = gomp_iter_ull_dynamic_next (istart, iend);
6684fee23f9Smrg #else
6694fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
6704fee23f9Smrg   gomp_mutex_lock (&thr->ts.work_share->lock);
6714fee23f9Smrg   ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
6724fee23f9Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
6734fee23f9Smrg #endif
6744fee23f9Smrg 
6754fee23f9Smrg   return ret;
6764fee23f9Smrg }
6774fee23f9Smrg 
6784fee23f9Smrg static bool
gomp_loop_ull_guided_next(gomp_ull * istart,gomp_ull * iend)6794fee23f9Smrg gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
6804fee23f9Smrg {
6814fee23f9Smrg   bool ret;
6824fee23f9Smrg 
6834fee23f9Smrg #if defined HAVE_SYNC_BUILTINS && defined __LP64__
6844fee23f9Smrg   ret = gomp_iter_ull_guided_next (istart, iend);
6854fee23f9Smrg #else
6864fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
6874fee23f9Smrg   gomp_mutex_lock (&thr->ts.work_share->lock);
6884fee23f9Smrg   ret = gomp_iter_ull_guided_next_locked (istart, iend);
6894fee23f9Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
6904fee23f9Smrg #endif
6914fee23f9Smrg 
6924fee23f9Smrg   return ret;
6934fee23f9Smrg }
6944fee23f9Smrg 
6954fee23f9Smrg bool
GOMP_loop_ull_runtime_next(gomp_ull * istart,gomp_ull * iend)6964fee23f9Smrg GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
6974fee23f9Smrg {
6984fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
6994fee23f9Smrg 
7004fee23f9Smrg   switch (thr->ts.work_share->sched)
7014fee23f9Smrg     {
7024fee23f9Smrg     case GFS_STATIC:
7034fee23f9Smrg     case GFS_AUTO:
7044fee23f9Smrg       return gomp_loop_ull_static_next (istart, iend);
7054fee23f9Smrg     case GFS_DYNAMIC:
7064fee23f9Smrg       return gomp_loop_ull_dynamic_next (istart, iend);
7074fee23f9Smrg     case GFS_GUIDED:
7084fee23f9Smrg       return gomp_loop_ull_guided_next (istart, iend);
7094fee23f9Smrg     default:
7104fee23f9Smrg       abort ();
7114fee23f9Smrg     }
7124fee23f9Smrg }
7134fee23f9Smrg 
7144fee23f9Smrg /* The *_ordered_*_next routines are called when the thread completes
7154fee23f9Smrg    processing of the iteration block currently assigned to it.
7164fee23f9Smrg 
7174fee23f9Smrg    Returns true if there is work remaining to be performed; *ISTART and
7184fee23f9Smrg    *IEND are filled with a new iteration block.  Returns false if all work
7194fee23f9Smrg    has been assigned.  */
7204fee23f9Smrg 
7214fee23f9Smrg static bool
gomp_loop_ull_ordered_static_next(gomp_ull * istart,gomp_ull * iend)7224fee23f9Smrg gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
7234fee23f9Smrg {
7244fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
7254fee23f9Smrg   int test;
7264fee23f9Smrg 
7274fee23f9Smrg   gomp_ordered_sync ();
7284fee23f9Smrg   gomp_mutex_lock (&thr->ts.work_share->lock);
7294fee23f9Smrg   test = gomp_iter_ull_static_next (istart, iend);
7304fee23f9Smrg   if (test >= 0)
7314fee23f9Smrg     gomp_ordered_static_next ();
7324fee23f9Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
7334fee23f9Smrg 
7344fee23f9Smrg   return test == 0;
7354fee23f9Smrg }
7364fee23f9Smrg 
7374fee23f9Smrg static bool
gomp_loop_ull_ordered_dynamic_next(gomp_ull * istart,gomp_ull * iend)7384fee23f9Smrg gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
7394fee23f9Smrg {
7404fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
7414fee23f9Smrg   bool ret;
7424fee23f9Smrg 
7434fee23f9Smrg   gomp_ordered_sync ();
7444fee23f9Smrg   gomp_mutex_lock (&thr->ts.work_share->lock);
7454fee23f9Smrg   ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
7464fee23f9Smrg   if (ret)
7474fee23f9Smrg     gomp_ordered_next ();
7484fee23f9Smrg   else
7494fee23f9Smrg     gomp_ordered_last ();
7504fee23f9Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
7514fee23f9Smrg 
7524fee23f9Smrg   return ret;
7534fee23f9Smrg }
7544fee23f9Smrg 
7554fee23f9Smrg static bool
gomp_loop_ull_ordered_guided_next(gomp_ull * istart,gomp_ull * iend)7564fee23f9Smrg gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
7574fee23f9Smrg {
7584fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
7594fee23f9Smrg   bool ret;
7604fee23f9Smrg 
7614fee23f9Smrg   gomp_ordered_sync ();
7624fee23f9Smrg   gomp_mutex_lock (&thr->ts.work_share->lock);
7634fee23f9Smrg   ret = gomp_iter_ull_guided_next_locked (istart, iend);
7644fee23f9Smrg   if (ret)
7654fee23f9Smrg     gomp_ordered_next ();
7664fee23f9Smrg   else
7674fee23f9Smrg     gomp_ordered_last ();
7684fee23f9Smrg   gomp_mutex_unlock (&thr->ts.work_share->lock);
7694fee23f9Smrg 
7704fee23f9Smrg   return ret;
7714fee23f9Smrg }
7724fee23f9Smrg 
7734fee23f9Smrg bool
GOMP_loop_ull_ordered_runtime_next(gomp_ull * istart,gomp_ull * iend)7744fee23f9Smrg GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
7754fee23f9Smrg {
7764fee23f9Smrg   struct gomp_thread *thr = gomp_thread ();
7774fee23f9Smrg 
7784fee23f9Smrg   switch (thr->ts.work_share->sched)
7794fee23f9Smrg     {
7804fee23f9Smrg     case GFS_STATIC:
7814fee23f9Smrg     case GFS_AUTO:
7824fee23f9Smrg       return gomp_loop_ull_ordered_static_next (istart, iend);
7834fee23f9Smrg     case GFS_DYNAMIC:
7844fee23f9Smrg       return gomp_loop_ull_ordered_dynamic_next (istart, iend);
7854fee23f9Smrg     case GFS_GUIDED:
7864fee23f9Smrg       return gomp_loop_ull_ordered_guided_next (istart, iend);
7874fee23f9Smrg     default:
7884fee23f9Smrg       abort ();
7894fee23f9Smrg     }
7904fee23f9Smrg }
7914fee23f9Smrg 
7924fee23f9Smrg /* We use static functions above so that we're sure that the "runtime"
7934fee23f9Smrg    function can defer to the proper routine without interposition.  We
7944fee23f9Smrg    export the static function with a strong alias when possible, or with
7954fee23f9Smrg    a wrapper function otherwise.  */
7964fee23f9Smrg 
#ifdef HAVE_ATTRIBUTE_ALIAS
/* *_start entry points.  The nonmonotonic variants alias the plain
   (nonmonotonic-capable) implementations.  */
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
	__attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));

/* ordered *_start entry points.  */
extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

/* doacross *_start entry points.  */
extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
	__attribute__((alias ("gomp_loop_ull_doacross_static_start")));
extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
	__attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
	__attribute__((alias ("gomp_loop_ull_doacross_guided_start")));

/* *_next entry points.  */
extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
	__attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));

/* ordered *_next entry points.  */
extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
8484fee23f9Smrg #else
8494fee23f9Smrg bool
GOMP_loop_ull_static_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)8504fee23f9Smrg GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
8514fee23f9Smrg 			    gomp_ull incr, gomp_ull chunk_size,
8524fee23f9Smrg 			    gomp_ull *istart, gomp_ull *iend)
8534fee23f9Smrg {
8544fee23f9Smrg   return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
8554fee23f9Smrg 				     iend);
8564fee23f9Smrg }
8574fee23f9Smrg 
8584fee23f9Smrg bool
GOMP_loop_ull_dynamic_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)8594fee23f9Smrg GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
8604fee23f9Smrg 			     gomp_ull incr, gomp_ull chunk_size,
8614fee23f9Smrg 			     gomp_ull *istart, gomp_ull *iend)
8624fee23f9Smrg {
8634fee23f9Smrg   return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
8644fee23f9Smrg 				      iend);
8654fee23f9Smrg }
8664fee23f9Smrg 
8674fee23f9Smrg bool
GOMP_loop_ull_guided_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)8684fee23f9Smrg GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
8694fee23f9Smrg 			    gomp_ull incr, gomp_ull chunk_size,
8704fee23f9Smrg 			    gomp_ull *istart, gomp_ull *iend)
8714fee23f9Smrg {
8724fee23f9Smrg   return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
8734fee23f9Smrg 				     iend);
8744fee23f9Smrg }
8754fee23f9Smrg 
8764fee23f9Smrg bool
GOMP_loop_ull_nonmonotonic_dynamic_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)877f9a78e0eSmrg GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
878f9a78e0eSmrg 					  gomp_ull end, gomp_ull incr,
879f9a78e0eSmrg 					  gomp_ull chunk_size,
880f9a78e0eSmrg 					  gomp_ull *istart, gomp_ull *iend)
881f9a78e0eSmrg {
882f9a78e0eSmrg   return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
883f9a78e0eSmrg 				      iend);
884f9a78e0eSmrg }
885f9a78e0eSmrg 
886f9a78e0eSmrg bool
GOMP_loop_ull_nonmonotonic_guided_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)887f9a78e0eSmrg GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
888f9a78e0eSmrg 					 gomp_ull incr, gomp_ull chunk_size,
889f9a78e0eSmrg 					 gomp_ull *istart, gomp_ull *iend)
890f9a78e0eSmrg {
891f9a78e0eSmrg   return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
892f9a78e0eSmrg 				     iend);
893f9a78e0eSmrg }
894f9a78e0eSmrg 
895f9a78e0eSmrg bool
GOMP_loop_ull_nonmonotonic_runtime_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull * istart,gomp_ull * iend)896181254a7Smrg GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
897181254a7Smrg 					  gomp_ull end, gomp_ull incr,
898181254a7Smrg 					  gomp_ull *istart, gomp_ull *iend)
899181254a7Smrg {
900181254a7Smrg   return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
901181254a7Smrg }
902181254a7Smrg 
903181254a7Smrg bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull * istart,gomp_ull * iend)904181254a7Smrg GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
905181254a7Smrg 						gomp_ull end, gomp_ull incr,
906181254a7Smrg 						gomp_ull *istart,
907181254a7Smrg 						gomp_ull *iend)
908181254a7Smrg {
909181254a7Smrg   return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
910181254a7Smrg }
911181254a7Smrg 
912181254a7Smrg bool
GOMP_loop_ull_ordered_static_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)9134fee23f9Smrg GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
9144fee23f9Smrg 				    gomp_ull incr, gomp_ull chunk_size,
9154fee23f9Smrg 				    gomp_ull *istart, gomp_ull *iend)
9164fee23f9Smrg {
9174fee23f9Smrg   return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
9184fee23f9Smrg 					     istart, iend);
9194fee23f9Smrg }
9204fee23f9Smrg 
9214fee23f9Smrg bool
GOMP_loop_ull_ordered_dynamic_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)9224fee23f9Smrg GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
9234fee23f9Smrg 				     gomp_ull incr, gomp_ull chunk_size,
9244fee23f9Smrg 				     gomp_ull *istart, gomp_ull *iend)
9254fee23f9Smrg {
9264fee23f9Smrg   return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
9274fee23f9Smrg 					      istart, iend);
9284fee23f9Smrg }
9294fee23f9Smrg 
9304fee23f9Smrg bool
GOMP_loop_ull_ordered_guided_start(bool up,gomp_ull start,gomp_ull end,gomp_ull incr,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)9314fee23f9Smrg GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
9324fee23f9Smrg 				    gomp_ull incr, gomp_ull chunk_size,
9334fee23f9Smrg 				    gomp_ull *istart, gomp_ull *iend)
9344fee23f9Smrg {
9354fee23f9Smrg   return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
9364fee23f9Smrg 					     istart, iend);
9374fee23f9Smrg }
9384fee23f9Smrg 
9394fee23f9Smrg bool
GOMP_loop_ull_doacross_static_start(unsigned ncounts,gomp_ull * counts,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)940f9a78e0eSmrg GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
941f9a78e0eSmrg 				     gomp_ull chunk_size, gomp_ull *istart,
942f9a78e0eSmrg 				     gomp_ull *iend)
943f9a78e0eSmrg {
944f9a78e0eSmrg   return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
945f9a78e0eSmrg 					      istart, iend);
946f9a78e0eSmrg }
947f9a78e0eSmrg 
948f9a78e0eSmrg bool
GOMP_loop_ull_doacross_dynamic_start(unsigned ncounts,gomp_ull * counts,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)949f9a78e0eSmrg GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
950f9a78e0eSmrg 				      gomp_ull chunk_size, gomp_ull *istart,
951f9a78e0eSmrg 				      gomp_ull *iend)
952f9a78e0eSmrg {
953f9a78e0eSmrg   return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
954f9a78e0eSmrg 					       istart, iend);
955f9a78e0eSmrg }
956f9a78e0eSmrg 
957f9a78e0eSmrg bool
GOMP_loop_ull_doacross_guided_start(unsigned ncounts,gomp_ull * counts,gomp_ull chunk_size,gomp_ull * istart,gomp_ull * iend)958f9a78e0eSmrg GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
959f9a78e0eSmrg 				     gomp_ull chunk_size, gomp_ull *istart,
960f9a78e0eSmrg 				     gomp_ull *iend)
961f9a78e0eSmrg {
962f9a78e0eSmrg   return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
963f9a78e0eSmrg 					      istart, iend);
964f9a78e0eSmrg }
965f9a78e0eSmrg 
966f9a78e0eSmrg bool
GOMP_loop_ull_static_next(gomp_ull * istart,gomp_ull * iend)9674fee23f9Smrg GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
9684fee23f9Smrg {
9694fee23f9Smrg   return gomp_loop_ull_static_next (istart, iend);
9704fee23f9Smrg }
9714fee23f9Smrg 
9724fee23f9Smrg bool
GOMP_loop_ull_dynamic_next(gomp_ull * istart,gomp_ull * iend)9734fee23f9Smrg GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
9744fee23f9Smrg {
9754fee23f9Smrg   return gomp_loop_ull_dynamic_next (istart, iend);
9764fee23f9Smrg }
9774fee23f9Smrg 
9784fee23f9Smrg bool
GOMP_loop_ull_guided_next(gomp_ull * istart,gomp_ull * iend)9794fee23f9Smrg GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
9804fee23f9Smrg {
9814fee23f9Smrg   return gomp_loop_ull_guided_next (istart, iend);
9824fee23f9Smrg }
9834fee23f9Smrg 
9844fee23f9Smrg bool
GOMP_loop_ull_nonmonotonic_dynamic_next(gomp_ull * istart,gomp_ull * iend)985f9a78e0eSmrg GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
986f9a78e0eSmrg {
987f9a78e0eSmrg   return gomp_loop_ull_dynamic_next (istart, iend);
988f9a78e0eSmrg }
989f9a78e0eSmrg 
990f9a78e0eSmrg bool
GOMP_loop_ull_nonmonotonic_guided_next(gomp_ull * istart,gomp_ull * iend)991f9a78e0eSmrg GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
992f9a78e0eSmrg {
993f9a78e0eSmrg   return gomp_loop_ull_guided_next (istart, iend);
994f9a78e0eSmrg }
995f9a78e0eSmrg 
996f9a78e0eSmrg bool
GOMP_loop_ull_nonmonotonic_runtime_next(gomp_ull * istart,gomp_ull * iend)997181254a7Smrg GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
998181254a7Smrg {
999181254a7Smrg   return GOMP_loop_ull_runtime_next (istart, iend);
1000181254a7Smrg }
1001181254a7Smrg 
1002181254a7Smrg bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next(gomp_ull * istart,gomp_ull * iend)1003181254a7Smrg GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
1004181254a7Smrg 					       gomp_ull *iend)
1005181254a7Smrg {
1006181254a7Smrg   return GOMP_loop_ull_runtime_next (istart, iend);
1007181254a7Smrg }
1008181254a7Smrg 
1009181254a7Smrg bool
GOMP_loop_ull_ordered_static_next(gomp_ull * istart,gomp_ull * iend)10104fee23f9Smrg GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
10114fee23f9Smrg {
10124fee23f9Smrg   return gomp_loop_ull_ordered_static_next (istart, iend);
10134fee23f9Smrg }
10144fee23f9Smrg 
10154fee23f9Smrg bool
GOMP_loop_ull_ordered_dynamic_next(gomp_ull * istart,gomp_ull * iend)10164fee23f9Smrg GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
10174fee23f9Smrg {
10184fee23f9Smrg   return gomp_loop_ull_ordered_dynamic_next (istart, iend);
10194fee23f9Smrg }
10204fee23f9Smrg 
10214fee23f9Smrg bool
GOMP_loop_ull_ordered_guided_next(gomp_ull * istart,gomp_ull * iend)10224fee23f9Smrg GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
10234fee23f9Smrg {
10244fee23f9Smrg   return gomp_loop_ull_ordered_guided_next (istart, iend);
10254fee23f9Smrg }
10264fee23f9Smrg #endif
1027