/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the (bare) PARALLEL construct.  */

#include "libgomp.h"
#include <limits.h>


/* Determine the number of threads to be launched for a PARALLEL construct.
   This algorithm is explicitly described in OpenMP 3.0 section 2.4.1.
   SPECIFIED is a combination of the NUM_THREADS clause and the IF clause.
   If the IF clause is false, SPECIFIED is forced to 1.  When NUM_THREADS
   is not present, SPECIFIED is 0.  */
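
/* For illustration only (a sketch of the caller-side encoding described
   above, not an exhaustive list), the compiler is expected to pass
   SPECIFIED roughly as follows:

     #pragma omp parallel                   -> specified == 0
     #pragma omp parallel num_threads (4)   -> specified == 4
     #pragma omp parallel if (0)            -> specified == 1  */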

unsigned
gomp_resolve_num_threads (unsigned specified, unsigned count)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  unsigned threads_requested, max_num_threads, num_threads;
  unsigned long busy;
  struct gomp_thread_pool *pool;

  icv = gomp_icv (false);

  if (specified == 1)
    return 1;

  if (thr->ts.active_level >= 1
  /* Accelerators with fixed thread counts require this to return 1 for
     nested parallel regions.  */
#if !defined(__AMDGCN__) && !defined(__nvptx__)
      && icv->max_active_levels_var <= 1
#endif
      )
    return 1;
  else if (thr->ts.active_level >= icv->max_active_levels_var)
    return 1;

  /* If NUM_THREADS not specified, use nthreads_var.  */
  if (specified == 0)
    threads_requested = icv->nthreads_var;
  else
    threads_requested = specified;

  max_num_threads = threads_requested;

  /* If dynamic threads are enabled, bound the number of threads
     that we launch.  */
  if (icv->dyn_var)
    {
      unsigned dyn = gomp_dynamic_max_threads ();
      if (dyn < max_num_threads)
	max_num_threads = dyn;

      /* Optimization for parallel sections.  */
      if (count && count < max_num_threads)
	max_num_threads = count;
    }

  /* UINT_MAX stands for infinity.  */
  if (__builtin_expect (icv->thread_limit_var == UINT_MAX, 1)
      || max_num_threads == 1)
    return max_num_threads;

  /* The threads_busy counter lives in the thread_pool; if there is no
     thread_pool yet, there must be just one thread in the contention
     group.  If thr->ts.team is NULL, this is not a nested parallel
     region, so there is likewise just one thread in the contention
     group and no need to update the counter atomically.  */
  pool = thr->thread_pool;
  if (thr->ts.team == NULL || pool == NULL)
    {
      num_threads = max_num_threads;
      if (num_threads > icv->thread_limit_var)
	num_threads = icv->thread_limit_var;
      if (pool)
	pool->threads_busy = num_threads;
      return num_threads;
    }

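  /* Reserve threads against the remaining thread-limit budget.  The calling
     thread is already counted in threads_busy, hence the "+ 1".  Purely
     illustrative numbers: with thread_limit_var == 8, threads_busy == 5 and
     max_num_threads == 6, at most 8 - 5 + 1 == 4 threads may be used, and
     threads_busy is then advanced to 5 + 4 - 1 == 8.  */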
#ifdef HAVE_SYNC_BUILTINS
  do
    {
      busy = pool->threads_busy;
      num_threads = max_num_threads;
      if (icv->thread_limit_var - busy + 1 < num_threads)
	num_threads = icv->thread_limit_var - busy + 1;
    }
  while (__sync_val_compare_and_swap (&pool->threads_busy,
				      busy, busy + num_threads - 1)
	 != busy);
#else
  gomp_mutex_lock (&gomp_managed_threads_lock);
  num_threads = max_num_threads;
  busy = pool->threads_busy;
  if (icv->thread_limit_var - busy + 1 < num_threads)
    num_threads = icv->thread_limit_var - busy + 1;
  pool->threads_busy += num_threads - 1;
  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif

  return num_threads;
}

void
GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
{
  num_threads = gomp_resolve_num_threads (num_threads, 0);
  gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads),
		   NULL);
}

void
GOMP_parallel_end (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (__builtin_expect (icv->thread_limit_var != UINT_MAX, 0))
    {
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      unsigned int nthreads = team ? team->nthreads : 1;
      gomp_team_end ();
      if (nthreads > 1)
	{
	  /* If the region was not nested, there is just one thread left
	     in the contention group, so there is no need for atomicity.  */
	  if (thr->ts.team == NULL)
	    thr->thread_pool->threads_busy = 1;
	  else
	    {
#ifdef HAVE_SYNC_BUILTINS
	      __sync_fetch_and_add (&thr->thread_pool->threads_busy,
				    1UL - nthreads);
#else
	      gomp_mutex_lock (&gomp_managed_threads_lock);
	      thr->thread_pool->threads_busy -= nthreads - 1;
	      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	    }
	}
    }
  else
    gomp_team_end ();
}
ialias (GOMP_parallel_end)

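/* GOMP_parallel is the combined entry point emitted by the compiler for a
   parallel construct.  Roughly (an illustrative sketch, not the exact code
   the compiler generates), user code such as

     #pragma omp parallel num_threads (4)
       body;

   is outlined into a helper function and lowered into something like

     GOMP_parallel (subfn, &shared_data, 4, 0);

   where subfn runs the body in every thread of the new team and
   GOMP_parallel itself calls GOMP_parallel_end once the region is over.
   The older split GOMP_parallel_start/GOMP_parallel_end pair above serves
   the same purpose for code compiled against the legacy interface.  */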
void
GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
	       unsigned int flags)
{
  num_threads = gomp_resolve_num_threads (num_threads, 0);
  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
		   NULL);
  fn (data);
  ialias_call (GOMP_parallel_end) ();
}

unsigned
GOMP_parallel_reductions (void (*fn) (void *), void *data,
			  unsigned num_threads, unsigned int flags)
{
  struct gomp_taskgroup *taskgroup;
  num_threads = gomp_resolve_num_threads (num_threads, 0);
  uintptr_t *rdata = *(uintptr_t **)data;
  taskgroup = gomp_parallel_reduction_register (rdata, num_threads);
  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
		   taskgroup);
  fn (data);
  ialias_call (GOMP_parallel_end) ();
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
  return num_threads;
}

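/* Roughly speaking (an illustrative sketch of the expected lowering, not a
   verbatim expansion), a directive such as

     #pragma omp cancellation point for

   becomes a call along the lines of

     if (GOMP_cancellation_point (GOMP_CANCEL_LOOP))
       goto end_of_loop_region;

   i.e. the return value tells the generated code whether to branch to the
   end of the innermost construct of the given kind.  */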
bool
GOMP_cancellation_point (int which)
{
  if (!gomp_cancel_var)
    return false;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    {
      if (team == NULL)
	return false;
      return team->work_share_cancelled != 0;
    }
  else if (which & GOMP_CANCEL_TASKGROUP)
    {
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return true;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return true;
	}
      /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
	 as #pragma omp cancel parallel also cancels all explicit
	 tasks.  */
    }
  if (team)
    return gomp_team_barrier_cancelled (&team->barrier);
  return false;
}
ialias (GOMP_cancellation_point)

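/* GOMP_cancel implements #pragma omp cancel.  As an illustrative sketch
   (the exact expansion is the compiler's business), a directive with an
   IF clause such as

     #pragma omp cancel parallel if (cond)

   is expected to be lowered into something along the lines of

     if (GOMP_cancel (GOMP_CANCEL_PARALLEL, cond))
       goto end_of_parallel_region;

   Cancellation only has an effect when gomp_cancel_var is set, i.e. when
   cancellation has been enabled via the OMP_CANCELLATION environment
   variable.  */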
bool
GOMP_cancel (int which, bool do_cancel)
{
  if (!gomp_cancel_var)
    return false;

  if (!do_cancel)
    return ialias_call (GOMP_cancellation_point) (which);

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    {
      /* In an orphaned worksharing region, all we want to cancel
	 is the current thread.  */
      if (team != NULL)
	team->work_share_cancelled = 1;
      return true;
    }
  else if (which & GOMP_CANCEL_TASKGROUP)
    {
      if (thr->task->taskgroup)
	{
	  struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
	  if (taskgroup->workshare && taskgroup->prev)
	    taskgroup = taskgroup->prev;
	  if (!taskgroup->cancelled)
	    {
	      gomp_mutex_lock (&team->task_lock);
	      taskgroup->cancelled = true;
	      gomp_mutex_unlock (&team->task_lock);
	    }
	}
      return true;
    }
  team->team_cancelled = 1;
  gomp_team_barrier_cancel (team);
  return true;
}

/* The public OpenMP API for thread and team related inquiries.  */

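/* A small usage sketch (illustrative only; the actual values depend on the
   ICV settings in effect):

     omp_get_num_threads ();   // 1 outside any parallel region
     #pragma omp parallel num_threads (4)
       {
         omp_get_num_threads ();   // 4
         omp_get_thread_num ();    // 0, 1, 2 or 3
         omp_in_parallel ();       // nonzero, the region is active
       }
*/
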
int
omp_get_num_threads (void)
{
  struct gomp_team *team = gomp_thread ()->ts.team;
  return team ? team->nthreads : 1;
}

int
omp_get_thread_num (void)
{
  return gomp_thread ()->ts.team_id;
}

/* This wasn't right for OpenMP 2.5.  Under 2.5 a region counted as active
   whenever its IF clause did not evaluate to false; starting with OpenMP
   3.0 a region is active only when more than one thread is in the team.  */

int
omp_in_parallel (void)
{
  return gomp_thread ()->ts.active_level > 0;
}

int
omp_get_level (void)
{
  return gomp_thread ()->ts.level;
}

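/* The two functions below walk the prev_ts chain back from the current
   nesting level.  Purely as an illustration (assuming nested parallelism is
   enabled and the requested threads are actually granted), inside

     #pragma omp parallel num_threads (2)
     #pragma omp parallel num_threads (3)

   omp_get_level () is 2, omp_get_team_size (1) is 2, omp_get_team_size (2)
   is 3, and omp_get_ancestor_thread_num (omp_get_level ()) is the same as
   omp_get_thread_num ().  */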
int
omp_get_ancestor_thread_num (int level)
{
  struct gomp_team_state *ts = &gomp_thread ()->ts;
  if (level < 0 || level > ts->level)
    return -1;
  for (level = ts->level - level; level > 0; --level)
    ts = &ts->team->prev_ts;
  return ts->team_id;
}

int
omp_get_team_size (int level)
{
  struct gomp_team_state *ts = &gomp_thread ()->ts;
  if (level < 0 || level > ts->level)
    return -1;
  for (level = ts->level - level; level > 0; --level)
    ts = &ts->team->prev_ts;
  if (ts->team == NULL)
    return 1;
  else
    return ts->team->nthreads;
}

int
omp_get_active_level (void)
{
  return gomp_thread ()->ts.active_level;
}

ialias (omp_get_num_threads)
ialias (omp_get_thread_num)
ialias (omp_in_parallel)
ialias (omp_get_level)
ialias (omp_get_ancestor_thread_num)
ialias (omp_get_team_size)
ialias (omp_get_active_level)