xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/parallel.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1*8feb0f0bSmrg /* Copyright (C) 2005-2020 Free Software Foundation, Inc.
21debfc3dSmrg    Contributed by Richard Henderson <rth@redhat.com>.
31debfc3dSmrg 
41debfc3dSmrg    This file is part of the GNU Offloading and Multi Processing Library
51debfc3dSmrg    (libgomp).
61debfc3dSmrg 
71debfc3dSmrg    Libgomp is free software; you can redistribute it and/or modify it
81debfc3dSmrg    under the terms of the GNU General Public License as published by
91debfc3dSmrg    the Free Software Foundation; either version 3, or (at your option)
101debfc3dSmrg    any later version.
111debfc3dSmrg 
121debfc3dSmrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
131debfc3dSmrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
141debfc3dSmrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
151debfc3dSmrg    more details.
161debfc3dSmrg 
171debfc3dSmrg    Under Section 7 of GPL version 3, you are granted additional
181debfc3dSmrg    permissions described in the GCC Runtime Library Exception, version
191debfc3dSmrg    3.1, as published by the Free Software Foundation.
201debfc3dSmrg 
211debfc3dSmrg    You should have received a copy of the GNU General Public License and
221debfc3dSmrg    a copy of the GCC Runtime Library Exception along with this program;
231debfc3dSmrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
241debfc3dSmrg    <http://www.gnu.org/licenses/>.  */
251debfc3dSmrg 
261debfc3dSmrg /* This file handles the (bare) PARALLEL construct.  */
271debfc3dSmrg 
281debfc3dSmrg #include "libgomp.h"
291debfc3dSmrg #include <limits.h>
301debfc3dSmrg 
311debfc3dSmrg 
321debfc3dSmrg /* Determine the number of threads to be launched for a PARALLEL construct.
331debfc3dSmrg    This algorithm is explicitly described in OpenMP 3.0 section 2.4.1.
341debfc3dSmrg    SPECIFIED is a combination of the NUM_THREADS clause and the IF clause.
351debfc3dSmrg    If the IF clause is false, SPECIFIED is forced to 1.  When NUM_THREADS
361debfc3dSmrg    is not present, SPECIFIED is 0.  */
371debfc3dSmrg 
unsigned
gomp_resolve_num_threads (unsigned specified, unsigned count)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  unsigned threads_requested, max_num_threads, num_threads;
  unsigned long busy;
  struct gomp_thread_pool *pool;

  icv = gomp_icv (false);

  /* A single thread is mandated when the IF clause evaluated false
     (SPECIFIED was forced to 1 by the caller), when we are already in
     an active region and nesting is disabled, or when the maximum
     number of active levels has been reached.  */
  if (specified == 1)
    return 1;
  else if (thr->ts.active_level >= 1 && !icv->nest_var)
    return 1;
  else if (thr->ts.active_level >= gomp_max_active_levels_var)
    return 1;

  /* If NUM_THREADS not specified, use nthreads_var.  */
  if (specified == 0)
    threads_requested = icv->nthreads_var;
  else
    threads_requested = specified;

  max_num_threads = threads_requested;

  /* If dynamic threads are enabled, bound the number of threads
     that we launch.  */
  if (icv->dyn_var)
    {
      unsigned dyn = gomp_dynamic_max_threads ();
      if (dyn < max_num_threads)
	max_num_threads = dyn;

      /* Optimization for parallel sections.  */
      if (count && count < max_num_threads)
	max_num_threads = count;
    }

  /* UINT_MAX stands for infinity.  */
  if (__builtin_expect (icv->thread_limit_var == UINT_MAX, 1)
      || max_num_threads == 1)
    return max_num_threads;

  /* The threads_busy counter lives in thread_pool, if there
     isn't a thread_pool yet, there must be just one thread
     in the contention group.  If thr->team is NULL, this isn't
     nested parallel, so there is just one thread in the
     contention group as well, no need to handle it atomically.  */
  pool = thr->thread_pool;
  if (thr->ts.team == NULL || pool == NULL)
    {
      num_threads = max_num_threads;
      if (num_threads > icv->thread_limit_var)
	num_threads = icv->thread_limit_var;
      if (pool)
	pool->threads_busy = num_threads;
      return num_threads;
    }

#ifdef HAVE_SYNC_BUILTINS
  /* Atomically reserve num_threads - 1 extra slots in threads_busy
     (this thread is already counted in busy, hence the +1/-1); the
     CAS loop retries if another thread in the contention group raced
     with us and changed threads_busy meanwhile.  */
  do
    {
      busy = pool->threads_busy;
      num_threads = max_num_threads;
      if (icv->thread_limit_var - busy + 1 < num_threads)
	num_threads = icv->thread_limit_var - busy + 1;
    }
  while (__sync_val_compare_and_swap (&pool->threads_busy,
				      busy, busy + num_threads - 1)
	 != busy);
#else
  /* Without atomic builtins, serialize the reservation through the
     global managed-threads lock instead.  */
  gomp_mutex_lock (&gomp_managed_threads_lock);
  num_threads = max_num_threads;
  busy = pool->threads_busy;
  if (icv->thread_limit_var - busy + 1 < num_threads)
    num_threads = icv->thread_limit_var - busy + 1;
  pool->threads_busy += num_threads - 1;
  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif

  return num_threads;
}
1211debfc3dSmrg 
1221debfc3dSmrg void
GOMP_parallel_start(void (* fn)(void *),void * data,unsigned num_threads)1231debfc3dSmrg GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
1241debfc3dSmrg {
1251debfc3dSmrg   num_threads = gomp_resolve_num_threads (num_threads, 0);
126c0a68be4Smrg   gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads),
127c0a68be4Smrg 		   NULL);
1281debfc3dSmrg }
1291debfc3dSmrg 
void
GOMP_parallel_end (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  /* Only when a thread limit is in effect must the team's slots in
     threads_busy be given back; otherwise just end the team.  */
  if (__builtin_expect (icv->thread_limit_var != UINT_MAX, 0))
    {
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      unsigned int nthreads = team ? team->nthreads : 1;
      gomp_team_end ();
      if (nthreads > 1)
	{
	  /* If not nested, there is just one thread in the
	     contention group left, no need for atomicity.  */
	  if (thr->ts.team == NULL)
	    thr->thread_pool->threads_busy = 1;
	  else
	    {
#ifdef HAVE_SYNC_BUILTINS
	      /* Release the nthreads - 1 slots this team occupied;
		 the master thread itself remains counted.  */
	      __sync_fetch_and_add (&thr->thread_pool->threads_busy,
				    1UL - nthreads);
#else
	      gomp_mutex_lock (&gomp_managed_threads_lock);
	      thr->thread_pool->threads_busy -= nthreads - 1;
	      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	    }
	}
    }
  else
    gomp_team_end ();
}
ialias(GOMP_parallel_end)1621debfc3dSmrg ialias (GOMP_parallel_end)
1631debfc3dSmrg 
1641debfc3dSmrg void
165c0a68be4Smrg GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
166c0a68be4Smrg 	       unsigned int flags)
1671debfc3dSmrg {
1681debfc3dSmrg   num_threads = gomp_resolve_num_threads (num_threads, 0);
169c0a68be4Smrg   gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
170c0a68be4Smrg 		   NULL);
1711debfc3dSmrg   fn (data);
1721debfc3dSmrg   ialias_call (GOMP_parallel_end) ();
1731debfc3dSmrg }
1741debfc3dSmrg 
175c0a68be4Smrg unsigned
GOMP_parallel_reductions(void (* fn)(void *),void * data,unsigned num_threads,unsigned int flags)176c0a68be4Smrg GOMP_parallel_reductions (void (*fn) (void *), void *data,
177c0a68be4Smrg 			  unsigned num_threads, unsigned int flags)
178c0a68be4Smrg {
179c0a68be4Smrg   struct gomp_taskgroup *taskgroup;
180c0a68be4Smrg   num_threads = gomp_resolve_num_threads (num_threads, 0);
181c0a68be4Smrg   uintptr_t *rdata = *(uintptr_t **)data;
182c0a68be4Smrg   taskgroup = gomp_parallel_reduction_register (rdata, num_threads);
183c0a68be4Smrg   gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
184c0a68be4Smrg 		   taskgroup);
185c0a68be4Smrg   fn (data);
186c0a68be4Smrg   ialias_call (GOMP_parallel_end) ();
187c0a68be4Smrg   gomp_sem_destroy (&taskgroup->taskgroup_sem);
188c0a68be4Smrg   free (taskgroup);
189c0a68be4Smrg   return num_threads;
190c0a68be4Smrg }
191c0a68be4Smrg 
1921debfc3dSmrg bool
GOMP_cancellation_point(int which)1931debfc3dSmrg GOMP_cancellation_point (int which)
1941debfc3dSmrg {
1951debfc3dSmrg   if (!gomp_cancel_var)
1961debfc3dSmrg     return false;
1971debfc3dSmrg 
1981debfc3dSmrg   struct gomp_thread *thr = gomp_thread ();
1991debfc3dSmrg   struct gomp_team *team = thr->ts.team;
2001debfc3dSmrg   if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
2011debfc3dSmrg     {
2021debfc3dSmrg       if (team == NULL)
2031debfc3dSmrg 	return false;
2041debfc3dSmrg       return team->work_share_cancelled != 0;
2051debfc3dSmrg     }
2061debfc3dSmrg   else if (which & GOMP_CANCEL_TASKGROUP)
2071debfc3dSmrg     {
208c0a68be4Smrg       if (thr->task->taskgroup)
209c0a68be4Smrg 	{
210c0a68be4Smrg 	  if (thr->task->taskgroup->cancelled)
2111debfc3dSmrg 	    return true;
212c0a68be4Smrg 	  if (thr->task->taskgroup->workshare
213c0a68be4Smrg 	      && thr->task->taskgroup->prev
214c0a68be4Smrg 	      && thr->task->taskgroup->prev->cancelled)
215c0a68be4Smrg 	    return true;
216c0a68be4Smrg 	}
2171debfc3dSmrg       /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
2181debfc3dSmrg 	 as #pragma omp cancel parallel also cancels all explicit
2191debfc3dSmrg 	 tasks.  */
2201debfc3dSmrg     }
2211debfc3dSmrg   if (team)
2221debfc3dSmrg     return gomp_team_barrier_cancelled (&team->barrier);
2231debfc3dSmrg   return false;
2241debfc3dSmrg }
ialias(GOMP_cancellation_point)2251debfc3dSmrg ialias (GOMP_cancellation_point)
2261debfc3dSmrg 
bool
GOMP_cancel (int which, bool do_cancel)
{
  /* Cancellation must be enabled via OMP_CANCELLATION.  */
  if (!gomp_cancel_var)
    return false;

  /* #pragma omp cancel with a false IF clause degrades to a mere
     cancellation point.  */
  if (!do_cancel)
    return ialias_call (GOMP_cancellation_point) (which);

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    {
      /* In orphaned worksharing region, all we want to cancel
	 is current thread.  */
      if (team != NULL)
	team->work_share_cancelled = 1;
      return true;
    }
  else if (which & GOMP_CANCEL_TASKGROUP)
    {
      if (thr->task->taskgroup)
	{
	  struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
	  /* A taskgroup created for a worksharing construct forwards
	     the cancel to the enclosing user taskgroup.  */
	  if (taskgroup->workshare && taskgroup->prev)
	    taskgroup = taskgroup->prev;
	  if (!taskgroup->cancelled)
	    {
	      /* NOTE(review): team is dereferenced here without a NULL
		 check; presumably a task carrying a taskgroup always
		 runs inside a team — confirm against task.c.  */
	      gomp_mutex_lock (&team->task_lock);
	      taskgroup->cancelled = true;
	      gomp_mutex_unlock (&team->task_lock);
	    }
	}
      return true;
    }
  /* GOMP_CANCEL_PARALLEL: cancel the whole team through the
     cancellable team barrier.  */
  team->team_cancelled = 1;
  gomp_team_barrier_cancel (team);
  return true;
}
2661debfc3dSmrg 
2671debfc3dSmrg /* The public OpenMP API for thread and team related inquiries.  */
2681debfc3dSmrg 
2691debfc3dSmrg int
omp_get_num_threads(void)2701debfc3dSmrg omp_get_num_threads (void)
2711debfc3dSmrg {
2721debfc3dSmrg   struct gomp_team *team = gomp_thread ()->ts.team;
2731debfc3dSmrg   return team ? team->nthreads : 1;
2741debfc3dSmrg }
2751debfc3dSmrg 
2761debfc3dSmrg int
omp_get_thread_num(void)2771debfc3dSmrg omp_get_thread_num (void)
2781debfc3dSmrg {
2791debfc3dSmrg   return gomp_thread ()->ts.team_id;
2801debfc3dSmrg }
2811debfc3dSmrg 
2821debfc3dSmrg /* This wasn't right for OpenMP 2.5.  Active region used to be non-zero
2831debfc3dSmrg    when the IF clause doesn't evaluate to false, starting with OpenMP 3.0
2841debfc3dSmrg    it is non-zero with more than one thread in the team.  */
2851debfc3dSmrg 
2861debfc3dSmrg int
omp_in_parallel(void)2871debfc3dSmrg omp_in_parallel (void)
2881debfc3dSmrg {
2891debfc3dSmrg   return gomp_thread ()->ts.active_level > 0;
2901debfc3dSmrg }
2911debfc3dSmrg 
2921debfc3dSmrg int
omp_get_level(void)2931debfc3dSmrg omp_get_level (void)
2941debfc3dSmrg {
2951debfc3dSmrg   return gomp_thread ()->ts.level;
2961debfc3dSmrg }
2971debfc3dSmrg 
2981debfc3dSmrg int
omp_get_ancestor_thread_num(int level)2991debfc3dSmrg omp_get_ancestor_thread_num (int level)
3001debfc3dSmrg {
3011debfc3dSmrg   struct gomp_team_state *ts = &gomp_thread ()->ts;
3021debfc3dSmrg   if (level < 0 || level > ts->level)
3031debfc3dSmrg     return -1;
3041debfc3dSmrg   for (level = ts->level - level; level > 0; --level)
3051debfc3dSmrg     ts = &ts->team->prev_ts;
3061debfc3dSmrg   return ts->team_id;
3071debfc3dSmrg }
3081debfc3dSmrg 
3091debfc3dSmrg int
omp_get_team_size(int level)3101debfc3dSmrg omp_get_team_size (int level)
3111debfc3dSmrg {
3121debfc3dSmrg   struct gomp_team_state *ts = &gomp_thread ()->ts;
3131debfc3dSmrg   if (level < 0 || level > ts->level)
3141debfc3dSmrg     return -1;
3151debfc3dSmrg   for (level = ts->level - level; level > 0; --level)
3161debfc3dSmrg     ts = &ts->team->prev_ts;
3171debfc3dSmrg   if (ts->team == NULL)
3181debfc3dSmrg     return 1;
3191debfc3dSmrg   else
3201debfc3dSmrg     return ts->team->nthreads;
3211debfc3dSmrg }
3221debfc3dSmrg 
3231debfc3dSmrg int
omp_get_active_level(void)3241debfc3dSmrg omp_get_active_level (void)
3251debfc3dSmrg {
3261debfc3dSmrg   return gomp_thread ()->ts.active_level;
3271debfc3dSmrg }
3281debfc3dSmrg 
3291debfc3dSmrg ialias (omp_get_num_threads)
3301debfc3dSmrg ialias (omp_get_thread_num)
3311debfc3dSmrg ialias (omp_in_parallel)
3321debfc3dSmrg ialias (omp_get_level)
3331debfc3dSmrg ialias (omp_get_ancestor_thread_num)
3341debfc3dSmrg ialias (omp_get_team_size)
3351debfc3dSmrg ialias (omp_get_active_level)
336