/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the (bare) PARALLEL construct.  */

#include "libgomp.h"
#include <limits.h>


/* Determine the number of threads to be launched for a PARALLEL construct.
   This algorithm is explicitly described in OpenMP 3.0 section 2.4.1.
   SPECIFIED is a combination of the NUM_THREADS clause and the IF clause.
   If the IF clause is false, SPECIFIED is forced to 1.  When NUM_THREADS
   is not present, SPECIFIED is 0.  */
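/* E.g. for "#pragma omp parallel num_threads (4) if (cond)" the caller
   passes 4 when COND evaluates to true and 1 when it is false; a plain
   "#pragma omp parallel" passes 0.  */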

unsigned
gomp_resolve_num_threads (unsigned specified, unsigned count)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  unsigned threads_requested, max_num_threads, num_threads;
  unsigned long busy;
  struct gomp_thread_pool *pool;

  icv = gomp_icv (false);

  if (specified == 1)
    return 1;

  if (thr->ts.active_level >= 1
      /* Accelerators with fixed thread counts require this to return 1 for
         nested parallel regions.  */
#if !defined(__AMDGCN__) && !defined(__nvptx__)
      && icv->max_active_levels_var <= 1
#endif
      )
    return 1;
  else if (thr->ts.active_level >= icv->max_active_levels_var)
    return 1;

  /* If NUM_THREADS not specified, use nthreads_var.  */
  if (specified == 0)
    threads_requested = icv->nthreads_var;
  else
    threads_requested = specified;

  max_num_threads = threads_requested;

  /* If dynamic threads are enabled, bound the number of threads
     that we launch.  */
  if (icv->dyn_var)
    {
      unsigned dyn = gomp_dynamic_max_threads ();
      if (dyn < max_num_threads)
        max_num_threads = dyn;

      /* Optimization for parallel sections.  */
      if (count && count < max_num_threads)
        max_num_threads = count;
    }

  /* UINT_MAX stands for infinity.  */
  if (__builtin_expect (icv->thread_limit_var == UINT_MAX, 1)
      || max_num_threads == 1)
    return max_num_threads;

  /* The threads_busy counter lives in thread_pool; if there is no
     thread_pool yet, there must be just one thread in the contention
     group.  If thr->ts.team is NULL, this isn't a nested parallel, so
     there is just one thread in the contention group as well and no
     need to handle it atomically.  */
  pool = thr->thread_pool;
  if (thr->ts.team == NULL || pool == NULL)
    {
      num_threads = max_num_threads;
      if (num_threads > icv->thread_limit_var)
        num_threads = icv->thread_limit_var;
      if (pool)
        pool->threads_busy = num_threads;
      return num_threads;
    }

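  /* Otherwise reserve the threads against the contention group's busy
     count so that thread_limit_var is not exceeded; the current thread
     is already included in threads_busy, hence the "+ 1" of slack.  With
     sync builtins this uses a compare-and-swap retry loop, otherwise the
     update is done under gomp_managed_threads_lock.  */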
#ifdef HAVE_SYNC_BUILTINS
  do
    {
      busy = pool->threads_busy;
      num_threads = max_num_threads;
      if (icv->thread_limit_var - busy + 1 < num_threads)
        num_threads = icv->thread_limit_var - busy + 1;
    }
  while (__sync_val_compare_and_swap (&pool->threads_busy,
                                      busy, busy + num_threads - 1)
         != busy);
#else
  gomp_mutex_lock (&gomp_managed_threads_lock);
  num_threads = max_num_threads;
  busy = pool->threads_busy;
  if (icv->thread_limit_var - busy + 1 < num_threads)
    num_threads = icv->thread_limit_var - busy + 1;
  pool->threads_busy += num_threads - 1;
  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif

  return num_threads;
}

void
GOMP_parallel_start (void (*fn) (void *), void *data, unsigned num_threads)
{
  num_threads = gomp_resolve_num_threads (num_threads, 0);
  gomp_team_start (fn, data, num_threads, 0, gomp_new_team (num_threads),
                   NULL);
}

void
GOMP_parallel_end (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (__builtin_expect (icv->thread_limit_var != UINT_MAX, 0))
    {
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      unsigned int nthreads = team ? team->nthreads : 1;
      gomp_team_end ();
      if (nthreads > 1)
        {
          /* If not nested, there is just one thread left in the
             contention group, so there is no need for atomicity.  */
          if (thr->ts.team == NULL)
            thr->thread_pool->threads_busy = 1;
          else
            {
#ifdef HAVE_SYNC_BUILTINS
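              /* Unsigned wrap-around: adding 1UL - nthreads is equivalent
                 to subtracting nthreads - 1 from threads_busy.  */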
              __sync_fetch_and_add (&thr->thread_pool->threads_busy,
                                    1UL - nthreads);
#else
              gomp_mutex_lock (&gomp_managed_threads_lock);
              thr->thread_pool->threads_busy -= nthreads - 1;
              gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
            }
        }
    }
  else
    gomp_team_end ();
}
ialias (GOMP_parallel_end)

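/* GOMP_parallel is the combined start/run/end entry point the compiler
   emits for a parallel region.  An illustrative sketch (placeholder names,
   not the exact outlining GCC performs):

       #pragma omp parallel num_threads (n)
       { body; }

   roughly becomes

       void subfn (void *shared_frame) { body; }
       ...
       GOMP_parallel (subfn, &shared_frame_object, n, 0);

   where "subfn" and "shared_frame_object" stand for the outlined body and
   the structure holding the region's shared variables.  */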
void
GOMP_parallel (void (*fn) (void *), void *data, unsigned num_threads,
               unsigned int flags)
{
  num_threads = gomp_resolve_num_threads (num_threads, 0);
  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
                   NULL);
  fn (data);
  ialias_call (GOMP_parallel_end) ();
}

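/* Variant of GOMP_parallel for parallel regions with task reductions:
   the first word of DATA points to the array of reduction descriptors,
   which is registered before the team starts and torn down after the
   region ends.  Returns the number of threads actually used.  */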
unsigned
GOMP_parallel_reductions (void (*fn) (void *), void *data,
                          unsigned num_threads, unsigned int flags)
{
  struct gomp_taskgroup *taskgroup;
  num_threads = gomp_resolve_num_threads (num_threads, 0);
  uintptr_t *rdata = *(uintptr_t **)data;
  taskgroup = gomp_parallel_reduction_register (rdata, num_threads);
  gomp_team_start (fn, data, num_threads, flags, gomp_new_team (num_threads),
                   taskgroup);
  fn (data);
  ialias_call (GOMP_parallel_end) ();
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
  return num_threads;
}

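/* Implements "#pragma omp cancellation point"; WHICH is a GOMP_CANCEL_*
   mask naming the construct.  Returns true iff cancellation of the
   corresponding enclosing region has been requested.  */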
bool
GOMP_cancellation_point (int which)
{
  if (!gomp_cancel_var)
    return false;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    {
      if (team == NULL)
        return false;
      return team->work_share_cancelled != 0;
    }
  else if (which & GOMP_CANCEL_TASKGROUP)
    {
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return true;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return true;
        }
      /* FALLTHRU into the GOMP_CANCEL_PARALLEL case,
         as #pragma omp cancel parallel also cancels all explicit
         tasks.  */
    }
  if (team)
    return gomp_team_barrier_cancelled (&team->barrier);
  return false;
}
ialias (GOMP_cancellation_point)

bool
GOMP_cancel (int which, bool do_cancel)
{
  if (!gomp_cancel_var)
    return false;

  if (!do_cancel)
    return ialias_call (GOMP_cancellation_point) (which);

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  if (which & (GOMP_CANCEL_LOOP | GOMP_CANCEL_SECTIONS))
    {
      /* In an orphaned worksharing region, all we want to cancel
         is the current thread.  */
      if (team != NULL)
        team->work_share_cancelled = 1;
      return true;
    }
  else if (which & GOMP_CANCEL_TASKGROUP)
    {
      if (thr->task->taskgroup)
        {
          struct gomp_taskgroup *taskgroup = thr->task->taskgroup;
          if (taskgroup->workshare && taskgroup->prev)
            taskgroup = taskgroup->prev;
          if (!taskgroup->cancelled)
            {
              gomp_mutex_lock (&team->task_lock);
              taskgroup->cancelled = true;
              gomp_mutex_unlock (&team->task_lock);
            }
        }
      return true;
    }
  team->team_cancelled = 1;
  gomp_team_barrier_cancel (team);
  return true;
}

/* The public OpenMP API for thread and team related inquiries.  */

int
omp_get_num_threads (void)
{
  struct gomp_team *team = gomp_thread ()->ts.team;
  return team ? team->nthreads : 1;
}

int
omp_get_thread_num (void)
{
  return gomp_thread ()->ts.team_id;
}

/* This wasn't right for OpenMP 2.5.  The active level used to be non-zero
   whenever the IF clause didn't evaluate to false; starting with OpenMP 3.0
   it is non-zero only when there is more than one thread in the team.  */
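/* E.g. inside "#pragma omp parallel if (0)" the team has a single thread,
   so the region is inactive and omp_in_parallel () still returns 0 unless
   some enclosing parallel region is active.  */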

int
omp_in_parallel (void)
{
  return gomp_thread ()->ts.active_level > 0;
}

int
omp_get_level (void)
{
  return gomp_thread ()->ts.level;
}

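/* The two queries below walk the chain of saved team states (prev_ts)
   outward from the current nesting depth until the requested LEVEL is
   reached; LEVEL 0 refers to the initial thread, ts->level to the
   current team.  */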
int
omp_get_ancestor_thread_num (int level)
{
  struct gomp_team_state *ts = &gomp_thread ()->ts;
  if (level < 0 || level > ts->level)
    return -1;
  for (level = ts->level - level; level > 0; --level)
    ts = &ts->team->prev_ts;
  return ts->team_id;
}

int
omp_get_team_size (int level)
{
  struct gomp_team_state *ts = &gomp_thread ()->ts;
  if (level < 0 || level > ts->level)
    return -1;
  for (level = ts->level - level; level > 0; --level)
    ts = &ts->team->prev_ts;
  if (ts->team == NULL)
    return 1;
  else
    return ts->team->nthreads;
}

int
omp_get_active_level (void)
{
  return gomp_thread ()->ts.active_level;
}

ialias (omp_get_num_threads)
ialias (omp_get_thread_num)
ialias (omp_in_parallel)
ialias (omp_get_level)
ialias (omp_get_ancestor_thread_num)
ialias (omp_get_team_size)
ialias (omp_get_active_level)