/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the SECTIONS construct.  */

#include "libgomp.h"
#include <string.h>


ialias_redirect (GOMP_taskgroup_reduction_register)

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_sections_init (struct gomp_work_share *ws, unsigned count)
{
  ws->sched = GFS_DYNAMIC;
  ws->chunk_size = 1;
  ws->end = count + 1L;
  ws->incr = 1;
  ws->next = 1;
#ifdef HAVE_SYNC_BUILTINS
  /* Prepare things to make each iteration faster.  */
  if (sizeof (long) > sizeof (unsigned))
    ws->mode = 1;
  else
    {
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      long nthreads = team ? team->nthreads : 1;

      ws->mode = ((nthreads | ws->end)
                  < 1UL << (sizeof (long) * __CHAR_BIT__ / 2 - 1));
    }
#else
  ws->mode = 0;
#endif
}
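
/* For example, with count == 3 the fields set above describe a dynamic
   schedule over the half-open range [1, 4) with chunk size 1: threads
   draw the section ids 1, 2 and 3 one at a time, and 0 is left over to
   mean "no section remaining".  */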

/* This routine is called when first encountering a sections construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see that the construct already exists and allocate work from it.

   COUNT is the number of sections in this construct.

   Returns the 1-based section number for this thread to perform, or 0 if
   all work was assigned to other threads prior to this thread's arrival.  */

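/* A rough sketch of how a compiler might lower the construct onto these
   entry points (illustrative only; the exact expansion is up to the
   compiler):

     #pragma omp sections
     {
       #pragma omp section
         stmt1;
       #pragma omp section
         stmt2;
     }

   becomes something along the lines of

     for (i = GOMP_sections_start (2); i != 0; i = GOMP_sections_next ())
       switch (i)
         {
         case 1: stmt1; break;
         case 2: stmt2; break;
         }
     GOMP_sections_end ();  */
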
unsigned
GOMP_sections_start (unsigned count)
{
  struct gomp_thread *thr = gomp_thread ();
  long s, e, ret;

  if (gomp_work_share_start (0))
    {
      gomp_sections_init (thr->ts.work_share, count);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  if (gomp_iter_dynamic_next (&s, &e))
    ret = s;
  else
    ret = 0;
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  if (gomp_iter_dynamic_next_locked (&s, &e))
    ret = s;
  else
    ret = 0;
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

unsigned
GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  long s, e, ret;

  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      gomp_sections_init (thr->ts.work_share, count);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (mem)
        {
          uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
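          /* INLINE_ORDERED_TEAM_IDS_OFF rounds the offset of the inline
             buffer up to the alignment of long long, i.e.
             (off + align - 1) & ~(align - 1); purely as an illustration,
             an offset of 92 with 8-byte alignment yields 96.  */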
          if (sizeof (struct gomp_work_share)
              <= INLINE_ORDERED_TEAM_IDS_OFF
              || __alignof__ (struct gomp_work_share) < __alignof__ (long long)
              || size > (sizeof (struct gomp_work_share)
                         - INLINE_ORDERED_TEAM_IDS_OFF))
            *mem
              = (void *) (thr->ts.work_share->ordered_team_ids
                          = gomp_malloc_cleared (size));
          else
            *mem = memset (((char *) thr->ts.work_share)
                           + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      if (mem)
        {
          if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
               & (__alignof__ (long long) - 1)) == 0)
            *mem = (void *) thr->ts.work_share->ordered_team_ids;
          else
            {
              uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
              p += __alignof__ (long long) - 1;
              p &= ~(__alignof__ (long long) - 1);
              *mem = (void *) p;
            }
        }
    }

#ifdef HAVE_SYNC_BUILTINS
  if (gomp_iter_dynamic_next (&s, &e))
    ret = s;
  else
    ret = 0;
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  if (gomp_iter_dynamic_next_locked (&s, &e))
    ret = s;
  else
    ret = 0;
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* This routine is called when the thread completes processing of the
   section currently assigned to it.  If the work-share construct is
   bound directly to a parallel construct, then the construct may have
   been set up before the parallel; in that case, this may be the first
   iteration for the thread.

   Returns the 1-based section number for this thread to perform, or 0 if
   all work was assigned to other threads prior to this thread's arrival.  */

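/* A rough sketch of the combined case (again illustrative only): for
   "#pragma omp parallel sections" a compiler can emit a call to
   GOMP_parallel_sections (defined below), which pre-initializes the
   work share before the team starts, so each thread may enter its
   dispatch loop directly through GOMP_sections_next () rather than
   GOMP_sections_start ().  */
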
unsigned
GOMP_sections_next (void)
{
  long s, e, ret;

#ifdef HAVE_SYNC_BUILTINS
  if (gomp_iter_dynamic_next (&s, &e))
    ret = s;
  else
    ret = 0;
#else
  struct gomp_thread *thr = gomp_thread ();

  gomp_mutex_lock (&thr->ts.work_share->lock);
  if (gomp_iter_dynamic_next_locked (&s, &e))
    ret = s;
  else
    ret = 0;
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* This routine pre-initializes a work-share construct to avoid one
   synchronization once we get into the loop.  */

void
GOMP_parallel_sections_start (void (*fn) (void *), void *data,
                              unsigned num_threads, unsigned count)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, count);
  team = gomp_new_team (num_threads);
  gomp_sections_init (&team->work_shares[0], count);
  gomp_team_start (fn, data, num_threads, 0, team, NULL);
}

ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_sections (void (*fn) (void *), void *data,
                        unsigned num_threads, unsigned count, unsigned flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, count);
  team = gomp_new_team (num_threads);
  gomp_sections_init (&team->work_shares[0], count);
  gomp_team_start (fn, data, num_threads, flags, team, NULL);
  fn (data);
  GOMP_parallel_end ();
}

/* The GOMP_section_end* routines are called after the thread is told
   that all sections are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_sections_end (void)
{
  gomp_work_share_end ();
}

bool
GOMP_sections_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

void
GOMP_sections_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}