/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the SECTIONS construct.  */
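
/* For orientation, the entry points in this file are meant to be called
   from compiler-generated code.  As an illustrative sketch only (the
   exact shape of the expansion is up to the compiler, not this file;
   body1/body2 are placeholder names), a stand-alone construct such as

	#pragma omp sections
	{
	  #pragma omp section
	    body1 ();
	  #pragma omp section
	    body2 ();
	}

   would be driven roughly like

	for (unsigned i = GOMP_sections_start (2); i != 0;
	     i = GOMP_sections_next ())
	  switch (i)
	    {
	    case 1: body1 (); break;
	    case 2: body2 (); break;
	    }
	GOMP_sections_end ();

   using the 1-based section numbers documented on the functions below.  */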

#include "libgomp.h"
#include <string.h>


ialias_redirect (GOMP_taskgroup_reduction_register)

/* Initialize the given work share construct from the given arguments.  */
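/* The sections are handed out like a dynamic loop schedule with chunk
   size 1 over the range [1, COUNT]; the values dealt to threads are thus
   the 1-based section numbers, and 0 is left to mean that no section
   remains.  The MODE computation below mirrors the dynamic loop setup:
   when none of the values involved can overflow a long,
   gomp_iter_dynamic_next can presumably take its cheaper atomic path
   instead of a compare-and-swap loop.  */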

static inline void
gomp_sections_init (struct gomp_work_share *ws, unsigned count)
{
  ws->sched = GFS_DYNAMIC;
  ws->chunk_size = 1;
  ws->end = count + 1L;
  ws->incr = 1;
  ws->next = 1;
#ifdef HAVE_SYNC_BUILTINS
  /* Prepare things to make each iteration faster.  */
  if (sizeof (long) > sizeof (unsigned))
    ws->mode = 1;
  else
    {
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;
      long nthreads = team ? team->nthreads : 1;

      ws->mode = ((nthreads | ws->end)
                  < 1UL << (sizeof (long) * __CHAR_BIT__ / 2 - 1));
    }
#else
  ws->mode = 0;
#endif
}

/* This routine is called when first encountering a sections construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   COUNT is the number of sections in this construct.

   Returns the 1-based section number for this thread to perform, or 0 if
   all work was assigned to other threads prior to this thread's arrival.  */

unsigned
GOMP_sections_start (unsigned count)
{
  struct gomp_thread *thr = gomp_thread ();
  long s, e, ret;

  if (gomp_work_share_start (0))
    {
      gomp_sections_init (thr->ts.work_share, count);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  if (gomp_iter_dynamic_next (&s, &e))
    ret = s;
  else
    ret = 0;
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  if (gomp_iter_dynamic_next_locked (&s, &e))
    ret = s;
  else
    ret = 0;
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

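/* Like GOMP_sections_start, but in addition handles task reductions and
   a per-construct scratch buffer.  If REDUCTIONS is non-NULL, it is
   registered with the taskgroup implicitly started for the work share.
   If MEM is non-NULL, *MEM holds on entry the number of bytes of
   zero-initialized scratch space requested; on return *MEM points to
   that space, either carved out of the work-share structure itself (when
   it fits behind the inline_ordered_team_ids field with long long
   alignment) or obtained from gomp_malloc_cleared.  Threads arriving
   after the work share has been set up recover the same pointer from the
   work share.  */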
unsigned
GOMP_sections2_start (unsigned count, uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  long s, e, ret;

  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      gomp_sections_init (thr->ts.work_share, count);
      if (reductions)
        {
          GOMP_taskgroup_reduction_register (reductions);
          thr->task->taskgroup->workshare = true;
          thr->ts.work_share->task_reductions = reductions;
        }
      if (mem)
        {
          uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
          if (sizeof (struct gomp_work_share)
              <= INLINE_ORDERED_TEAM_IDS_OFF
              || __alignof__ (struct gomp_work_share) < __alignof__ (long long)
              || size > (sizeof (struct gomp_work_share)
                         - INLINE_ORDERED_TEAM_IDS_OFF))
            *mem
              = (void *) (thr->ts.work_share->ordered_team_ids
                          = gomp_malloc_cleared (size));
          else
            *mem = memset (((char *) thr->ts.work_share)
                           + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
        }
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
        {
          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
          gomp_workshare_task_reduction_register (reductions,
                                                  first_reductions);
        }
      if (mem)
        {
          if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
               & (__alignof__ (long long) - 1)) == 0)
            *mem = (void *) thr->ts.work_share->ordered_team_ids;
          else
            {
              uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
              p += __alignof__ (long long) - 1;
              p &= ~(__alignof__ (long long) - 1);
              *mem = (void *) p;
            }
        }
    }

#ifdef HAVE_SYNC_BUILTINS
  if (gomp_iter_dynamic_next (&s, &e))
    ret = s;
  else
    ret = 0;
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  if (gomp_iter_dynamic_next_locked (&s, &e))
    ret = s;
  else
    ret = 0;
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* This routine is called when the thread completes processing of the
   section currently assigned to it.  If the work-share construct is
   bound directly to a parallel construct, then the construct may have
   been set up before the parallel.  In which case, this may be the
   first iteration for the thread.

   Returns the 1-based section number for this thread to perform, or 0 if
   all work was assigned to other threads prior to this thread's arrival.  */

unsigned
GOMP_sections_next (void)
{
  long s, e, ret;

#ifdef HAVE_SYNC_BUILTINS
  if (gomp_iter_dynamic_next (&s, &e))
    ret = s;
  else
    ret = 0;
#else
  struct gomp_thread *thr = gomp_thread ();

  gomp_mutex_lock (&thr->ts.work_share->lock);
  if (gomp_iter_dynamic_next_locked (&s, &e))
    ret = s;
  else
    ret = 0;
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* This routine pre-initializes a work-share construct to avoid one
   synchronization once we get into the loop.  */

void
GOMP_parallel_sections_start (void (*fn) (void *), void *data,
                              unsigned num_threads, unsigned count)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, count);
  team = gomp_new_team (num_threads);
  gomp_sections_init (&team->work_shares[0], count);
  gomp_team_start (fn, data, num_threads, 0, team, NULL);
}

ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_sections (void (*fn) (void *), void *data,
                        unsigned num_threads, unsigned count, unsigned flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, count);
  team = gomp_new_team (num_threads);
  gomp_sections_init (&team->work_shares[0], count);
  gomp_team_start (fn, data, num_threads, flags, team, NULL);
  fn (data);
  GOMP_parallel_end ();
}
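
/* Again only an illustrative sketch (body1/body2, fn and data are
   placeholder names): for a combined construct such as

	#pragma omp parallel sections
	{
	  #pragma omp section
	    body1 ();
	  #pragma omp section
	    body2 ();
	}

   the compiler is expected to outline the body into a function FN and
   emit a single call along the lines of
   GOMP_parallel_sections (fn, data, 0, 2, 0).  Because the work share is
   initialized above before the team starts, each thread's copy of FN can
   begin with GOMP_sections_next rather than GOMP_sections_start, e.g.

	for (unsigned i = GOMP_sections_next (); i != 0;
	     i = GOMP_sections_next ())
	  switch (i)
	    {
	    case 1: body1 (); break;
	    case 2: body2 (); break;
	    }
	GOMP_sections_end_nowait ();

   relying on the barrier at the end of the parallel region rather than a
   separate sections barrier.  */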

/* The GOMP_section_end* routines are called after the thread is told
   that all sections are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_sections_end (void)
{
  gomp_work_share_end ();
}

bool
GOMP_sections_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

void
GOMP_sections_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}