/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines to manage the work-share queue for a team
   of threads.  */

#include "libgomp.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>


/* Allocate a new work share structure, preferably from current team's
   free gomp_work_share cache.  */

static struct gomp_work_share *
alloc_work_share (struct gomp_team *team)
{
  struct gomp_work_share *ws;
  unsigned int i;

  /* This is called in a critical section.  */
  if (team->work_share_list_alloc != NULL)
    {
      ws = team->work_share_list_alloc;
      team->work_share_list_alloc = ws->next_free;
      return ws;
    }

#ifdef HAVE_SYNC_BUILTINS
  ws = team->work_share_list_free;
  /* We need atomic read from work_share_list_free,
     as free_work_share can be called concurrently.  */
  __asm ("" : "+r" (ws));

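  /* The empty asm above is an optimization barrier: it forces WS to hold
     the result of a single load of work_share_list_free and keeps the
     compiler from re-reading the list head below.  free_work_share can
     push new entries onto this list at any time, but it only ever touches
     the head, so instead of popping the head (which could race with such
     a push) we take the second element: cut the list after the head and
     move everything from the third element onward to the allocation
     cache, which is only used inside this critical section.  */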
  if (ws && ws->next_free)
    {
      struct gomp_work_share *next = ws->next_free;
      ws->next_free = NULL;
      team->work_share_list_alloc = next->next_free;
      return next;
    }
#else
  gomp_mutex_lock (&team->work_share_list_free_lock);
  ws = team->work_share_list_free;
  if (ws)
    {
      team->work_share_list_alloc = ws->next_free;
      team->work_share_list_free = NULL;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
      return ws;
    }
  gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif

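  /* Both caches are empty; allocate a fresh chunk twice as large as the
     previous one.  Element 0 is returned to the caller, the remaining
     elements are threaded onto the allocation cache, and the chunk itself
     is chained through the next_alloc field of its first element so it
     can be released together with the team later.  */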
  team->work_share_chunk *= 2;
  /* Allocating gomp_work_share structures aligned is just an
     optimization, don't do it when using the fallback method.  */
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
			   team->work_share_chunk
			   * sizeof (struct gomp_work_share));
#else
  ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
#endif
  ws->next_alloc = team->work_shares[0].next_alloc;
  team->work_shares[0].next_alloc = ws;
  team->work_share_list_alloc = &ws[1];
  for (i = 1; i < team->work_share_chunk - 1; i++)
    ws[i].next_free = &ws[i + 1];
  ws[i].next_free = NULL;
  return ws;
}

/* Initialize an already allocated struct gomp_work_share.
   This shouldn't touch the next_alloc field.  */

void
gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
		      unsigned nthreads)
{
  gomp_mutex_init (&ws->lock);
  if (__builtin_expect (ordered, 0))
    {
#define INLINE_ORDERED_TEAM_IDS_SIZE \
  (sizeof (struct gomp_work_share) \
   - offsetof (struct gomp_work_share, inline_ordered_team_ids))

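      /* ORDERED == 1 requests just the per-thread team id array.  Any
	 other non-zero value is a byte count of additional per-work-share
	 data; in that case the team id array is placed in front of it,
	 padded so that the extra data can start on a long long boundary.
	 Either way, a separate allocation is only made when the inline
	 buffer is too small.  */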
      if (__builtin_expect (ordered != 1, 0))
	{
	  size_t o = nthreads * sizeof (*ws->ordered_team_ids);
	  o += __alignof__ (long long) - 1;
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0
	      && __alignof__ (struct gomp_work_share)
		 >= __alignof__ (long long))
	    o &= ~(__alignof__ (long long) - 1);
	  ordered += o - 1;
	}
      else
	ordered = nthreads * sizeof (*ws->ordered_team_ids);
      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
	ws->ordered_team_ids = team_malloc (ordered);
      else
	ws->ordered_team_ids = ws->inline_ordered_team_ids;
      memset (ws->ordered_team_ids, '\0', ordered);
      ws->ordered_num_used = 0;
      ws->ordered_owner = -1;
      ws->ordered_cur = 0;
    }
  else
    ws->ordered_team_ids = ws->inline_ordered_team_ids;
  gomp_ptrlock_init (&ws->next_ws, NULL);
  ws->threads_completed = 0;
}

/* Do any needed destruction of gomp_work_share fields before it
   is put back into free gomp_work_share cache or freed.  */

void
gomp_fini_work_share (struct gomp_work_share *ws)
{
  gomp_mutex_destroy (&ws->lock);
  if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
    team_free (ws->ordered_team_ids);
  gomp_ptrlock_destroy (&ws->next_ws);
}

/* Free a work share struct, if not orphaned, put it into current
   team's free gomp_work_share cache.  */

static inline void
free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
{
  gomp_fini_work_share (ws);
  if (__builtin_expect (team == NULL, 0))
    free (ws);
  else
    {
      struct gomp_work_share *next_ws;
#ifdef HAVE_SYNC_BUILTINS
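      /* Lock-free push onto the head of the free list: re-read the head
	 and retry the compare-and-swap until no other thread has updated
	 work_share_list_free in between.  */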
      do
	{
	  next_ws = team->work_share_list_free;
	  ws->next_free = next_ws;
	}
      while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
					    next_ws, ws));
#else
      gomp_mutex_lock (&team->work_share_list_free_lock);
      next_ws = team->work_share_list_free;
      ws->next_free = next_ws;
      team->work_share_list_free = ws;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif
    }
}

/* The current thread is ready to begin the next work sharing construct.
   In all cases, thr->ts.work_share is updated to point to the new
   structure.  In all cases the work_share lock is locked.  Return true
   if this was the first thread to reach this point.  */

bool
gomp_work_share_start (size_t ordered)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
      ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
			       sizeof (*ws));
#else
      ws = gomp_malloc (sizeof (*ws));
#endif
      gomp_init_work_share (ws, ordered, 1);
      thr->ts.work_share = ws;
      return true;
    }

  ws = thr->ts.work_share;
  thr->ts.last_work_share = ws;
  ws = gomp_ptrlock_get (&ws->next_ws);
  if (ws == NULL)
    {
      /* This thread encountered a new ws first.  */
      struct gomp_work_share *ws = alloc_work_share (team);
      gomp_init_work_share (ws, ordered, team->nthreads);
      thr->ts.work_share = ws;
      return true;
    }
  else
    {
      thr->ts.work_share = ws;
      return false;
    }
}

/* The current thread is done with its current work sharing construct.
   This version does imply a barrier at the end of the work-share.  */

void
gomp_work_share_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, thr->ts.work_share);
      thr->ts.work_share = NULL;
      return;
    }

  bstate = gomp_barrier_wait_start (&team->barrier);

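  /* Only the last thread to arrive at the barrier releases the previous
     work share; by that point every thread has moved past it, so nothing
     can reference last_work_share again.  The current work share is
     recorded in work_shares_to_free for the team to release later.  */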
  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }

  gomp_team_barrier_wait_end (&team->barrier, bstate);
  thr->ts.last_work_share = NULL;
}

/* The current thread is done with its current work sharing construct.
   This version implies a cancellable barrier at the end of the work-share.  */

bool
gomp_work_share_end_cancel (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Cancellable work sharing constructs cannot be orphaned.  */
  bstate = gomp_barrier_wait_cancel_start (&team->barrier);

  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }
  thr->ts.last_work_share = NULL;

  return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
}

/* The current thread is done with its current work sharing construct.
   This version does NOT imply a barrier at the end of the work-share.  */

void
gomp_work_share_end_nowait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned completed;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, ws);
      thr->ts.work_share = NULL;
      return;
    }

  if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
    return;

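  /* Without a barrier the previous work share can only be recycled once
     every thread in the team has passed through it; threads_completed
     counts them, and the last thread to arrive does the freeing.  */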
#ifdef HAVE_SYNC_BUILTINS
  completed = __sync_add_and_fetch (&ws->threads_completed, 1);
#else
  gomp_mutex_lock (&ws->lock);
  completed = ++ws->threads_completed;
  gomp_mutex_unlock (&ws->lock);
#endif

  if (completed == team->nthreads)
    {
      team->work_shares_to_free = thr->ts.work_share;
      free_work_share (team, thr->ts.last_work_share);
    }
  thr->ts.last_work_share = NULL;
}