/* Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines to manage the work-share queue for a team
   of threads.  */
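
/* Overview (a summary of the code in this file, for orientation):

   Each team keeps a small cache of struct gomp_work_share objects so
   that starting a worksharing construct does not normally hit malloc.
   Two lists are involved:

     team->work_share_list_alloc  - consumed only by the one thread
       that is currently allocating the next work share (it runs in a
       critical section, so no locking is needed there);
     team->work_share_list_free   - work shares returned by finished
       constructs; threads push onto it either with a lock-free
       compare-and-swap (HAVE_SYNC_BUILTINS) or under
       work_share_list_free_lock.

   When both lists are exhausted, alloc_work_share grabs a new chunk of
   structures, doubling the chunk size each time, and links the chunk
   into the next_alloc chain rooted at team->work_shares[0] so that it
   can all be released when the team goes away.  */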

#include "libgomp.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>


/* Allocate a new work share structure, preferably from current team's
   free gomp_work_share cache.  */

static struct gomp_work_share *
alloc_work_share (struct gomp_team *team)
{
  struct gomp_work_share *ws;
  unsigned int i;

  /* This is called in a critical section.  */
  if (team->work_share_list_alloc != NULL)
    {
      ws = team->work_share_list_alloc;
      team->work_share_list_alloc = ws->next_free;
      return ws;
    }

#ifdef HAVE_SYNC_BUILTINS
  ws = team->work_share_list_free;
  /* We need atomic read from work_share_list_free,
     as free_work_share can be called concurrently.  */
  __asm ("" : "+r" (ws));

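  /* Note that only entries *behind* the list head are claimed here;
     the head itself stays on the free list with its next_free cleared.
     A concurrent free_work_share (see below) only ever installs a new
     head with a compare-and-swap and never dereferences older entries,
     so the nodes taken here cannot be raced on by the pushers.  */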
  if (ws && ws->next_free)
    {
      struct gomp_work_share *next = ws->next_free;
      ws->next_free = NULL;
      team->work_share_list_alloc = next->next_free;
      return next;
    }
#else
  gomp_mutex_lock (&team->work_share_list_free_lock);
  ws = team->work_share_list_free;
  if (ws)
    {
      team->work_share_list_alloc = ws->next_free;
      team->work_share_list_free = NULL;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
      return ws;
    }
  gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif

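  /* Both caches are empty: allocate a fresh chunk, doubling its size
     each time.  ws[0] goes back to the caller, ws[1] onwards become the
     new allocation list, and the chunk is linked into the next_alloc
     chain rooted at team->work_shares[0] so it can be freed when the
     team is destroyed.  */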
  team->work_share_chunk *= 2;
  /* Allocating gomp_work_share structures aligned is just an
     optimization, don't do it when using the fallback method.  */
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
                           team->work_share_chunk
                           * sizeof (struct gomp_work_share));
#else
  ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
#endif
  ws->next_alloc = team->work_shares[0].next_alloc;
  team->work_shares[0].next_alloc = ws;
  team->work_share_list_alloc = &ws[1];
  for (i = 1; i < team->work_share_chunk - 1; i++)
    ws[i].next_free = &ws[i + 1];
  ws[i].next_free = NULL;
  return ws;
}

/* Initialize an already allocated struct gomp_work_share.
   This shouldn't touch the next_alloc field.  */

void
gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
                      unsigned nthreads)
{
  gomp_mutex_init (&ws->lock);
  if (__builtin_expect (ordered, 0))
    {
#define INLINE_ORDERED_TEAM_IDS_SIZE \
  (sizeof (struct gomp_work_share) \
   - offsetof (struct gomp_work_share, inline_ordered_team_ids))

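      /* ORDERED is overloaded: the value 1 requests the classic
         ORDERED bookkeeping (one team id slot per thread), while any
         larger value is treated as a byte count of extra per-construct
         storage the caller wants on top of the team id array, padded
         so that long long alignment is preserved when the structure
         layout allows it.  If the total does not fit into the space
         left at the tail of the structure
         (INLINE_ORDERED_TEAM_IDS_SIZE), a separate allocation is
         used.  */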
      if (__builtin_expect (ordered != 1, 0))
        {
          size_t o = nthreads * sizeof (*ws->ordered_team_ids);
          o += __alignof__ (long long) - 1;
          if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
               & (__alignof__ (long long) - 1)) == 0
              && __alignof__ (struct gomp_work_share)
                 >= __alignof__ (long long))
            o &= ~(__alignof__ (long long) - 1);
          ordered += o - 1;
        }
      else
        ordered = nthreads * sizeof (*ws->ordered_team_ids);
      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
        ws->ordered_team_ids = team_malloc (ordered);
      else
        ws->ordered_team_ids = ws->inline_ordered_team_ids;
      memset (ws->ordered_team_ids, '\0', ordered);
      ws->ordered_num_used = 0;
      ws->ordered_owner = -1;
      ws->ordered_cur = 0;
    }
  else
    ws->ordered_team_ids = ws->inline_ordered_team_ids;
  gomp_ptrlock_init (&ws->next_ws, NULL);
  ws->threads_completed = 0;
}

/* Do any needed destruction of gomp_work_share fields before it
   is put back into free gomp_work_share cache or freed.  */

void
gomp_fini_work_share (struct gomp_work_share *ws)
{
  gomp_mutex_destroy (&ws->lock);
  if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
    team_free (ws->ordered_team_ids);
  gomp_ptrlock_destroy (&ws->next_ws);
}

/* Free a work share struct, if not orphaned, put it into current
   team's free gomp_work_share cache.  */

static inline void
free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
{
  gomp_fini_work_share (ws);
  if (__builtin_expect (team == NULL, 0))
    free (ws);
  else
    {
      struct gomp_work_share *next_ws;
#ifdef HAVE_SYNC_BUILTINS
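      /* Lock-free push onto the team's free list: link the node to the
         current head and retry the compare-and-swap until no other
         thread has changed the head in the meantime (a classic Treiber
         stack push).  */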
      do
        {
          next_ws = team->work_share_list_free;
          ws->next_free = next_ws;
        }
      while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
                                            next_ws, ws));
#else
      gomp_mutex_lock (&team->work_share_list_free_lock);
      next_ws = team->work_share_list_free;
      ws->next_free = next_ws;
      team->work_share_list_free = ws;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif
    }
}

/* The current thread is ready to begin the next work sharing construct.
   In all cases, thr->ts.work_share is updated to point to the new
   structure.  In all cases the work_share lock is locked.  Return true
   if this was the first thread to reach this point.  */

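/* A typical caller looks roughly like this (a sketch of the pattern
   used by the worksharing *_start entry points elsewhere in libgomp,
   not code from this file):

     if (gomp_work_share_start (0))
       {
         gomp_loop_init (thr->ts.work_share, start, end, incr,
                         GFS_STATIC, chunk_size);
         gomp_work_share_init_done ();
       }

   Only the thread that gets TRUE initializes the work share; the other
   threads receive the already initialized structure.  */
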
bool
gomp_work_share_start (size_t ordered)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
      ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
                               sizeof (*ws));
#else
      ws = gomp_malloc (sizeof (*ws));
#endif
      gomp_init_work_share (ws, ordered, 1);
      thr->ts.work_share = ws;
      return true;
    }

  ws = thr->ts.work_share;
  thr->ts.last_work_share = ws;
  ws = gomp_ptrlock_get (&ws->next_ws);
  if (ws == NULL)
    {
      /* This thread encountered a new ws first.  */
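      /* gomp_ptrlock_get returned NULL, so this thread now holds the
         next_ws ptrlock and owns the allocation.  The caller is
         expected to finish initializing the new work share and then
         publish it, releasing the threads still waiting in
         gomp_ptrlock_get (gomp_work_share_init_done in libgomp.h does
         exactly that).  */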
      struct gomp_work_share *ws = alloc_work_share (team);
      gomp_init_work_share (ws, ordered, team->nthreads);
      thr->ts.work_share = ws;
      return true;
    }
  else
    {
      thr->ts.work_share = ws;
      return false;
    }
}

/* The current thread is done with its current work sharing construct.
   This version does imply a barrier at the end of the work-share.  */

void
gomp_work_share_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, thr->ts.work_share);
      thr->ts.work_share = NULL;
      return;
    }

  bstate = gomp_barrier_wait_start (&team->barrier);

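  /* Exactly one thread sees gomp_barrier_last_thread return true, so
     the *previous* work share is freed exactly once.  The current one
     cannot be recycled yet: its next_ws field is the rendezvous point
     for the next worksharing construct.  */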
  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
        {
          team->work_shares_to_free = thr->ts.work_share;
          free_work_share (team, thr->ts.last_work_share);
        }
    }

  gomp_team_barrier_wait_end (&team->barrier, bstate);
  thr->ts.last_work_share = NULL;
}

/* The current thread is done with its current work sharing construct.
   This version implies a cancellable barrier at the end of the work-share.  */

bool
gomp_work_share_end_cancel (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Cancellable work sharing constructs cannot be orphaned.  */
  bstate = gomp_barrier_wait_cancel_start (&team->barrier);

  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
        {
          team->work_shares_to_free = thr->ts.work_share;
          free_work_share (team, thr->ts.last_work_share);
        }
    }
  thr->ts.last_work_share = NULL;

  return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
}

/* The current thread is done with its current work sharing construct.
   This version does NOT imply a barrier at the end of the work-share.  */

void
gomp_work_share_end_nowait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned completed;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, ws);
      thr->ts.work_share = NULL;
      return;
    }

  if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
    return;

#ifdef HAVE_SYNC_BUILTINS
  completed = __sync_add_and_fetch (&ws->threads_completed, 1);
#else
  gomp_mutex_lock (&ws->lock);
  completed = ++ws->threads_completed;
  gomp_mutex_unlock (&ws->lock);
#endif

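  /* With no barrier, the thread that brings the counter up to
     nthreads is the last one out of this construct; only it frees the
     previous work share.  */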
  if (completed == team->nthreads)
    {
      team->work_shares_to_free = thr->ts.work_share;
      free_work_share (team, thr->ts.last_work_share);
    }
  thr->ts.last_work_share = NULL;
}