/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines to manage the work-share queue for a team
   of threads.  */
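/* Work shares are carved out of chunks chained off team->work_shares[0]
   via the next_alloc field; finished work shares are cached on
   team->work_share_list_free and recycled through
   team->work_share_list_alloc instead of being returned to the
   allocator.  */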

#include "libgomp.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>



/* Allocate a new work share structure, preferably from the current team's
   free gomp_work_share cache.  */

static struct gomp_work_share *
alloc_work_share (struct gomp_team *team)
{
  struct gomp_work_share *ws;
  unsigned int i;

  /* This is called in a critical section.  */
  if (team->work_share_list_alloc != NULL)
    {
      ws = team->work_share_list_alloc;
      team->work_share_list_alloc = ws->next_free;
      return ws;
    }

#ifdef HAVE_SYNC_BUILTINS
  ws = team->work_share_list_free;
  /* We need atomic read from work_share_list_free,
     as free_work_share can be called concurrently.  */
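  /* The empty asm with a "+r" constraint acts as an optimization barrier:
     it forces the compiler to load the list head exactly once here rather
     than re-reading the shared pointer while we inspect it.  */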
  __asm ("" : "+r" (ws));

  if (ws && ws->next_free)
    {
      struct gomp_work_share *next = ws->next_free;
      ws->next_free = NULL;
      team->work_share_list_alloc = next->next_free;
      return next;
    }
#else
  gomp_mutex_lock (&team->work_share_list_free_lock);
  ws = team->work_share_list_free;
  if (ws)
    {
      team->work_share_list_alloc = ws->next_free;
      team->work_share_list_free = NULL;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
      return ws;
    }
  gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif

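  /* Both lists were empty: double the chunk size and allocate a fresh array
     of work shares.  Element 0 is returned to the caller and chained onto
     the team's next_alloc list so it can be released with the team; elements
     1 .. chunk-1 refill work_share_list_alloc.  */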
  team->work_share_chunk *= 2;
  /* Allocating gomp_work_share structures aligned is just an
     optimization, don't do it when using the fallback method.  */
#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
			   team->work_share_chunk
			   * sizeof (struct gomp_work_share));
#else
  ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
#endif
  ws->next_alloc = team->work_shares[0].next_alloc;
  team->work_shares[0].next_alloc = ws;
  team->work_share_list_alloc = &ws[1];
  for (i = 1; i < team->work_share_chunk - 1; i++)
    ws[i].next_free = &ws[i + 1];
  ws[i].next_free = NULL;
  return ws;
}

/* Initialize an already allocated struct gomp_work_share.
   This shouldn't touch the next_alloc field.  */

void
gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
		      unsigned nthreads)
{
  gomp_mutex_init (&ws->lock);
  if (__builtin_expect (ordered, 0))
    {
#define INLINE_ORDERED_TEAM_IDS_SIZE \
  (sizeof (struct gomp_work_share) \
   - offsetof (struct gomp_work_share, inline_ordered_team_ids))

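      /* When ORDERED is not simply 1, it is a byte count requested by the
	 caller; reserve room for the NTHREADS team-id entries on top of it,
	 padded to long long alignment (when the inline buffer itself starts
	 aligned) so data placed after the team-id array stays aligned.  */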
      if (__builtin_expect (ordered != 1, 0))
	{
	  size_t o = nthreads * sizeof (*ws->ordered_team_ids);
	  o += __alignof__ (long long) - 1;
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0)
	    o &= ~(__alignof__ (long long) - 1);
	  ordered += o - 1;
	}
      else
	ordered = nthreads * sizeof (*ws->ordered_team_ids);
      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
	ws->ordered_team_ids = team_malloc (ordered);
      else
	ws->ordered_team_ids = ws->inline_ordered_team_ids;
      memset (ws->ordered_team_ids, '\0', ordered);
      ws->ordered_num_used = 0;
      ws->ordered_owner = -1;
      ws->ordered_cur = 0;
    }
  else
    ws->ordered_team_ids = ws->inline_ordered_team_ids;
  gomp_ptrlock_init (&ws->next_ws, NULL);
  ws->threads_completed = 0;
}

/* Do any needed destruction of gomp_work_share fields before it
   is put back into free gomp_work_share cache or freed.  */

void
gomp_fini_work_share (struct gomp_work_share *ws)
{
  gomp_mutex_destroy (&ws->lock);
  if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
    team_free (ws->ordered_team_ids);
  gomp_ptrlock_destroy (&ws->next_ws);
}

/* Free a work share struct; if not orphaned, put it into the current
   team's free gomp_work_share cache.  */

static inline void
free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
{
  gomp_fini_work_share (ws);
  if (__builtin_expect (team == NULL, 0))
    free (ws);
  else
    {
      struct gomp_work_share *next_ws;
#ifdef HAVE_SYNC_BUILTINS
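      /* Push WS onto the head of the lock-free free list with a
	 compare-and-swap loop; alloc_work_share reads the head without
	 holding any lock.  */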
      do
	{
	  next_ws = team->work_share_list_free;
	  ws->next_free = next_ws;
	}
      while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
					    next_ws, ws));
#else
      gomp_mutex_lock (&team->work_share_list_free_lock);
      next_ws = team->work_share_list_free;
      ws->next_free = next_ws;
      team->work_share_list_free = ws;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif
    }
}

/* The current thread is ready to begin the next work sharing construct.
   In all cases, thr->ts.work_share is updated to point to the new
   structure.  In all cases the work_share lock is locked.  Return true
   if this was the first thread to reach this point.  */

bool
gomp_work_share_start (size_t ordered)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      ws = gomp_malloc (sizeof (*ws));
      gomp_init_work_share (ws, ordered, 1);
      thr->ts.work_share = ws;
      return true;
    }

  ws = thr->ts.work_share;
  thr->ts.last_work_share = ws;
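  /* gomp_ptrlock_get returns NULL only for the first thread to arrive;
     that thread allocates and initializes the next work share and later
     publishes it for the others (see gomp_work_share_init_done in
     libgomp.h).  */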
  ws = gomp_ptrlock_get (&ws->next_ws);
  if (ws == NULL)
    {
      /* This thread encountered a new ws first.  */
      struct gomp_work_share *ws = alloc_work_share (team);
      gomp_init_work_share (ws, ordered, team->nthreads);
      thr->ts.work_share = ws;
      return true;
    }
  else
    {
      thr->ts.work_share = ws;
      return false;
    }
}

/* The current thread is done with its current work sharing construct.
   This version does imply a barrier at the end of the work-share.  */

void
gomp_work_share_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, thr->ts.work_share);
      thr->ts.work_share = NULL;
      return;
    }

  bstate = gomp_barrier_wait_start (&team->barrier);

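  /* Only the last thread to reach the barrier frees the previous work
     share; any other thread might still be using it until it gets here.
     work_shares_to_free records the oldest work share not yet freed, so
     the team teardown code in team.c knows which ones still need
     finalizing.  */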
  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }

  gomp_team_barrier_wait_end (&team->barrier, bstate);
  thr->ts.last_work_share = NULL;
}

/* The current thread is done with its current work sharing construct.
   This version implies a cancellable barrier at the end of the work-share.  */

bool
gomp_work_share_end_cancel (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Cancellable work sharing constructs cannot be orphaned.  */
  bstate = gomp_barrier_wait_cancel_start (&team->barrier);

  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }
  thr->ts.last_work_share = NULL;

  return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
}

/* The current thread is done with its current work sharing construct.
   This version does NOT imply a barrier at the end of the work-share.  */

void
gomp_work_share_end_nowait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned completed;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, ws);
      thr->ts.work_share = NULL;
      return;
    }

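  /* If there is no previous work share (e.g. the current one was set up
     when the team was started for a combined parallel work-sharing
     construct), there is nothing to release here.  */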
  if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
    return;

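  /* There is no barrier here, so track completion explicitly: the thread
     that raises the counter to nthreads is the last one out and frees the
     previous work share.  */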
#ifdef HAVE_SYNC_BUILTINS
  completed = __sync_add_and_fetch (&ws->threads_completed, 1);
#else
  gomp_mutex_lock (&ws->lock);
  completed = ++ws->threads_completed;
  gomp_mutex_unlock (&ws->lock);
#endif

  if (completed == team->nthreads)
    {
      team->work_shares_to_free = thr->ts.work_share;
      free_work_share (team, thr->ts.last_work_share);
    }
  thr->ts.last_work_share = NULL;
}