/* Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of tasks in response to task
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#include "gomp-constants.h"

typedef struct gomp_task_depend_entry *hash_entry_type;

static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}

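/* hashtab.h below is a parameterized hash table: as used here, the
   includer supplies hash_entry_type, htab_alloc and htab_free before
   the inclusion, and htab_hash/htab_eq alongside it (a description of
   the convention as this file uses it, not of hashtab.h's full
   contract).  */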
#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}

/* Create a new task data structure.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
		struct gomp_task_icv *prev_icv)
{
  /* It would seem that using memset here would be a win, but it turns
     out that partially filling gomp_task allows us to keep the
     overhead of task creation low.  In the nqueens-1.c test, for a
     sufficiently large N, we drop the overhead from 5-6% to 1%.

     Note, the nqueens-1.c test in serial mode is a good test to
     benchmark the overhead of creating tasks as there are millions of
     tiny tasks created that all run undeferred.  */
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  priority_queue_init (&task->children_queue);
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}

/* Clean up a task, after completing it.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}

/* Clear the parent field of every task in LIST.  */

static inline void
gomp_clear_parent_in_list (struct priority_list *list)
{
  struct priority_node *p = list->tasks;
  if (p)
    do
      {
	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
	p = p->next;
      }
    while (p != list->tasks);
}
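
/* The tasks in a priority_list are kept on a circular doubly-linked
   list, hence the do/while above that stops once the walk wraps back
   around to list->tasks.  */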

/* Splay tree version of gomp_clear_parent_in_list.

   Clear the parent field of every task in NODE within SP, and free
   the node when done.  */

static void
gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
{
  if (!node)
    return;
  prio_splay_tree_node left = node->left, right = node->right;
  gomp_clear_parent_in_list (&node->key.l);
#if _LIBGOMP_CHECKING_
  memset (node, 0xaf, sizeof (*node));
#endif
  /* No need to remove the node from the tree.  We're nuking
     everything, so just free the nodes and our caller can clear the
     entire splay tree.  */
  free (node);
  gomp_clear_parent_in_tree (sp, left);
  gomp_clear_parent_in_tree (sp, right);
}

/* Clear the parent field of every task in Q and remove every task
   from Q.  */

static inline void
gomp_clear_parent (struct priority_queue *q)
{
  if (priority_queue_multi_p (q))
    {
      gomp_clear_parent_in_tree (&q->t, q->t.root);
      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
	 No need to remove anything.  We can just nuke everything.  */
      q->t.root = NULL;
    }
  else
    gomp_clear_parent_in_list (&q->l);
}

/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
			 void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  hash_entry_type ent;

  task->depend_count = ndepend;
  task->num_dependees = 0;
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].addr = depend[2 + i];
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].is_in = i >= nout;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
					      &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
	{
	  /* If multiple depend clauses of the same task reference the
	     same address, all but the first one are redundant.  As
	     inout/out come first, if any of them is inout/out, it will
	     win, which is the right semantics.  */
	  if ((*slot)->task == task)
	    {
	      task->depend[i].redundant = true;
	      continue;
	    }
	  for (ent = *slot; ent; ent = ent->next)
	    {
	      if (ent->redundant_out)
		break;

	      last = ent;

	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
	      if (i >= nout && ent->is_in)
		continue;

	      if (!ent->is_in)
		out = ent;

	      struct gomp_task *tsk = ent->task;
	      if (tsk->dependers == NULL)
		{
		  tsk->dependers
		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
				   + 6 * sizeof (struct gomp_task *));
		  tsk->dependers->n_elem = 1;
		  tsk->dependers->allocated = 6;
		  tsk->dependers->elem[0] = task;
		  task->num_dependees++;
		  continue;
		}
	      /* We already have some other dependency on tsk from an
		 earlier depend clause.  */
	      else if (tsk->dependers->n_elem
		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
			   == task))
		continue;
	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
		{
		  tsk->dependers->allocated
		    = tsk->dependers->allocated * 2 + 2;
		  tsk->dependers
		    = gomp_realloc (tsk->dependers,
				    sizeof (struct gomp_dependers_vec)
				    + (tsk->dependers->allocated
				       * sizeof (struct gomp_task *)));
		}
	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
	      task->num_dependees++;
	    }
	  task->depend[i].next = *slot;
	  (*slot)->prev = &task->depend[i];
	}
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
	 address in the hash table chain for the purpose of creation of
	 deferred tasks, because each out depends on all earlier outs, thus it
	 is enough to record just the last depend({,in}out:).  For depend(in:),
	 we need to keep all of the previous ones not terminated yet, because
	 a later depend({,in}out:) might need to depend on all of them.  So, if
	 the new task's clause is depend({,in}out:), we know there is at most
	 one other depend({,in}out:) clause in the list (out).  For
	 non-deferred tasks we want to see all outs, so they are moved to the
	 end of the chain; after the first redundant_out entry, all following
	 entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
	{
	  if (out != last)
	    {
	      out->next->prev = out->prev;
	      out->prev->next = out->next;
	      out->next = last->next;
	      out->prev = last;
	      last->next = out;
	      if (out->next)
		out->next->prev = out;
	    }
	  out->redundant_out = true;
	}
    }
}
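
/* For example (a sketch of the bookkeeping above), with sibling tasks
   created in this order:

     T1: depend(out : x)
     T2: depend(in  : x)
     T3: depend(in  : x)
     T4: depend(out : x)

   T2 and T3 each record a dependence on T1, and T4 records
   dependences on T3, T2 and T1 (the transitive T4 -> T1 edge is kept;
   it is harmless).  T1's hash chain entry is then marked
   redundant_out, so later siblings stop scanning at it.  */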

/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   DEPEND is an array containing:
	depend[0]: number of depend elements.
	depend[1]: number of depend elements of type "out".
	depend[2..N+1]: address of [1..N]th depend element.  */
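
/* For example (a sketch, not the exact array GCC emits), for

     #pragma omp task depend(out : x) depend(in : y, z)

   the compiler would pass something like

     void *depend[5] = { (void *) 3, (void *) 1, &x, &y, &z };

   i.e. three elements total, the first one of which is "out", with
   the out/inout addresses placed before the "in" addresses.  */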

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (&task.children_queue);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      /* Tasks that depend on other tasks are not put into the
		 various waiting queues, so we are done for now.  Said
		 tasks are instead put into the queues via
		 gomp_task_run_post_handle_dependers() after their
		 dependencies have been satisfied.  After which, they
		 can be picked up by the various scheduling
		 points.  */
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
			     task, priority,
			     PRIORITY_INSERT_BEGIN,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, priority,
			       PRIORITY_INSERT_BEGIN,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
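
/* A compiler might lower

     int i = ...;
     #pragma omp task firstprivate(i)
     foo (i);

   into roughly the following (a sketch, not the exact GCC expansion;
   fn and struct omp_data are hypothetical names):

     struct omp_data { int i; } data = { i };
     GOMP_task (fn, &data, NULL, sizeof (data), __alignof__ (data),
		true, 0, NULL, 0);

   where fn receives a copy of data and calls foo with its i member.  */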

ialias (GOMP_taskgroup_start)
ialias (GOMP_taskgroup_end)

#define TYPE long
#define UTYPE unsigned long
#define TYPE_is_long 1
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef TYPE_is_long

#define TYPE unsigned long long
#define UTYPE TYPE
#define GOMP_taskloop GOMP_taskloop_ull
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef GOMP_taskloop
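
/* taskloop.c is written against the TYPE/UTYPE macros and is textually
   included twice above: once to produce the long GOMP_taskloop and
   once to produce GOMP_taskloop_ull, C's poor-man's equivalent of
   instantiating a template for two iteration types.  */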

static void inline
priority_queue_move_task_first (enum priority_queue_type type,
				struct priority_queue *head,
				struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to move first missing task %p", task);
#endif
  struct priority_list *list;
  if (priority_queue_multi_p (head))
    {
      list = priority_queue_lookup_priority (head, task->priority);
#if _LIBGOMP_CHECKING_
      if (!list)
	gomp_fatal ("Unable to find priority %d", task->priority);
#endif
    }
  else
    list = &head->l;
  priority_list_remove (list, task_to_priority_node (type, task), 0);
  priority_list_insert (type, list, task, task->priority,
			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
			task->parent_depends_on);
}

/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
				    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
			 PRIORITY_INSERT_BEGIN, false,
			 task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
      else if (parent->taskwait->in_depend_wait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_depend_wait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
	 Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from an unrelated thread
     and therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}

/* Signal that a target task TTASK has completed the asynchronously
   running phase and should be requeued as a task to handle the
   variable unmapping.  */

void
GOMP_PLUGIN_target_task_completion (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_task *task = ttask->task;
  struct gomp_team *team = ttask->team;

  gomp_mutex_lock (&team->task_lock);
  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
    {
      ttask->state = GOMP_TARGET_TASK_FINISHED;
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  ttask->state = GOMP_TARGET_TASK_FINISHED;
  gomp_target_task_completion (team, task);
  gomp_mutex_unlock (&team->task_lock);
}
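
/* Two interleavings are possible above (a summary of the handshake;
   see also gomp_create_target_task below): if this callback fires
   while ttask->state is still GOMP_TARGET_TASK_READY_TO_RUN, the
   creating thread has not yet reacquired task_lock after
   gomp_target_task_fn, so we only record FINISHED and let that thread
   do the requeuing; otherwise the requeuing happens right here.  */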

static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);

/* Called for nowait target tasks.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend, void **args,
			 enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return true;

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;

  if (depend != NULL)
    {
      depend_cnt = (uintptr_t) depend[0];
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as
	 they are firstprivate on the target task.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    if (tgt_align < align)
	      tgt_align = align;
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    tgt_size += sizes[i];
	  }
      if (tgt_align)
	tgt_size += tgt_align - 1;
      else
	tgt_size = 0;
    }

  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short))
		      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  ttask->args = args;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	    ttask->hostaddrs[i] = tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	  }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  task->fn = NULL;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
			|| (taskgroup && taskgroup->cancelled), 0))
    {
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return true;
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  if (taskgroup)
	    taskgroup->num_children++;
	  gomp_mutex_unlock (&team->task_lock);
	  return true;
	}
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run gomp_target_task_fn right away, essentially scheduling the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, 0, PRIORITY_INSERT_END,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
	 in between gomp_target_task_fn and the mutex lock,
	 perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	gomp_target_task_completion (team, task);
      else
	ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			 PRIORITY_INSERT_BEGIN,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
			   PRIORITY_INSERT_BEGIN,
			   /*adjust_parent_depends_on=*/false,
			   task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
			 PRIORITY_INSERT_END,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}

/* Given a parent_depends_on task in LIST, move it to the front of its
   priority so it is run as soon as possible.

   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.

   We rearrange the queue such that all parent_depends_on tasks are
   first, and last_parent_depends_on points to the last such task we
   rearranged.  For example, given the following tasks in a queue
   where PD[123] are the parent_depends_on tasks:

	task->children
	|
	V
	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

	We rearrange such that:

	task->children
	|	       +--- last_parent_depends_on
	|	       |
	V	       V
	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */

static void inline
priority_list_upgrade_task (struct priority_list *list,
			    struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  list->last_parent_depends_on = node;
}

/* Given a parent_depends_on TASK in its parent's children_queue, move
   it to the front of its priority so it is run as soon as possible.

   PARENT is passed as an optimization.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_upgrade_task (struct gomp_task *task,
			     struct gomp_task *parent)
{
  struct priority_queue *head = &parent->children_queue;
  struct priority_node *node = &task->pnode[PQ_CHILDREN];
#if _LIBGOMP_CHECKING_
  if (!task->parent_depends_on)
    gomp_fatal ("priority_queue_upgrade_task: task must be a "
		"parent_depends_on task");
  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_upgrade_task (list, node);
    }
  else
    priority_list_upgrade_task (&head->l, node);
}

/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
   the way in LIST so that other tasks can be considered for
   execution.  LIST contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.  */

static void inline
priority_list_downgrade_task (enum priority_queue_type type,
			      struct priority_list *list,
			      struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  if (list->tasks == node)
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
	 cannot come before WAITING tasks.  If we're about to
	 leave the queue in such an indeterminate state, rewire
	 things appropriately.  However, a TIED task at the end is
	 perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  /* Remove from list.  */
	  node->prev->next = node->next;
	  node->next->prev = node->prev;
	  /* Rewire at the end.  */
	  node->next = list->tasks;
	  node->prev = list->tasks->prev;
	  list->tasks->prev->next = node;
	  list->tasks->prev = node;
	}
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      if (node->prev != node
	  && prev_child->kind == GOMP_TASK_WAITING
	  && prev_child->parent_depends_on)
	list->last_parent_depends_on = node->prev;
      else
	{
	  /* There are no more parent_depends_on entries waiting
	     to run, clear the list.  */
	  list->last_parent_depends_on = NULL;
	}
    }
}

/* Given a TASK in HEAD that is about to be executed, move it out of
   the way so that other tasks can be considered for execution.  HEAD
   contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_downgrade_task (enum priority_queue_type type,
			       struct priority_queue *head,
			       struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to downgrade missing task %p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_downgrade_task (type, list, task);
    }
  else
    priority_list_downgrade_task (type, &head->l, task);
}

/* Set up CHILD_TASK to execute.  This is done by setting the task to
   TIED, and updating all relevant queues so that CHILD_TASK is no
   longer chosen for scheduling.  Also, remove CHILD_TASK from the
   overall team task queue entirely.

   Return TRUE if task or its containing taskgroup has been
   cancelled.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
			   &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
			   &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
				   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   child_task);

  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
			 MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  if ((gomp_team_barrier_cancelled (&team->barrier)
       || (taskgroup && taskgroup->cancelled))
      && !child_task->copy_ctors_done)
    return true;
  return false;
}

static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}

/* After a CHILD_TASK has been run, adjust the dependency queue for
   each task that depends on CHILD_TASK, to record the fact that there
   is one less dependency to worry about.  If a task that depended on
   CHILD_TASK now has no dependencies, place it in the various queues
   so it gets scheduled to run.

   TEAM is the team to which CHILD_TASK belongs.  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
	 TASK's remaining dependencies.  Once TASK has no other
	 dependencies, put it into the various queues so it will get
	 scheduled for execution.  */
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/true,
				 task->parent_depends_on);
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	    }
	}
      if (taskgroup)
	{
	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup->in_taskgroup_wait)
	    {
	      /* One more task has had its dependencies met.
		 Inform any waiters.  */
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, task->priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}

static inline size_t
gomp_task_run_post_handle_depend (struct gomp_task *child_task,
				  struct gomp_team *team)
{
  if (child_task->depend_count == 0)
    return 0;

  /* If the parent is gone already, the hash table has been freed and
     nothing will use it anymore, so there is no need to remove
     anything from it.  */
1115*38fd1498Szrj   if (child_task->parent != NULL)
1116*38fd1498Szrj     gomp_task_run_post_handle_depend_hash (child_task);
1117*38fd1498Szrj 
1118*38fd1498Szrj   if (child_task->dependers == NULL)
1119*38fd1498Szrj     return 0;
1120*38fd1498Szrj 
1121*38fd1498Szrj   return gomp_task_run_post_handle_dependers (child_task, team);
1122*38fd1498Szrj }
1123*38fd1498Szrj 
1124*38fd1498Szrj /* Remove CHILD_TASK from its parent.  */
1125*38fd1498Szrj 
1126*38fd1498Szrj static inline void
1127*38fd1498Szrj gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1128*38fd1498Szrj {
1129*38fd1498Szrj   struct gomp_task *parent = child_task->parent;
1130*38fd1498Szrj   if (parent == NULL)
1131*38fd1498Szrj     return;
1132*38fd1498Szrj 
1133*38fd1498Szrj   /* If this was the last task the parent was depending on,
1134*38fd1498Szrj      synchronize with gomp_task_maybe_wait_for_dependencies so it can
1135*38fd1498Szrj      clean up and return.  */
1136*38fd1498Szrj   if (__builtin_expect (child_task->parent_depends_on, 0)
1137*38fd1498Szrj       && --parent->taskwait->n_depend == 0
1138*38fd1498Szrj       && parent->taskwait->in_depend_wait)
1139*38fd1498Szrj     {
1140*38fd1498Szrj       parent->taskwait->in_depend_wait = false;
1141*38fd1498Szrj       gomp_sem_post (&parent->taskwait->taskwait_sem);
1142*38fd1498Szrj     }
1143*38fd1498Szrj 
1144*38fd1498Szrj   if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1145*38fd1498Szrj 			     child_task, MEMMODEL_RELEASE)
1146*38fd1498Szrj       && parent->taskwait && parent->taskwait->in_taskwait)
1147*38fd1498Szrj     {
1148*38fd1498Szrj       parent->taskwait->in_taskwait = false;
1149*38fd1498Szrj       gomp_sem_post (&parent->taskwait->taskwait_sem);
1150*38fd1498Szrj     }
1151*38fd1498Szrj   child_task->pnode[PQ_CHILDREN].next = NULL;
1152*38fd1498Szrj   child_task->pnode[PQ_CHILDREN].prev = NULL;
1153*38fd1498Szrj }
1154*38fd1498Szrj 
1155*38fd1498Szrj /* Remove CHILD_TASK from its taskgroup.  */
1156*38fd1498Szrj 
1157*38fd1498Szrj static inline void
1158*38fd1498Szrj gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1159*38fd1498Szrj {
1160*38fd1498Szrj   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1161*38fd1498Szrj   if (taskgroup == NULL)
1162*38fd1498Szrj     return;
1163*38fd1498Szrj   bool empty = priority_queue_remove (PQ_TASKGROUP,
1164*38fd1498Szrj 				      &taskgroup->taskgroup_queue,
1165*38fd1498Szrj 				      child_task, MEMMODEL_RELAXED);
1166*38fd1498Szrj   child_task->pnode[PQ_TASKGROUP].next = NULL;
1167*38fd1498Szrj   child_task->pnode[PQ_TASKGROUP].prev = NULL;
1168*38fd1498Szrj   if (taskgroup->num_children > 1)
1169*38fd1498Szrj     --taskgroup->num_children;
1170*38fd1498Szrj   else
1171*38fd1498Szrj     {
1172*38fd1498Szrj       /* We access taskgroup->num_children in GOMP_taskgroup_end
1173*38fd1498Szrj 	 outside of the task lock mutex region, so we need
1174*38fd1498Szrj 	 a release barrier here to ensure memory
1175*38fd1498Szrj 	 written by child_task->fn above is flushed
1176*38fd1498Szrj 	 before the 0 is written.  */
1177*38fd1498Szrj       __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1178*38fd1498Szrj     }
1179*38fd1498Szrj   if (empty && taskgroup->in_taskgroup_wait)
1180*38fd1498Szrj     {
1181*38fd1498Szrj       taskgroup->in_taskgroup_wait = false;
1182*38fd1498Szrj       gomp_sem_post (&taskgroup->taskgroup_sem);
1183*38fd1498Szrj     }
1184*38fd1498Szrj }
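/* Editorial sketch (not part of the original source) of the pairing
   described above:

     child thread (here)                GOMP_taskgroup_end
     ---------------------------        ---------------------------
     ... child_task->fn () writes ...
     __atomic_store_n (&num_children,   if (__atomic_load_n (&num_children,
                       0, RELEASE)                          ACQUIRE) == 0)
                                          // fn's writes are visible here

   The RELEASE store orders the task body's writes before the store
   of 0, so any thread that observes num_children == 0 with an
   ACQUIRE load also sees those writes.  */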
1185*38fd1498Szrj 
1186*38fd1498Szrj void
1187*38fd1498Szrj gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1188*38fd1498Szrj {
1189*38fd1498Szrj   struct gomp_thread *thr = gomp_thread ();
1190*38fd1498Szrj   struct gomp_team *team = thr->ts.team;
1191*38fd1498Szrj   struct gomp_task *task = thr->task;
1192*38fd1498Szrj   struct gomp_task *child_task = NULL;
1193*38fd1498Szrj   struct gomp_task *to_free = NULL;
1194*38fd1498Szrj   int do_wake = 0;
1195*38fd1498Szrj 
1196*38fd1498Szrj   gomp_mutex_lock (&team->task_lock);
1197*38fd1498Szrj   if (gomp_barrier_last_thread (state))
1198*38fd1498Szrj     {
1199*38fd1498Szrj       if (team->task_count == 0)
1200*38fd1498Szrj 	{
1201*38fd1498Szrj 	  gomp_team_barrier_done (&team->barrier, state);
1202*38fd1498Szrj 	  gomp_mutex_unlock (&team->task_lock);
1203*38fd1498Szrj 	  gomp_team_barrier_wake (&team->barrier, 0);
1204*38fd1498Szrj 	  return;
1205*38fd1498Szrj 	}
1206*38fd1498Szrj       gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1207*38fd1498Szrj     }
1208*38fd1498Szrj 
1209*38fd1498Szrj   while (1)
1210*38fd1498Szrj     {
1211*38fd1498Szrj       bool cancelled = false;
1212*38fd1498Szrj       if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1213*38fd1498Szrj 	{
1214*38fd1498Szrj 	  bool ignored;
1215*38fd1498Szrj 	  child_task
1216*38fd1498Szrj 	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1217*38fd1498Szrj 					PQ_IGNORED, NULL,
1218*38fd1498Szrj 					&ignored);
1219*38fd1498Szrj 	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
1220*38fd1498Szrj 					 team);
1221*38fd1498Szrj 	  if (__builtin_expect (cancelled, 0))
1222*38fd1498Szrj 	    {
1223*38fd1498Szrj 	      if (to_free)
1224*38fd1498Szrj 		{
1225*38fd1498Szrj 		  gomp_finish_task (to_free);
1226*38fd1498Szrj 		  free (to_free);
1227*38fd1498Szrj 		  to_free = NULL;
1228*38fd1498Szrj 		}
1229*38fd1498Szrj 	      goto finish_cancelled;
1230*38fd1498Szrj 	    }
1231*38fd1498Szrj 	  team->task_running_count++;
1232*38fd1498Szrj 	  child_task->in_tied_task = true;
1233*38fd1498Szrj 	}
1234*38fd1498Szrj       gomp_mutex_unlock (&team->task_lock);
1235*38fd1498Szrj       if (do_wake)
1236*38fd1498Szrj 	{
1237*38fd1498Szrj 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1238*38fd1498Szrj 	  do_wake = 0;
1239*38fd1498Szrj 	}
1240*38fd1498Szrj       if (to_free)
1241*38fd1498Szrj 	{
1242*38fd1498Szrj 	  gomp_finish_task (to_free);
1243*38fd1498Szrj 	  free (to_free);
1244*38fd1498Szrj 	  to_free = NULL;
1245*38fd1498Szrj 	}
1246*38fd1498Szrj       if (child_task)
1247*38fd1498Szrj 	{
1248*38fd1498Szrj 	  thr->task = child_task;
1249*38fd1498Szrj 	  if (__builtin_expect (child_task->fn == NULL, 0))
1250*38fd1498Szrj 	    {
1251*38fd1498Szrj 	      if (gomp_target_task_fn (child_task->fn_data))
1252*38fd1498Szrj 		{
1253*38fd1498Szrj 		  thr->task = task;
1254*38fd1498Szrj 		  gomp_mutex_lock (&team->task_lock);
1255*38fd1498Szrj 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1256*38fd1498Szrj 		  team->task_running_count--;
1257*38fd1498Szrj 		  struct gomp_target_task *ttask
1258*38fd1498Szrj 		    = (struct gomp_target_task *) child_task->fn_data;
1259*38fd1498Szrj 		  /* If GOMP_PLUGIN_target_task_completion has run already
1260*38fd1498Szrj 		     in between gomp_target_task_fn and the mutex lock,
1261*38fd1498Szrj 		     perform the requeuing here.  */
1262*38fd1498Szrj 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1263*38fd1498Szrj 		    gomp_target_task_completion (team, child_task);
1264*38fd1498Szrj 		  else
1265*38fd1498Szrj 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1266*38fd1498Szrj 		  child_task = NULL;
1267*38fd1498Szrj 		  continue;
1268*38fd1498Szrj 		}
1269*38fd1498Szrj 	    }
1270*38fd1498Szrj 	  else
1271*38fd1498Szrj 	    child_task->fn (child_task->fn_data);
1272*38fd1498Szrj 	  thr->task = task;
1273*38fd1498Szrj 	}
1274*38fd1498Szrj       else
1275*38fd1498Szrj 	return;
1276*38fd1498Szrj       gomp_mutex_lock (&team->task_lock);
1277*38fd1498Szrj       if (child_task)
1278*38fd1498Szrj 	{
1279*38fd1498Szrj 	 finish_cancelled:;
1280*38fd1498Szrj 	  size_t new_tasks
1281*38fd1498Szrj 	    = gomp_task_run_post_handle_depend (child_task, team);
1282*38fd1498Szrj 	  gomp_task_run_post_remove_parent (child_task);
1283*38fd1498Szrj 	  gomp_clear_parent (&child_task->children_queue);
1284*38fd1498Szrj 	  gomp_task_run_post_remove_taskgroup (child_task);
1285*38fd1498Szrj 	  to_free = child_task;
1286*38fd1498Szrj 	  child_task = NULL;
1287*38fd1498Szrj 	  if (!cancelled)
1288*38fd1498Szrj 	    team->task_running_count--;
1289*38fd1498Szrj 	  if (new_tasks > 1)
1290*38fd1498Szrj 	    {
1291*38fd1498Szrj 	      do_wake = team->nthreads - team->task_running_count;
1292*38fd1498Szrj 	      if (do_wake > new_tasks)
1293*38fd1498Szrj 		do_wake = new_tasks;
1294*38fd1498Szrj 	    }
1295*38fd1498Szrj 	  if (--team->task_count == 0
1296*38fd1498Szrj 	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1297*38fd1498Szrj 	    {
1298*38fd1498Szrj 	      gomp_team_barrier_done (&team->barrier, state);
1299*38fd1498Szrj 	      gomp_mutex_unlock (&team->task_lock);
1300*38fd1498Szrj 	      gomp_team_barrier_wake (&team->barrier, 0);
1301*38fd1498Szrj 	      gomp_mutex_lock (&team->task_lock);
1302*38fd1498Szrj 	    }
1303*38fd1498Szrj 	}
1304*38fd1498Szrj     }
1305*38fd1498Szrj }
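/* Editorial note (illustrative, not part of the original source):
   this loop is what lets threads make progress on queued tasks while
   they sit in a team barrier, e.g.

     #pragma omp parallel
     {
       #pragma omp single nowait
       for (int i = 0; i < n; i++)
         #pragma omp task
           work (i);
     }  // implicit barrier: waiting threads drain team->task_queue

   Threads arriving at the barrier call gomp_barrier_handle_tasks and
   keep executing tasks until team->task_count drops to zero.  */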
1306*38fd1498Szrj 
1307*38fd1498Szrj /* Called when encountering a taskwait directive.
1308*38fd1498Szrj 
1309*38fd1498Szrj    Wait for all children of the current task.  */
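/* Example (illustrative, assuming the usual lowering of the
   directive to a call to GOMP_taskwait):

     #pragma omp task shared(a)
       a = f ();
     #pragma omp taskwait   // returns once the child task is done
     use (a);
*/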
1310*38fd1498Szrj 
1311*38fd1498Szrj void
1312*38fd1498Szrj GOMP_taskwait (void)
1313*38fd1498Szrj {
1314*38fd1498Szrj   struct gomp_thread *thr = gomp_thread ();
1315*38fd1498Szrj   struct gomp_team *team = thr->ts.team;
1316*38fd1498Szrj   struct gomp_task *task = thr->task;
1317*38fd1498Szrj   struct gomp_task *child_task = NULL;
1318*38fd1498Szrj   struct gomp_task *to_free = NULL;
1319*38fd1498Szrj   struct gomp_taskwait taskwait;
1320*38fd1498Szrj   int do_wake = 0;
1321*38fd1498Szrj 
1322*38fd1498Szrj   /* The acquire barrier on the load of task->children_queue here
1323*38fd1498Szrj      synchronizes with the release removal in
1324*38fd1498Szrj      gomp_task_run_post_remove_parent.  It is not necessary that we
1325*38fd1498Szrj      synchronize with other removals at this point, but we must
1326*38fd1498Szrj      ensure that all writes to memory by a child thread task work
1327*38fd1498Szrj      function are seen before we exit from GOMP_taskwait.  */
1328*38fd1498Szrj   if (task == NULL
1329*38fd1498Szrj       || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1330*38fd1498Szrj     return;
1331*38fd1498Szrj 
1332*38fd1498Szrj   memset (&taskwait, 0, sizeof (taskwait));
1333*38fd1498Szrj   bool child_q = false;
1334*38fd1498Szrj   gomp_mutex_lock (&team->task_lock);
1335*38fd1498Szrj   while (1)
1336*38fd1498Szrj     {
1337*38fd1498Szrj       bool cancelled = false;
1338*38fd1498Szrj       if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1339*38fd1498Szrj 	{
1340*38fd1498Szrj 	  bool destroy_taskwait = task->taskwait != NULL;
1341*38fd1498Szrj 	  task->taskwait = NULL;
1342*38fd1498Szrj 	  gomp_mutex_unlock (&team->task_lock);
1343*38fd1498Szrj 	  if (to_free)
1344*38fd1498Szrj 	    {
1345*38fd1498Szrj 	      gomp_finish_task (to_free);
1346*38fd1498Szrj 	      free (to_free);
1347*38fd1498Szrj 	    }
1348*38fd1498Szrj 	  if (destroy_taskwait)
1349*38fd1498Szrj 	    gomp_sem_destroy (&taskwait.taskwait_sem);
1350*38fd1498Szrj 	  return;
1351*38fd1498Szrj 	}
1352*38fd1498Szrj       struct gomp_task *next_task
1353*38fd1498Szrj 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1354*38fd1498Szrj 				    PQ_TEAM, &team->task_queue, &child_q);
1355*38fd1498Szrj       if (next_task->kind == GOMP_TASK_WAITING)
1356*38fd1498Szrj 	{
1357*38fd1498Szrj 	  child_task = next_task;
1358*38fd1498Szrj 	  cancelled
1359*38fd1498Szrj 	    = gomp_task_run_pre (child_task, task, team);
1360*38fd1498Szrj 	  if (__builtin_expect (cancelled, 0))
1361*38fd1498Szrj 	    {
1362*38fd1498Szrj 	      if (to_free)
1363*38fd1498Szrj 		{
1364*38fd1498Szrj 		  gomp_finish_task (to_free);
1365*38fd1498Szrj 		  free (to_free);
1366*38fd1498Szrj 		  to_free = NULL;
1367*38fd1498Szrj 		}
1368*38fd1498Szrj 	      goto finish_cancelled;
1369*38fd1498Szrj 	    }
1370*38fd1498Szrj 	}
1371*38fd1498Szrj       else
1372*38fd1498Szrj 	{
1373*38fd1498Szrj 	  /* All tasks we are waiting for are either running in other
1374*38fd1498Szrj 	     threads, or they are tasks that have not had their
1375*38fd1498Szrj 	     dependencies met (so they're not even in the queue).  Wait
1376*38fd1498Szrj 	     for them.  */
1377*38fd1498Szrj 	  if (task->taskwait == NULL)
1378*38fd1498Szrj 	    {
1379*38fd1498Szrj 	      taskwait.in_depend_wait = false;
1380*38fd1498Szrj 	      gomp_sem_init (&taskwait.taskwait_sem, 0);
1381*38fd1498Szrj 	      task->taskwait = &taskwait;
1382*38fd1498Szrj 	    }
1383*38fd1498Szrj 	  taskwait.in_taskwait = true;
1384*38fd1498Szrj 	}
1385*38fd1498Szrj       gomp_mutex_unlock (&team->task_lock);
1386*38fd1498Szrj       if (do_wake)
1387*38fd1498Szrj 	{
1388*38fd1498Szrj 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1389*38fd1498Szrj 	  do_wake = 0;
1390*38fd1498Szrj 	}
1391*38fd1498Szrj       if (to_free)
1392*38fd1498Szrj 	{
1393*38fd1498Szrj 	  gomp_finish_task (to_free);
1394*38fd1498Szrj 	  free (to_free);
1395*38fd1498Szrj 	  to_free = NULL;
1396*38fd1498Szrj 	}
1397*38fd1498Szrj       if (child_task)
1398*38fd1498Szrj 	{
1399*38fd1498Szrj 	  thr->task = child_task;
1400*38fd1498Szrj 	  if (__builtin_expect (child_task->fn == NULL, 0))
1401*38fd1498Szrj 	    {
1402*38fd1498Szrj 	      if (gomp_target_task_fn (child_task->fn_data))
1403*38fd1498Szrj 		{
1404*38fd1498Szrj 		  thr->task = task;
1405*38fd1498Szrj 		  gomp_mutex_lock (&team->task_lock);
1406*38fd1498Szrj 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1407*38fd1498Szrj 		  struct gomp_target_task *ttask
1408*38fd1498Szrj 		    = (struct gomp_target_task *) child_task->fn_data;
1409*38fd1498Szrj 		  /* If GOMP_PLUGIN_target_task_completion has run already
1410*38fd1498Szrj 		     in between gomp_target_task_fn and the mutex lock,
1411*38fd1498Szrj 		     perform the requeuing here.  */
1412*38fd1498Szrj 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1413*38fd1498Szrj 		    gomp_target_task_completion (team, child_task);
1414*38fd1498Szrj 		  else
1415*38fd1498Szrj 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1416*38fd1498Szrj 		  child_task = NULL;
1417*38fd1498Szrj 		  continue;
1418*38fd1498Szrj 		}
1419*38fd1498Szrj 	    }
1420*38fd1498Szrj 	  else
1421*38fd1498Szrj 	    child_task->fn (child_task->fn_data);
1422*38fd1498Szrj 	  thr->task = task;
1423*38fd1498Szrj 	}
1424*38fd1498Szrj       else
1425*38fd1498Szrj 	gomp_sem_wait (&taskwait.taskwait_sem);
1426*38fd1498Szrj       gomp_mutex_lock (&team->task_lock);
1427*38fd1498Szrj       if (child_task)
1428*38fd1498Szrj 	{
1429*38fd1498Szrj 	 finish_cancelled:;
1430*38fd1498Szrj 	  size_t new_tasks
1431*38fd1498Szrj 	    = gomp_task_run_post_handle_depend (child_task, team);
1432*38fd1498Szrj 
1433*38fd1498Szrj 	  if (child_q)
1434*38fd1498Szrj 	    {
1435*38fd1498Szrj 	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1436*38fd1498Szrj 				     child_task, MEMMODEL_RELAXED);
1437*38fd1498Szrj 	      child_task->pnode[PQ_CHILDREN].next = NULL;
1438*38fd1498Szrj 	      child_task->pnode[PQ_CHILDREN].prev = NULL;
1439*38fd1498Szrj 	    }
1440*38fd1498Szrj 
1441*38fd1498Szrj 	  gomp_clear_parent (&child_task->children_queue);
1442*38fd1498Szrj 
1443*38fd1498Szrj 	  gomp_task_run_post_remove_taskgroup (child_task);
1444*38fd1498Szrj 
1445*38fd1498Szrj 	  to_free = child_task;
1446*38fd1498Szrj 	  child_task = NULL;
1447*38fd1498Szrj 	  team->task_count--;
1448*38fd1498Szrj 	  if (new_tasks > 1)
1449*38fd1498Szrj 	    {
1450*38fd1498Szrj 	      do_wake = team->nthreads - team->task_running_count
1451*38fd1498Szrj 			- !task->in_tied_task;
1452*38fd1498Szrj 	      if (do_wake > new_tasks)
1453*38fd1498Szrj 		do_wake = new_tasks;
1454*38fd1498Szrj 	    }
1455*38fd1498Szrj 	}
1456*38fd1498Szrj     }
1457*38fd1498Szrj }
1458*38fd1498Szrj 
1459*38fd1498Szrj /* An undeferred task is about to run.  Wait for all tasks that this
1460*38fd1498Szrj    undeferred task depends on.
1461*38fd1498Szrj 
1462*38fd1498Szrj    This is done by first putting all known ready dependencies
1463*38fd1498Szrj    (dependencies that have their own dependencies met) at the top of
1464*38fd1498Szrj    the scheduling queues.  Then we iterate through these imminently
1465*38fd1498Szrj    ready tasks (and possibly other high priority tasks), and run them.
1466*38fd1498Szrj    If we run out of ready dependencies to execute, we either wait for
1467*38fd1498Szrj    the remaining dependencies to finish, or wait for them to get
1468*38fd1498Szrj    scheduled so we can run them.
1469*38fd1498Szrj 
1470*38fd1498Szrj    DEPEND is as in GOMP_task.  */
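/* Editorial note on the layout of DEPEND as this function indexes it
   (see depend[0], depend[1] and depend[i + 2] below); the concrete
   array is an illustrative guess for

     #pragma omp task depend(out: x) depend(in: y, z)

   with out/inout addresses placed first:

     void *depend[] = { (void *) 3,   // number of depend clauses
                        (void *) 1,   // number of out/inout clauses
                        &x, &y, &z }; // addresses, out/inout first
*/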
1471*38fd1498Szrj 
1472*38fd1498Szrj void
1473*38fd1498Szrj gomp_task_maybe_wait_for_dependencies (void **depend)
1474*38fd1498Szrj {
1475*38fd1498Szrj   struct gomp_thread *thr = gomp_thread ();
1476*38fd1498Szrj   struct gomp_task *task = thr->task;
1477*38fd1498Szrj   struct gomp_team *team = thr->ts.team;
1478*38fd1498Szrj   struct gomp_task_depend_entry elem, *ent = NULL;
1479*38fd1498Szrj   struct gomp_taskwait taskwait;
1480*38fd1498Szrj   size_t ndepend = (uintptr_t) depend[0];
1481*38fd1498Szrj   size_t nout = (uintptr_t) depend[1];
1482*38fd1498Szrj   size_t i;
1483*38fd1498Szrj   size_t num_awaited = 0;
1484*38fd1498Szrj   struct gomp_task *child_task = NULL;
1485*38fd1498Szrj   struct gomp_task *to_free = NULL;
1486*38fd1498Szrj   int do_wake = 0;
1487*38fd1498Szrj 
1488*38fd1498Szrj   gomp_mutex_lock (&team->task_lock);
1489*38fd1498Szrj   for (i = 0; i < ndepend; i++)
1490*38fd1498Szrj     {
1491*38fd1498Szrj       elem.addr = depend[i + 2];
1492*38fd1498Szrj       ent = htab_find (task->depend_hash, &elem);
1493*38fd1498Szrj       for (; ent; ent = ent->next)
1494*38fd1498Szrj 	if (i >= nout && ent->is_in)
1495*38fd1498Szrj 	  continue;
1496*38fd1498Szrj 	else
1497*38fd1498Szrj 	  {
1498*38fd1498Szrj 	    struct gomp_task *tsk = ent->task;
1499*38fd1498Szrj 	    if (!tsk->parent_depends_on)
1500*38fd1498Szrj 	      {
1501*38fd1498Szrj 		tsk->parent_depends_on = true;
1502*38fd1498Szrj 		++num_awaited;
1503*38fd1498Szrj 		/* If dependency TSK itself has no dependencies and is
1504*38fd1498Szrj 		   ready to run, move it up front so that we run it as
1505*38fd1498Szrj 		   soon as possible.  */
1506*38fd1498Szrj 		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1507*38fd1498Szrj 		  priority_queue_upgrade_task (tsk, task);
1508*38fd1498Szrj 	      }
1509*38fd1498Szrj 	  }
1510*38fd1498Szrj     }
1511*38fd1498Szrj   if (num_awaited == 0)
1512*38fd1498Szrj     {
1513*38fd1498Szrj       gomp_mutex_unlock (&team->task_lock);
1514*38fd1498Szrj       return;
1515*38fd1498Szrj     }
1516*38fd1498Szrj 
1517*38fd1498Szrj   memset (&taskwait, 0, sizeof (taskwait));
1518*38fd1498Szrj   taskwait.n_depend = num_awaited;
1519*38fd1498Szrj   gomp_sem_init (&taskwait.taskwait_sem, 0);
1520*38fd1498Szrj   task->taskwait = &taskwait;
1521*38fd1498Szrj 
1522*38fd1498Szrj   while (1)
1523*38fd1498Szrj     {
1524*38fd1498Szrj       bool cancelled = false;
1525*38fd1498Szrj       if (taskwait.n_depend == 0)
1526*38fd1498Szrj 	{
1527*38fd1498Szrj 	  task->taskwait = NULL;
1528*38fd1498Szrj 	  gomp_mutex_unlock (&team->task_lock);
1529*38fd1498Szrj 	  if (to_free)
1530*38fd1498Szrj 	    {
1531*38fd1498Szrj 	      gomp_finish_task (to_free);
1532*38fd1498Szrj 	      free (to_free);
1533*38fd1498Szrj 	    }
1534*38fd1498Szrj 	  gomp_sem_destroy (&taskwait.taskwait_sem);
1535*38fd1498Szrj 	  return;
1536*38fd1498Szrj 	}
1537*38fd1498Szrj 
1538*38fd1498Szrj       /* Theoretically, when we have multiple priorities, we should
1539*38fd1498Szrj 	 choose between the highest priority item in
1540*38fd1498Szrj 	 task->children_queue and team->task_queue here, so we should
1541*38fd1498Szrj 	 use priority_queue_next_task().  However, since we are
1542*38fd1498Szrj 	 running an undeferred task, perhaps that makes all tasks it
1543*38fd1498Szrj 	 depends on undeferred, thus a priority of INF?  This would
1544*38fd1498Szrj 	 make it unnecessary to take anything into account here
1545*38fd1498Szrj 	 except the dependencies.
1546*38fd1498Szrj 
1547*38fd1498Szrj 	 On the other hand, if we want to use priority_queue_next_task(),
1548*38fd1498Szrj 	 care should be taken to only use priority_queue_remove()
1549*38fd1498Szrj 	 below if the task was actually removed from the children
1550*38fd1498Szrj 	 queue.  */
1551*38fd1498Szrj       bool ignored;
1552*38fd1498Szrj       struct gomp_task *next_task
1553*38fd1498Szrj 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1554*38fd1498Szrj 				    PQ_IGNORED, NULL, &ignored);
1555*38fd1498Szrj 
1556*38fd1498Szrj       if (next_task->kind == GOMP_TASK_WAITING)
1557*38fd1498Szrj 	{
1558*38fd1498Szrj 	  child_task = next_task;
1559*38fd1498Szrj 	  cancelled
1560*38fd1498Szrj 	    = gomp_task_run_pre (child_task, task, team);
1561*38fd1498Szrj 	  if (__builtin_expect (cancelled, 0))
1562*38fd1498Szrj 	    {
1563*38fd1498Szrj 	      if (to_free)
1564*38fd1498Szrj 		{
1565*38fd1498Szrj 		  gomp_finish_task (to_free);
1566*38fd1498Szrj 		  free (to_free);
1567*38fd1498Szrj 		  to_free = NULL;
1568*38fd1498Szrj 		}
1569*38fd1498Szrj 	      goto finish_cancelled;
1570*38fd1498Szrj 	    }
1571*38fd1498Szrj 	}
1572*38fd1498Szrj       else
1573*38fd1498Szrj 	/* All tasks we are waiting for are either running in other
1574*38fd1498Szrj 	   threads, or they are tasks that have not had their
1575*38fd1498Szrj 	   dependencies met (so they're not even in the queue).  Wait
1576*38fd1498Szrj 	   for them.  */
1577*38fd1498Szrj 	taskwait.in_depend_wait = true;
1578*38fd1498Szrj       gomp_mutex_unlock (&team->task_lock);
1579*38fd1498Szrj       if (do_wake)
1580*38fd1498Szrj 	{
1581*38fd1498Szrj 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1582*38fd1498Szrj 	  do_wake = 0;
1583*38fd1498Szrj 	}
1584*38fd1498Szrj       if (to_free)
1585*38fd1498Szrj 	{
1586*38fd1498Szrj 	  gomp_finish_task (to_free);
1587*38fd1498Szrj 	  free (to_free);
1588*38fd1498Szrj 	  to_free = NULL;
1589*38fd1498Szrj 	}
1590*38fd1498Szrj       if (child_task)
1591*38fd1498Szrj 	{
1592*38fd1498Szrj 	  thr->task = child_task;
1593*38fd1498Szrj 	  if (__builtin_expect (child_task->fn == NULL, 0))
1594*38fd1498Szrj 	    {
1595*38fd1498Szrj 	      if (gomp_target_task_fn (child_task->fn_data))
1596*38fd1498Szrj 		{
1597*38fd1498Szrj 		  thr->task = task;
1598*38fd1498Szrj 		  gomp_mutex_lock (&team->task_lock);
1599*38fd1498Szrj 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1600*38fd1498Szrj 		  struct gomp_target_task *ttask
1601*38fd1498Szrj 		    = (struct gomp_target_task *) child_task->fn_data;
1602*38fd1498Szrj 		  /* If GOMP_PLUGIN_target_task_completion has run already
1603*38fd1498Szrj 		     in between gomp_target_task_fn and the mutex lock,
1604*38fd1498Szrj 		     perform the requeuing here.  */
1605*38fd1498Szrj 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1606*38fd1498Szrj 		    gomp_target_task_completion (team, child_task);
1607*38fd1498Szrj 		  else
1608*38fd1498Szrj 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1609*38fd1498Szrj 		  child_task = NULL;
1610*38fd1498Szrj 		  continue;
1611*38fd1498Szrj 		}
1612*38fd1498Szrj 	    }
1613*38fd1498Szrj 	  else
1614*38fd1498Szrj 	    child_task->fn (child_task->fn_data);
1615*38fd1498Szrj 	  thr->task = task;
1616*38fd1498Szrj 	}
1617*38fd1498Szrj       else
1618*38fd1498Szrj 	gomp_sem_wait (&taskwait.taskwait_sem);
1619*38fd1498Szrj       gomp_mutex_lock (&team->task_lock);
1620*38fd1498Szrj       if (child_task)
1621*38fd1498Szrj 	{
1622*38fd1498Szrj 	 finish_cancelled:;
1623*38fd1498Szrj 	  size_t new_tasks
1624*38fd1498Szrj 	    = gomp_task_run_post_handle_depend (child_task, team);
1625*38fd1498Szrj 	  if (child_task->parent_depends_on)
1626*38fd1498Szrj 	    --taskwait.n_depend;
1627*38fd1498Szrj 
1628*38fd1498Szrj 	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1629*38fd1498Szrj 				 child_task, MEMMODEL_RELAXED);
1630*38fd1498Szrj 	  child_task->pnode[PQ_CHILDREN].next = NULL;
1631*38fd1498Szrj 	  child_task->pnode[PQ_CHILDREN].prev = NULL;
1632*38fd1498Szrj 
1633*38fd1498Szrj 	  gomp_clear_parent (&child_task->children_queue);
1634*38fd1498Szrj 	  gomp_task_run_post_remove_taskgroup (child_task);
1635*38fd1498Szrj 	  to_free = child_task;
1636*38fd1498Szrj 	  child_task = NULL;
1637*38fd1498Szrj 	  team->task_count--;
1638*38fd1498Szrj 	  if (new_tasks > 1)
1639*38fd1498Szrj 	    {
1640*38fd1498Szrj 	      do_wake = team->nthreads - team->task_running_count
1641*38fd1498Szrj 			- !task->in_tied_task;
1642*38fd1498Szrj 	      if (do_wake > new_tasks)
1643*38fd1498Szrj 		do_wake = new_tasks;
1644*38fd1498Szrj 	    }
1645*38fd1498Szrj 	}
1646*38fd1498Szrj     }
1647*38fd1498Szrj }
1648*38fd1498Szrj 
1649*38fd1498Szrj /* Called when encountering a taskyield directive.  */
1650*38fd1498Szrj 
1651*38fd1498Szrj void
1652*38fd1498Szrj GOMP_taskyield (void)
1653*38fd1498Szrj {
1654*38fd1498Szrj   /* Nothing at the moment.  */
1655*38fd1498Szrj }
1656*38fd1498Szrj 
1657*38fd1498Szrj void
1658*38fd1498Szrj GOMP_taskgroup_start (void)
1659*38fd1498Szrj {
1660*38fd1498Szrj   struct gomp_thread *thr = gomp_thread ();
1661*38fd1498Szrj   struct gomp_team *team = thr->ts.team;
1662*38fd1498Szrj   struct gomp_task *task = thr->task;
1663*38fd1498Szrj   struct gomp_taskgroup *taskgroup;
1664*38fd1498Szrj 
1665*38fd1498Szrj   /* If team is NULL, all tasks are executed as
1666*38fd1498Szrj      GOMP_TASK_UNDEFERRED tasks and thus all child tasks of
1667*38fd1498Szrj      the taskgroup and their descendant tasks will be finished
1668*38fd1498Szrj      by the time GOMP_taskgroup_end is called.  */
1669*38fd1498Szrj   if (team == NULL)
1670*38fd1498Szrj     return;
1671*38fd1498Szrj   taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
1672*38fd1498Szrj   taskgroup->prev = task->taskgroup;
1673*38fd1498Szrj   priority_queue_init (&taskgroup->taskgroup_queue);
1674*38fd1498Szrj   taskgroup->in_taskgroup_wait = false;
1675*38fd1498Szrj   taskgroup->cancelled = false;
1676*38fd1498Szrj   taskgroup->num_children = 0;
1677*38fd1498Szrj   gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1678*38fd1498Szrj   task->taskgroup = taskgroup;
1679*38fd1498Szrj }
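/* Editorial pairing example (illustrative, assuming the usual
   lowering of the construct):

     #pragma omp taskgroup      // -> GOMP_taskgroup_start ()
     {
       #pragma omp task
         work ();
     }                          // -> GOMP_taskgroup_end ()

   Unlike taskwait, taskgroup waits for the tasks created in the
   region and for their descendant tasks as well.  */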
1680*38fd1498Szrj 
1681*38fd1498Szrj void
1682*38fd1498Szrj GOMP_taskgroup_end (void)
1683*38fd1498Szrj {
1684*38fd1498Szrj   struct gomp_thread *thr = gomp_thread ();
1685*38fd1498Szrj   struct gomp_team *team = thr->ts.team;
1686*38fd1498Szrj   struct gomp_task *task = thr->task;
1687*38fd1498Szrj   struct gomp_taskgroup *taskgroup;
1688*38fd1498Szrj   struct gomp_task *child_task = NULL;
1689*38fd1498Szrj   struct gomp_task *to_free = NULL;
1690*38fd1498Szrj   int do_wake = 0;
1691*38fd1498Szrj 
1692*38fd1498Szrj   if (team == NULL)
1693*38fd1498Szrj     return;
1694*38fd1498Szrj   taskgroup = task->taskgroup;
1695*38fd1498Szrj   if (__builtin_expect (taskgroup == NULL, 0)
1696*38fd1498Szrj       && thr->ts.level == 0)
1697*38fd1498Szrj     {
1698*38fd1498Szrj       /* This can happen if GOMP_taskgroup_start is called when
1699*38fd1498Szrj 	 thr->ts.team == NULL, but inside of the taskgroup there
1700*38fd1498Szrj 	 is #pragma omp target nowait that creates an implicit
1701*38fd1498Szrj 	 team with a single thread.  In this case, we want to wait
1702*38fd1498Szrj 	 for all outstanding tasks in this team.  */
1703*38fd1498Szrj       gomp_team_barrier_wait (&team->barrier);
1704*38fd1498Szrj       return;
1705*38fd1498Szrj     }
1706*38fd1498Szrj 
1707*38fd1498Szrj   /* The acquire barrier on load of taskgroup->num_children here
1708*38fd1498Szrj      synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1709*38fd1498Szrj      It is not necessary that we synchronize with other non-0 writes at
1710*38fd1498Szrj      this point, but we must ensure that all writes to memory by a
1711*38fd1498Szrj      child thread task work function are seen before we exit from
1712*38fd1498Szrj      GOMP_taskgroup_end.  */
1713*38fd1498Szrj   if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1714*38fd1498Szrj     goto finish;
1715*38fd1498Szrj 
1716*38fd1498Szrj   bool unused;
1717*38fd1498Szrj   gomp_mutex_lock (&team->task_lock);
1718*38fd1498Szrj   while (1)
1719*38fd1498Szrj     {
1720*38fd1498Szrj       bool cancelled = false;
1721*38fd1498Szrj       if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1722*38fd1498Szrj 				  MEMMODEL_RELAXED))
1723*38fd1498Szrj 	{
1724*38fd1498Szrj 	  if (taskgroup->num_children)
1725*38fd1498Szrj 	    {
1726*38fd1498Szrj 	      if (priority_queue_empty_p (&task->children_queue,
1727*38fd1498Szrj 					  MEMMODEL_RELAXED))
1728*38fd1498Szrj 		goto do_wait;
1729*38fd1498Szrj 	      child_task
1730*38fd1498Szrj 		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1731*38fd1498Szrj 					    PQ_TEAM, &team->task_queue,
1732*38fd1498Szrj 					    &unused);
1733*38fd1498Szrj 	    }
1734*38fd1498Szrj 	  else
1735*38fd1498Szrj 	    {
1736*38fd1498Szrj 	      gomp_mutex_unlock (&team->task_lock);
1737*38fd1498Szrj 	      if (to_free)
1738*38fd1498Szrj 		{
1739*38fd1498Szrj 		  gomp_finish_task (to_free);
1740*38fd1498Szrj 		  free (to_free);
1741*38fd1498Szrj 		}
1742*38fd1498Szrj 	      goto finish;
1743*38fd1498Szrj 	    }
1744*38fd1498Szrj 	}
1745*38fd1498Szrj       else
1746*38fd1498Szrj 	child_task
1747*38fd1498Szrj 	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1748*38fd1498Szrj 				      PQ_TEAM, &team->task_queue, &unused);
1749*38fd1498Szrj       if (child_task->kind == GOMP_TASK_WAITING)
1750*38fd1498Szrj 	{
1751*38fd1498Szrj 	  cancelled
1752*38fd1498Szrj 	    = gomp_task_run_pre (child_task, child_task->parent, team);
1753*38fd1498Szrj 	  if (__builtin_expect (cancelled, 0))
1754*38fd1498Szrj 	    {
1755*38fd1498Szrj 	      if (to_free)
1756*38fd1498Szrj 		{
1757*38fd1498Szrj 		  gomp_finish_task (to_free);
1758*38fd1498Szrj 		  free (to_free);
1759*38fd1498Szrj 		  to_free = NULL;
1760*38fd1498Szrj 		}
1761*38fd1498Szrj 	      goto finish_cancelled;
1762*38fd1498Szrj 	    }
1763*38fd1498Szrj 	}
1764*38fd1498Szrj       else
1765*38fd1498Szrj 	{
1766*38fd1498Szrj 	  child_task = NULL;
1767*38fd1498Szrj 	 do_wait:
1768*38fd1498Szrj 	  /* All tasks we are waiting for are either running in other
1769*38fd1498Szrj 	     threads, or they are tasks that have not had their
1770*38fd1498Szrj 	     dependencies met (so they're not even in the queue).  Wait
1771*38fd1498Szrj 	     for them.  */
1772*38fd1498Szrj 	  taskgroup->in_taskgroup_wait = true;
1773*38fd1498Szrj 	}
1774*38fd1498Szrj       gomp_mutex_unlock (&team->task_lock);
1775*38fd1498Szrj       if (do_wake)
1776*38fd1498Szrj 	{
1777*38fd1498Szrj 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1778*38fd1498Szrj 	  do_wake = 0;
1779*38fd1498Szrj 	}
1780*38fd1498Szrj       if (to_free)
1781*38fd1498Szrj 	{
1782*38fd1498Szrj 	  gomp_finish_task (to_free);
1783*38fd1498Szrj 	  free (to_free);
1784*38fd1498Szrj 	  to_free = NULL;
1785*38fd1498Szrj 	}
1786*38fd1498Szrj       if (child_task)
1787*38fd1498Szrj 	{
1788*38fd1498Szrj 	  thr->task = child_task;
1789*38fd1498Szrj 	  if (__builtin_expect (child_task->fn == NULL, 0))
1790*38fd1498Szrj 	    {
1791*38fd1498Szrj 	      if (gomp_target_task_fn (child_task->fn_data))
1792*38fd1498Szrj 		{
1793*38fd1498Szrj 		  thr->task = task;
1794*38fd1498Szrj 		  gomp_mutex_lock (&team->task_lock);
1795*38fd1498Szrj 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1796*38fd1498Szrj 		  struct gomp_target_task *ttask
1797*38fd1498Szrj 		    = (struct gomp_target_task *) child_task->fn_data;
1798*38fd1498Szrj 		  /* If GOMP_PLUGIN_target_task_completion has run already
1799*38fd1498Szrj 		     in between gomp_target_task_fn and the mutex lock,
1800*38fd1498Szrj 		     perform the requeuing here.  */
1801*38fd1498Szrj 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1802*38fd1498Szrj 		    gomp_target_task_completion (team, child_task);
1803*38fd1498Szrj 		  else
1804*38fd1498Szrj 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1805*38fd1498Szrj 		  child_task = NULL;
1806*38fd1498Szrj 		  continue;
1807*38fd1498Szrj 		}
1808*38fd1498Szrj 	    }
1809*38fd1498Szrj 	  else
1810*38fd1498Szrj 	    child_task->fn (child_task->fn_data);
1811*38fd1498Szrj 	  thr->task = task;
1812*38fd1498Szrj 	}
1813*38fd1498Szrj       else
1814*38fd1498Szrj 	gomp_sem_wait (&taskgroup->taskgroup_sem);
1815*38fd1498Szrj       gomp_mutex_lock (&team->task_lock);
1816*38fd1498Szrj       if (child_task)
1817*38fd1498Szrj 	{
1818*38fd1498Szrj 	 finish_cancelled:;
1819*38fd1498Szrj 	  size_t new_tasks
1820*38fd1498Szrj 	    = gomp_task_run_post_handle_depend (child_task, team);
1821*38fd1498Szrj 	  gomp_task_run_post_remove_parent (child_task);
1822*38fd1498Szrj 	  gomp_clear_parent (&child_task->children_queue);
1823*38fd1498Szrj 	  gomp_task_run_post_remove_taskgroup (child_task);
1824*38fd1498Szrj 	  to_free = child_task;
1825*38fd1498Szrj 	  child_task = NULL;
1826*38fd1498Szrj 	  team->task_count--;
1827*38fd1498Szrj 	  if (new_tasks > 1)
1828*38fd1498Szrj 	    {
1829*38fd1498Szrj 	      do_wake = team->nthreads - team->task_running_count
1830*38fd1498Szrj 			- !task->in_tied_task;
1831*38fd1498Szrj 	      if (do_wake > new_tasks)
1832*38fd1498Szrj 		do_wake = new_tasks;
1833*38fd1498Szrj 	    }
1834*38fd1498Szrj 	}
1835*38fd1498Szrj     }
1836*38fd1498Szrj 
1837*38fd1498Szrj  finish:
1838*38fd1498Szrj   task->taskgroup = taskgroup->prev;
1839*38fd1498Szrj   gomp_sem_destroy (&taskgroup->taskgroup_sem);
1840*38fd1498Szrj   free (taskgroup);
1841*38fd1498Szrj }
1842*38fd1498Szrj 
1843*38fd1498Szrj int
1844*38fd1498Szrj omp_in_final (void)
1845*38fd1498Szrj {
1846*38fd1498Szrj   struct gomp_thread *thr = gomp_thread ();
1847*38fd1498Szrj   return thr->task && thr->task->final_task;
1848*38fd1498Szrj }
1849*38fd1498Szrj 
1850*38fd1498Szrj ialias (omp_in_final)
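/* Editorial example (illustrative; DEPTH and CUTOFF are hypothetical
   names): inside a final task, omp_in_final () returns nonzero, so a
   task body can switch to a serial fallback instead of creating more
   tasks:

     #pragma omp task final(depth > CUTOFF)
     {
       if (omp_in_final ())
         solve_serially ();
       else
         solve_with_tasks ();
     }
*/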