xref: /dflybsd-src/contrib/gcc-8.0/libgomp/task.c (revision 08e4ff685e25ec7a63f294885d1831439b4c253c)
1 /* Copyright (C) 2007-2018 Free Software Foundation, Inc.
2    Contributed by Richard Henderson <rth@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file handles the maintenance of tasks in response to task
27    creation and termination.  */
28 
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include "gomp-constants.h"
33 
34 typedef struct gomp_task_depend_entry *hash_entry_type;
35 
/* Return a block of SIZE bytes obtained from gomp_malloc.  */

static inline void *
htab_alloc (size_t size)
{
  void *mem = gomp_malloc (size);
  return mem;
}
41 
/* Release PTR with free; the counterpart of htab_alloc.  */

static inline void
htab_free (void *ptr)
{
  free (ptr);
  return;
}
47 
48 #include "hashtab.h"
49 
50 static inline hashval_t
51 htab_hash (hash_entry_type element)
52 {
53   return hash_pointer (element->addr);
54 }
55 
56 static inline bool
57 htab_eq (hash_entry_type x, hash_entry_type y)
58 {
59   return x->addr == y->addr;
60 }
61 
62 /* Create a new task data structure.  */
63 
64 void
65 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
66 		struct gomp_task_icv *prev_icv)
67 {
68   /* It would seem that using memset here would be a win, but it turns
69      out that partially filling gomp_task allows us to keep the
70      overhead of task creation low.  In the nqueens-1.c test, for a
71      sufficiently large N, we drop the overhead from 5-6% to 1%.
72 
73      Note, the nqueens-1.c test in serial mode is a good test to
74      benchmark the overhead of creating tasks as there are millions of
75      tiny tasks created that all run undeferred.  */
76   task->parent = parent_task;
77   task->icv = *prev_icv;
78   task->kind = GOMP_TASK_IMPLICIT;
79   task->taskwait = NULL;
80   task->in_tied_task = false;
81   task->final_task = false;
82   task->copy_ctors_done = false;
83   task->parent_depends_on = false;
84   priority_queue_init (&task->children_queue);
85   task->taskgroup = NULL;
86   task->dependers = NULL;
87   task->depend_hash = NULL;
88   task->depend_count = 0;
89 }
90 
91 /* Clean up a task, after completing it.  */
92 
93 void
94 gomp_end_task (void)
95 {
96   struct gomp_thread *thr = gomp_thread ();
97   struct gomp_task *task = thr->task;
98 
99   gomp_finish_task (task);
100   thr->task = task->parent;
101 }
102 
103 /* Clear the parent field of every task in LIST.  */
104 
105 static inline void
106 gomp_clear_parent_in_list (struct priority_list *list)
107 {
108   struct priority_node *p = list->tasks;
109   if (p)
110     do
111       {
112 	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
113 	p = p->next;
114       }
115     while (p != list->tasks);
116 }
117 
118 /* Splay tree version of gomp_clear_parent_in_list.
119 
120    Clear the parent field of every task in NODE within SP, and free
121    the node when done.  */
122 
123 static void
124 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
125 {
126   if (!node)
127     return;
128   prio_splay_tree_node left = node->left, right = node->right;
129   gomp_clear_parent_in_list (&node->key.l);
130 #if _LIBGOMP_CHECKING_
131   memset (node, 0xaf, sizeof (*node));
132 #endif
133   /* No need to remove the node from the tree.  We're nuking
134      everything, so just free the nodes and our caller can clear the
135      entire splay tree.  */
136   free (node);
137   gomp_clear_parent_in_tree (sp, left);
138   gomp_clear_parent_in_tree (sp, right);
139 }
140 
141 /* Clear the parent field of every task in Q and remove every task
142    from Q.  */
143 
144 static inline void
145 gomp_clear_parent (struct priority_queue *q)
146 {
147   if (priority_queue_multi_p (q))
148     {
149       gomp_clear_parent_in_tree (&q->t, q->t.root);
150       /* All the nodes have been cleared in gomp_clear_parent_in_tree.
151 	 No need to remove anything.  We can just nuke everything.  */
152       q->t.root = NULL;
153     }
154   else
155     gomp_clear_parent_in_list (&q->l);
156 }
157 
158 /* Helper function for GOMP_task and gomp_create_target_task.
159 
160    For a TASK with in/out dependencies, fill in the various dependency
161    queues.  PARENT is the parent of said task.  DEPEND is as in
162    GOMP_task.  */
163 
164 static void
165 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
166 			 void **depend)
167 {
168   size_t ndepend = (uintptr_t) depend[0];
169   size_t nout = (uintptr_t) depend[1];
170   size_t i;
171   hash_entry_type ent;
172 
173   task->depend_count = ndepend;
174   task->num_dependees = 0;
175   if (parent->depend_hash == NULL)
176     parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
177   for (i = 0; i < ndepend; i++)
178     {
179       task->depend[i].addr = depend[2 + i];
180       task->depend[i].next = NULL;
181       task->depend[i].prev = NULL;
182       task->depend[i].task = task;
183       task->depend[i].is_in = i >= nout;
184       task->depend[i].redundant = false;
185       task->depend[i].redundant_out = false;
186 
187       hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
188 					      &task->depend[i], INSERT);
189       hash_entry_type out = NULL, last = NULL;
190       if (*slot)
191 	{
192 	  /* If multiple depends on the same task are the same, all but the
193 	     first one are redundant.  As inout/out come first, if any of them
194 	     is inout/out, it will win, which is the right semantics.  */
195 	  if ((*slot)->task == task)
196 	    {
197 	      task->depend[i].redundant = true;
198 	      continue;
199 	    }
200 	  for (ent = *slot; ent; ent = ent->next)
201 	    {
202 	      if (ent->redundant_out)
203 		break;
204 
205 	      last = ent;
206 
207 	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
208 	      if (i >= nout && ent->is_in)
209 		continue;
210 
211 	      if (!ent->is_in)
212 		out = ent;
213 
214 	      struct gomp_task *tsk = ent->task;
215 	      if (tsk->dependers == NULL)
216 		{
217 		  tsk->dependers
218 		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
219 				   + 6 * sizeof (struct gomp_task *));
220 		  tsk->dependers->n_elem = 1;
221 		  tsk->dependers->allocated = 6;
222 		  tsk->dependers->elem[0] = task;
223 		  task->num_dependees++;
224 		  continue;
225 		}
226 	      /* We already have some other dependency on tsk from earlier
227 		 depend clause.  */
228 	      else if (tsk->dependers->n_elem
229 		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
230 			   == task))
231 		continue;
232 	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
233 		{
234 		  tsk->dependers->allocated
235 		    = tsk->dependers->allocated * 2 + 2;
236 		  tsk->dependers
237 		    = gomp_realloc (tsk->dependers,
238 				    sizeof (struct gomp_dependers_vec)
239 				    + (tsk->dependers->allocated
240 				       * sizeof (struct gomp_task *)));
241 		}
242 	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
243 	      task->num_dependees++;
244 	    }
245 	  task->depend[i].next = *slot;
246 	  (*slot)->prev = &task->depend[i];
247 	}
248       *slot = &task->depend[i];
249 
250       /* There is no need to store more than one depend({,in}out:) task per
251 	 address in the hash table chain for the purpose of creation of
252 	 deferred tasks, because each out depends on all earlier outs, thus it
253 	 is enough to record just the last depend({,in}out:).  For depend(in:),
254 	 we need to keep all of the previous ones not terminated yet, because
255 	 a later depend({,in}out:) might need to depend on all of them.  So, if
256 	 the new task's clause is depend({,in}out:), we know there is at most
257 	 one other depend({,in}out:) clause in the list (out).  For
258 	 non-deferred tasks we want to see all outs, so they are moved to the
259 	 end of the chain, after first redundant_out entry all following
260 	 entries should be redundant_out.  */
261       if (!task->depend[i].is_in && out)
262 	{
263 	  if (out != last)
264 	    {
265 	      out->next->prev = out->prev;
266 	      out->prev->next = out->next;
267 	      out->next = last->next;
268 	      out->prev = last;
269 	      last->next = out;
270 	      if (out->next)
271 		out->next->prev = out;
272 	    }
273 	  out->redundant_out = true;
274 	}
275     }
276 }
277 
278 /* Called when encountering an explicit task directive.  If IF_CLAUSE is
279    false, then we must not delay in executing the task.  If UNTIED is true,
280    then the task may be executed by any member of the team.
281 
282    DEPEND is an array containing:
283 	depend[0]: number of depend elements.
284 	depend[1]: number of depend elements of type "out".
285 	depend[2..N+1]: address of [1..N]th depend element.  */
286 
287 void
288 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
289 	   long arg_size, long arg_align, bool if_clause, unsigned flags,
290 	   void **depend, int priority)
291 {
292   struct gomp_thread *thr = gomp_thread ();
293   struct gomp_team *team = thr->ts.team;
294 
295 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
296   /* If pthread_mutex_* is used for omp_*lock*, then each task must be
297      tied to one thread all the time.  This means UNTIED tasks must be
298      tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
299      might be running on different thread than FN.  */
300   if (cpyfn)
301     if_clause = false;
302   flags &= ~GOMP_TASK_FLAG_UNTIED;
303 #endif
304 
305   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
306   if (team
307       && (gomp_team_barrier_cancelled (&team->barrier)
308 	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
309     return;
310 
311   if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
312     priority = 0;
313   else if (priority > gomp_max_task_priority_var)
314     priority = gomp_max_task_priority_var;
315 
316   if (!if_clause || team == NULL
317       || (thr->task && thr->task->final_task)
318       || team->task_count > 64 * team->nthreads)
319     {
320       struct gomp_task task;
321 
322       /* If there are depend clauses and earlier deferred sibling tasks
323 	 with depend clauses, check if there isn't a dependency.  If there
324 	 is, we need to wait for them.  There is no need to handle
325 	 depend clauses for non-deferred tasks other than this, because
326 	 the parent task is suspended until the child task finishes and thus
327 	 it can't start further child tasks.  */
328       if ((flags & GOMP_TASK_FLAG_DEPEND)
329 	  && thr->task && thr->task->depend_hash)
330 	gomp_task_maybe_wait_for_dependencies (depend);
331 
332       gomp_init_task (&task, thr->task, gomp_icv (false));
333       task.kind = GOMP_TASK_UNDEFERRED;
334       task.final_task = (thr->task && thr->task->final_task)
335 			|| (flags & GOMP_TASK_FLAG_FINAL);
336       task.priority = priority;
337       if (thr->task)
338 	{
339 	  task.in_tied_task = thr->task->in_tied_task;
340 	  task.taskgroup = thr->task->taskgroup;
341 	}
342       thr->task = &task;
343       if (__builtin_expect (cpyfn != NULL, 0))
344 	{
345 	  char buf[arg_size + arg_align - 1];
346 	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
347 				& ~(uintptr_t) (arg_align - 1));
348 	  cpyfn (arg, data);
349 	  fn (arg);
350 	}
351       else
352 	fn (data);
353       /* Access to "children" is normally done inside a task_lock
354 	 mutex region, but the only way this particular task.children
355 	 can be set is if this thread's task work function (fn)
356 	 creates children.  So since the setter is *this* thread, we
357 	 need no barriers here when testing for non-NULL.  We can have
358 	 task.children set by the current thread then changed by a
359 	 child thread, but seeing a stale non-NULL value is not a
360 	 problem.  Once past the task_lock acquisition, this thread
361 	 will see the real value of task.children.  */
362       if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
363 	{
364 	  gomp_mutex_lock (&team->task_lock);
365 	  gomp_clear_parent (&task.children_queue);
366 	  gomp_mutex_unlock (&team->task_lock);
367 	}
368       gomp_end_task ();
369     }
370   else
371     {
372       struct gomp_task *task;
373       struct gomp_task *parent = thr->task;
374       struct gomp_taskgroup *taskgroup = parent->taskgroup;
375       char *arg;
376       bool do_wake;
377       size_t depend_size = 0;
378 
379       if (flags & GOMP_TASK_FLAG_DEPEND)
380 	depend_size = ((uintptr_t) depend[0]
381 		       * sizeof (struct gomp_task_depend_entry));
382       task = gomp_malloc (sizeof (*task) + depend_size
383 			  + arg_size + arg_align - 1);
384       arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
385 		      & ~(uintptr_t) (arg_align - 1));
386       gomp_init_task (task, parent, gomp_icv (false));
387       task->priority = priority;
388       task->kind = GOMP_TASK_UNDEFERRED;
389       task->in_tied_task = parent->in_tied_task;
390       task->taskgroup = taskgroup;
391       thr->task = task;
392       if (cpyfn)
393 	{
394 	  cpyfn (arg, data);
395 	  task->copy_ctors_done = true;
396 	}
397       else
398 	memcpy (arg, data, arg_size);
399       thr->task = parent;
400       task->kind = GOMP_TASK_WAITING;
401       task->fn = fn;
402       task->fn_data = arg;
403       task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
404       gomp_mutex_lock (&team->task_lock);
405       /* If parallel or taskgroup has been cancelled, don't start new
406 	 tasks.  */
407       if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
408 			     || (taskgroup && taskgroup->cancelled))
409 			    && !task->copy_ctors_done, 0))
410 	{
411 	  gomp_mutex_unlock (&team->task_lock);
412 	  gomp_finish_task (task);
413 	  free (task);
414 	  return;
415 	}
416       if (taskgroup)
417 	taskgroup->num_children++;
418       if (depend_size)
419 	{
420 	  gomp_task_handle_depend (task, parent, depend);
421 	  if (task->num_dependees)
422 	    {
423 	      /* Tasks that depend on other tasks are not put into the
424 		 various waiting queues, so we are done for now.  Said
425 		 tasks are instead put into the queues via
426 		 gomp_task_run_post_handle_dependers() after their
427 		 dependencies have been satisfied.  After which, they
428 		 can be picked up by the various scheduling
429 		 points.  */
430 	      gomp_mutex_unlock (&team->task_lock);
431 	      return;
432 	    }
433 	}
434 
435       priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
436 			     task, priority,
437 			     PRIORITY_INSERT_BEGIN,
438 			     /*adjust_parent_depends_on=*/false,
439 			     task->parent_depends_on);
440       if (taskgroup)
441 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
442 			       task, priority,
443 			       PRIORITY_INSERT_BEGIN,
444 			       /*adjust_parent_depends_on=*/false,
445 			       task->parent_depends_on);
446 
447       priority_queue_insert (PQ_TEAM, &team->task_queue,
448 			     task, priority,
449 			     PRIORITY_INSERT_END,
450 			     /*adjust_parent_depends_on=*/false,
451 			     task->parent_depends_on);
452 
453       ++team->task_count;
454       ++team->task_queued_count;
455       gomp_team_barrier_set_task_pending (&team->barrier);
456       do_wake = team->task_running_count + !parent->in_tied_task
457 		< team->nthreads;
458       gomp_mutex_unlock (&team->task_lock);
459       if (do_wake)
460 	gomp_team_barrier_wake (&team->barrier, 1);
461     }
462 }
463 
/* NOTE(review): ialias is defined outside this file; presumably these
   two lines set up internal aliases for the taskgroup entry points --
   confirm against its definition.  */
ialias (GOMP_taskgroup_start)
ialias (GOMP_taskgroup_end)

/* taskloop.c is included twice as a template.  First inclusion: TYPE
   is long, UTYPE is unsigned long, and TYPE_is_long is defined.  The
   macros are undefined again afterwards.  */
#define TYPE long
#define UTYPE unsigned long
#define TYPE_is_long 1
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef TYPE_is_long

/* Second inclusion: TYPE and UTYPE are both unsigned long long, and
   GOMP_taskloop is renamed to GOMP_taskloop_ull for the duration of
   the include.  */
#define TYPE unsigned long long
#define UTYPE TYPE
#define GOMP_taskloop GOMP_taskloop_ull
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef GOMP_taskloop
482 
483 static void inline
484 priority_queue_move_task_first (enum priority_queue_type type,
485 				struct priority_queue *head,
486 				struct gomp_task *task)
487 {
488 #if _LIBGOMP_CHECKING_
489   if (!priority_queue_task_in_queue_p (type, head, task))
490     gomp_fatal ("Attempt to move first missing task %p", task);
491 #endif
492   struct priority_list *list;
493   if (priority_queue_multi_p (head))
494     {
495       list = priority_queue_lookup_priority (head, task->priority);
496 #if _LIBGOMP_CHECKING_
497       if (!list)
498 	gomp_fatal ("Unable to find priority %d", task->priority);
499 #endif
500     }
501   else
502     list = &head->l;
503   priority_list_remove (list, task_to_priority_node (type, task), 0);
504   priority_list_insert (type, list, task, task->priority,
505 			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
506 			task->parent_depends_on);
507 }
508 
509 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
510    with team->task_lock held, or is executed in the thread that called
511    gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
512    run before it acquires team->task_lock.  */
513 
514 static void
515 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
516 {
517   struct gomp_task *parent = task->parent;
518   if (parent)
519     priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
520 				    task);
521 
522   struct gomp_taskgroup *taskgroup = task->taskgroup;
523   if (taskgroup)
524     priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
525 				    task);
526 
527   priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
528 			 PRIORITY_INSERT_BEGIN, false,
529 			 task->parent_depends_on);
530   task->kind = GOMP_TASK_WAITING;
531   if (parent && parent->taskwait)
532     {
533       if (parent->taskwait->in_taskwait)
534 	{
535 	  /* One more task has had its dependencies met.
536 	     Inform any waiters.  */
537 	  parent->taskwait->in_taskwait = false;
538 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
539 	}
540       else if (parent->taskwait->in_depend_wait)
541 	{
542 	  /* One more task has had its dependencies met.
543 	     Inform any waiters.  */
544 	  parent->taskwait->in_depend_wait = false;
545 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
546 	}
547     }
548   if (taskgroup && taskgroup->in_taskgroup_wait)
549     {
550       /* One more task has had its dependencies met.
551 	 Inform any waiters.  */
552       taskgroup->in_taskgroup_wait = false;
553       gomp_sem_post (&taskgroup->taskgroup_sem);
554     }
555 
556   ++team->task_queued_count;
557   gomp_team_barrier_set_task_pending (&team->barrier);
558   /* I'm afraid this can't be done after releasing team->task_lock,
559      as gomp_target_task_completion is run from unrelated thread and
560      therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
561      the team could be gone already.  */
562   if (team->nthreads > team->task_running_count)
563     gomp_team_barrier_wake (&team->barrier, 1);
564 }
565 
566 /* Signal that a target task TTASK has completed the asynchronously
567    running phase and should be requeued as a task to handle the
568    variable unmapping.  */
569 
570 void
571 GOMP_PLUGIN_target_task_completion (void *data)
572 {
573   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
574   struct gomp_task *task = ttask->task;
575   struct gomp_team *team = ttask->team;
576 
577   gomp_mutex_lock (&team->task_lock);
578   if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
579     {
580       ttask->state = GOMP_TARGET_TASK_FINISHED;
581       gomp_mutex_unlock (&team->task_lock);
582       return;
583     }
584   ttask->state = GOMP_TARGET_TASK_FINISHED;
585   gomp_target_task_completion (team, task);
586   gomp_mutex_unlock (&team->task_lock);
587 }
588 
589 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
590 
591 /* Called for nowait target tasks.  */
592 
593 bool
594 gomp_create_target_task (struct gomp_device_descr *devicep,
595 			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
596 			 size_t *sizes, unsigned short *kinds,
597 			 unsigned int flags, void **depend, void **args,
598 			 enum gomp_target_task_state state)
599 {
600   struct gomp_thread *thr = gomp_thread ();
601   struct gomp_team *team = thr->ts.team;
602 
603   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
604   if (team
605       && (gomp_team_barrier_cancelled (&team->barrier)
606 	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
607     return true;
608 
609   struct gomp_target_task *ttask;
610   struct gomp_task *task;
611   struct gomp_task *parent = thr->task;
612   struct gomp_taskgroup *taskgroup = parent->taskgroup;
613   bool do_wake;
614   size_t depend_size = 0;
615   uintptr_t depend_cnt = 0;
616   size_t tgt_align = 0, tgt_size = 0;
617 
618   if (depend != NULL)
619     {
620       depend_cnt = (uintptr_t) depend[0];
621       depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
622     }
623   if (fn)
624     {
625       /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are
626 	 firstprivate on the target task.  */
627       size_t i;
628       for (i = 0; i < mapnum; i++)
629 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
630 	  {
631 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
632 	    if (tgt_align < align)
633 	      tgt_align = align;
634 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
635 	    tgt_size += sizes[i];
636 	  }
637       if (tgt_align)
638 	tgt_size += tgt_align - 1;
639       else
640 	tgt_size = 0;
641     }
642 
643   task = gomp_malloc (sizeof (*task) + depend_size
644 		      + sizeof (*ttask)
645 		      + mapnum * (sizeof (void *) + sizeof (size_t)
646 				  + sizeof (unsigned short))
647 		      + tgt_size);
648   gomp_init_task (task, parent, gomp_icv (false));
649   task->priority = 0;
650   task->kind = GOMP_TASK_WAITING;
651   task->in_tied_task = parent->in_tied_task;
652   task->taskgroup = taskgroup;
653   ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
654   ttask->devicep = devicep;
655   ttask->fn = fn;
656   ttask->mapnum = mapnum;
657   ttask->args = args;
658   memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
659   ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
660   memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
661   ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
662   memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
663   if (tgt_align)
664     {
665       char *tgt = (char *) &ttask->kinds[mapnum];
666       size_t i;
667       uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
668       if (al)
669 	tgt += tgt_align - al;
670       tgt_size = 0;
671       for (i = 0; i < mapnum; i++)
672 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
673 	  {
674 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
675 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
676 	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
677 	    ttask->hostaddrs[i] = tgt + tgt_size;
678 	    tgt_size = tgt_size + sizes[i];
679 	  }
680     }
681   ttask->flags = flags;
682   ttask->state = state;
683   ttask->task = task;
684   ttask->team = team;
685   task->fn = NULL;
686   task->fn_data = ttask;
687   task->final_task = 0;
688   gomp_mutex_lock (&team->task_lock);
689   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
690   if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
691 			|| (taskgroup && taskgroup->cancelled), 0))
692     {
693       gomp_mutex_unlock (&team->task_lock);
694       gomp_finish_task (task);
695       free (task);
696       return true;
697     }
698   if (depend_size)
699     {
700       gomp_task_handle_depend (task, parent, depend);
701       if (task->num_dependees)
702 	{
703 	  if (taskgroup)
704 	    taskgroup->num_children++;
705 	  gomp_mutex_unlock (&team->task_lock);
706 	  return true;
707 	}
708     }
709   if (state == GOMP_TARGET_TASK_DATA)
710     {
711       gomp_task_run_post_handle_depend_hash (task);
712       gomp_mutex_unlock (&team->task_lock);
713       gomp_finish_task (task);
714       free (task);
715       return false;
716     }
717   if (taskgroup)
718     taskgroup->num_children++;
719   /* For async offloading, if we don't need to wait for dependencies,
720      run the gomp_target_task_fn right away, essentially schedule the
721      mapping part of the task in the current thread.  */
722   if (devicep != NULL
723       && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
724     {
725       priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
726 			     PRIORITY_INSERT_END,
727 			     /*adjust_parent_depends_on=*/false,
728 			     task->parent_depends_on);
729       if (taskgroup)
730 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
731 			       task, 0, PRIORITY_INSERT_END,
732 			       /*adjust_parent_depends_on=*/false,
733 			       task->parent_depends_on);
734       task->pnode[PQ_TEAM].next = NULL;
735       task->pnode[PQ_TEAM].prev = NULL;
736       task->kind = GOMP_TASK_TIED;
737       ++team->task_count;
738       gomp_mutex_unlock (&team->task_lock);
739 
740       thr->task = task;
741       gomp_target_task_fn (task->fn_data);
742       thr->task = parent;
743 
744       gomp_mutex_lock (&team->task_lock);
745       task->kind = GOMP_TASK_ASYNC_RUNNING;
746       /* If GOMP_PLUGIN_target_task_completion has run already
747 	 in between gomp_target_task_fn and the mutex lock,
748 	 perform the requeuing here.  */
749       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
750 	gomp_target_task_completion (team, task);
751       else
752 	ttask->state = GOMP_TARGET_TASK_RUNNING;
753       gomp_mutex_unlock (&team->task_lock);
754       return true;
755     }
756   priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
757 			 PRIORITY_INSERT_BEGIN,
758 			 /*adjust_parent_depends_on=*/false,
759 			 task->parent_depends_on);
760   if (taskgroup)
761     priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
762 			   PRIORITY_INSERT_BEGIN,
763 			   /*adjust_parent_depends_on=*/false,
764 			   task->parent_depends_on);
765   priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
766 			 PRIORITY_INSERT_END,
767 			 /*adjust_parent_depends_on=*/false,
768 			 task->parent_depends_on);
769   ++team->task_count;
770   ++team->task_queued_count;
771   gomp_team_barrier_set_task_pending (&team->barrier);
772   do_wake = team->task_running_count + !parent->in_tied_task
773 	    < team->nthreads;
774   gomp_mutex_unlock (&team->task_lock);
775   if (do_wake)
776     gomp_team_barrier_wake (&team->barrier, 1);
777   return true;
778 }
779 
780 /* Given a parent_depends_on task in LIST, move it to the front of its
781    priority so it is run as soon as possible.
782 
783    Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
784 
785    We rearrange the queue such that all parent_depends_on tasks are
786    first, and last_parent_depends_on points to the last such task we
787    rearranged.  For example, given the following tasks in a queue
788    where PD[123] are the parent_depends_on tasks:
789 
790 	task->children
791 	|
792 	V
793 	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
794 
795 	We rearrange such that:
796 
797 	task->children
798 	|	       +--- last_parent_depends_on
799 	|	       |
800 	V	       V
801 	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
802 
803 static void inline
804 priority_list_upgrade_task (struct priority_list *list,
805 			    struct priority_node *node)
806 {
807   struct priority_node *last_parent_depends_on
808     = list->last_parent_depends_on;
809   if (last_parent_depends_on)
810     {
811       node->prev->next = node->next;
812       node->next->prev = node->prev;
813       node->prev = last_parent_depends_on;
814       node->next = last_parent_depends_on->next;
815       node->prev->next = node;
816       node->next->prev = node;
817     }
818   else if (node != list->tasks)
819     {
820       node->prev->next = node->next;
821       node->next->prev = node->prev;
822       node->prev = list->tasks->prev;
823       node->next = list->tasks;
824       list->tasks = node;
825       node->prev->next = node;
826       node->next->prev = node;
827     }
828   list->last_parent_depends_on = node;
829 }
830 
831 /* Given a parent_depends_on TASK in its parent's children_queue, move
832    it to the front of its priority so it is run as soon as possible.
833 
834    PARENT is passed as an optimization.
835 
836    (This function could be defined in priority_queue.c, but we want it
837    inlined, and putting it in priority_queue.h is not an option, given
838    that gomp_task has not been properly defined at that point).  */
839 
840 static void inline
841 priority_queue_upgrade_task (struct gomp_task *task,
842 			     struct gomp_task *parent)
843 {
844   struct priority_queue *head = &parent->children_queue;
845   struct priority_node *node = &task->pnode[PQ_CHILDREN];
846 #if _LIBGOMP_CHECKING_
847   if (!task->parent_depends_on)
848     gomp_fatal ("priority_queue_upgrade_task: task must be a "
849 		"parent_depends_on task");
850   if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
851     gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
852 #endif
853   if (priority_queue_multi_p (head))
854     {
855       struct priority_list *list
856 	= priority_queue_lookup_priority (head, task->priority);
857       priority_list_upgrade_task (list, node);
858     }
859   else
860     priority_list_upgrade_task (&head->l, node);
861 }
862 
863 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
864    the way in LIST so that other tasks can be considered for
865    execution.  LIST contains tasks of type TYPE.
866 
867    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
868    if applicable.  */
869 
870 static void inline
871 priority_list_downgrade_task (enum priority_queue_type type,
872 			      struct priority_list *list,
873 			      struct gomp_task *child_task)
874 {
875   struct priority_node *node = task_to_priority_node (type, child_task);
876   if (list->tasks == node)
877     list->tasks = node->next;
878   else if (node->next != list->tasks)
879     {
880       /* The task in NODE is about to become TIED and TIED tasks
881 	 cannot come before WAITING tasks.  If we're about to
882 	 leave the queue in such an indeterminate state, rewire
883 	 things appropriately.  However, a TIED task at the end is
884 	 perfectly fine.  */
885       struct gomp_task *next_task = priority_node_to_task (type, node->next);
886       if (next_task->kind == GOMP_TASK_WAITING)
887 	{
888 	  /* Remove from list.  */
889 	  node->prev->next = node->next;
890 	  node->next->prev = node->prev;
891 	  /* Rewire at the end.  */
892 	  node->next = list->tasks;
893 	  node->prev = list->tasks->prev;
894 	  list->tasks->prev->next = node;
895 	  list->tasks->prev = node;
896 	}
897     }
898 
899   /* If the current task is the last_parent_depends_on for its
900      priority, adjust last_parent_depends_on appropriately.  */
901   if (__builtin_expect (child_task->parent_depends_on, 0)
902       && list->last_parent_depends_on == node)
903     {
904       struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
905       if (node->prev != node
906 	  && prev_child->kind == GOMP_TASK_WAITING
907 	  && prev_child->parent_depends_on)
908 	list->last_parent_depends_on = node->prev;
909       else
910 	{
911 	  /* There are no more parent_depends_on entries waiting
912 	     to run, clear the list.  */
913 	  list->last_parent_depends_on = NULL;
914 	}
915     }
916 }
917 
918 /* Given a TASK in HEAD that is about to be executed, move it out of
919    the way so that other tasks can be considered for execution.  HEAD
920    contains tasks of type TYPE.
921 
922    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
923    if applicable.
924 
925    (This function could be defined in priority_queue.c, but we want it
926    inlined, and putting it in priority_queue.h is not an option, given
927    that gomp_task has not been properly defined at that point).  */
928 
929 static void inline
930 priority_queue_downgrade_task (enum priority_queue_type type,
931 			       struct priority_queue *head,
932 			       struct gomp_task *task)
933 {
934 #if _LIBGOMP_CHECKING_
935   if (!priority_queue_task_in_queue_p (type, head, task))
936     gomp_fatal ("Attempt to downgrade missing task %p", task);
937 #endif
938   if (priority_queue_multi_p (head))
939     {
940       struct priority_list *list
941 	= priority_queue_lookup_priority (head, task->priority);
942       priority_list_downgrade_task (type, list, task);
943     }
944   else
945     priority_list_downgrade_task (type, &head->l, task);
946 }
947 
948 /* Setup CHILD_TASK to execute.  This is done by setting the task to
949    TIED, and updating all relevant queues so that CHILD_TASK is no
950    longer chosen for scheduling.  Also, remove CHILD_TASK from the
951    overall team task queue entirely.
952 
953    Return TRUE if task or its containing taskgroup has been
954    cancelled.  */
955 
956 static inline bool
957 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
958 		   struct gomp_team *team)
959 {
960 #if _LIBGOMP_CHECKING_
961   if (child_task->parent)
962     priority_queue_verify (PQ_CHILDREN,
963 			   &child_task->parent->children_queue, true);
964   if (child_task->taskgroup)
965     priority_queue_verify (PQ_TASKGROUP,
966 			   &child_task->taskgroup->taskgroup_queue, false);
967   priority_queue_verify (PQ_TEAM, &team->task_queue, false);
968 #endif
969 
970   /* Task is about to go tied, move it out of the way.  */
971   if (parent)
972     priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
973 				   child_task);
974 
975   /* Task is about to go tied, move it out of the way.  */
976   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
977   if (taskgroup)
978     priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
979 				   child_task);
980 
981   priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
982 			 MEMMODEL_RELAXED);
983   child_task->pnode[PQ_TEAM].next = NULL;
984   child_task->pnode[PQ_TEAM].prev = NULL;
985   child_task->kind = GOMP_TASK_TIED;
986 
987   if (--team->task_queued_count == 0)
988     gomp_team_barrier_clear_task_pending (&team->barrier);
989   if ((gomp_team_barrier_cancelled (&team->barrier)
990        || (taskgroup && taskgroup->cancelled))
991       && !child_task->copy_ctors_done)
992     return true;
993   return false;
994 }
995 
996 static void
997 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
998 {
999   struct gomp_task *parent = child_task->parent;
1000   size_t i;
1001 
1002   for (i = 0; i < child_task->depend_count; i++)
1003     if (!child_task->depend[i].redundant)
1004       {
1005 	if (child_task->depend[i].next)
1006 	  child_task->depend[i].next->prev = child_task->depend[i].prev;
1007 	if (child_task->depend[i].prev)
1008 	  child_task->depend[i].prev->next = child_task->depend[i].next;
1009 	else
1010 	  {
1011 	    hash_entry_type *slot
1012 	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1013 				NO_INSERT);
1014 	    if (*slot != &child_task->depend[i])
1015 	      abort ();
1016 	    if (child_task->depend[i].next)
1017 	      *slot = child_task->depend[i].next;
1018 	    else
1019 	      htab_clear_slot (parent->depend_hash, slot);
1020 	  }
1021       }
1022 }
1023 
1024 /* After a CHILD_TASK has been run, adjust the dependency queue for
1025    each task that depends on CHILD_TASK, to record the fact that there
1026    is one less dependency to worry about.  If a task that depended on
1027    CHILD_TASK now has no dependencies, place it in the various queues
1028    so it gets scheduled to run.
1029 
1030    TEAM is the team to which CHILD_TASK belongs to.  */
1031 
1032 static size_t
1033 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1034 				     struct gomp_team *team)
1035 {
1036   struct gomp_task *parent = child_task->parent;
1037   size_t i, count = child_task->dependers->n_elem, ret = 0;
1038   for (i = 0; i < count; i++)
1039     {
1040       struct gomp_task *task = child_task->dependers->elem[i];
1041 
1042       /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
1043 	 TASK's remaining dependencies.  Once TASK has no other
1044 	 depenencies, put it into the various queues so it will get
1045 	 scheduled for execution.  */
1046       if (--task->num_dependees != 0)
1047 	continue;
1048 
1049       struct gomp_taskgroup *taskgroup = task->taskgroup;
1050       if (parent)
1051 	{
1052 	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1053 				 task, task->priority,
1054 				 PRIORITY_INSERT_BEGIN,
1055 				 /*adjust_parent_depends_on=*/true,
1056 				 task->parent_depends_on);
1057 	  if (parent->taskwait)
1058 	    {
1059 	      if (parent->taskwait->in_taskwait)
1060 		{
1061 		  /* One more task has had its dependencies met.
1062 		     Inform any waiters.  */
1063 		  parent->taskwait->in_taskwait = false;
1064 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1065 		}
1066 	      else if (parent->taskwait->in_depend_wait)
1067 		{
1068 		  /* One more task has had its dependencies met.
1069 		     Inform any waiters.  */
1070 		  parent->taskwait->in_depend_wait = false;
1071 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1072 		}
1073 	    }
1074 	}
1075       if (taskgroup)
1076 	{
1077 	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1078 				 task, task->priority,
1079 				 PRIORITY_INSERT_BEGIN,
1080 				 /*adjust_parent_depends_on=*/false,
1081 				 task->parent_depends_on);
1082 	  if (taskgroup->in_taskgroup_wait)
1083 	    {
1084 	      /* One more task has had its dependencies met.
1085 		 Inform any waiters.  */
1086 	      taskgroup->in_taskgroup_wait = false;
1087 	      gomp_sem_post (&taskgroup->taskgroup_sem);
1088 	    }
1089 	}
1090       priority_queue_insert (PQ_TEAM, &team->task_queue,
1091 			     task, task->priority,
1092 			     PRIORITY_INSERT_END,
1093 			     /*adjust_parent_depends_on=*/false,
1094 			     task->parent_depends_on);
1095       ++team->task_count;
1096       ++team->task_queued_count;
1097       ++ret;
1098     }
1099   free (child_task->dependers);
1100   child_task->dependers = NULL;
1101   if (ret > 1)
1102     gomp_team_barrier_set_task_pending (&team->barrier);
1103   return ret;
1104 }
1105 
1106 static inline size_t
1107 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1108 				  struct gomp_team *team)
1109 {
1110   if (child_task->depend_count == 0)
1111     return 0;
1112 
1113   /* If parent is gone already, the hash table is freed and nothing
1114      will use the hash table anymore, no need to remove anything from it.  */
1115   if (child_task->parent != NULL)
1116     gomp_task_run_post_handle_depend_hash (child_task);
1117 
1118   if (child_task->dependers == NULL)
1119     return 0;
1120 
1121   return gomp_task_run_post_handle_dependers (child_task, team);
1122 }
1123 
1124 /* Remove CHILD_TASK from its parent.  */
1125 
1126 static inline void
1127 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1128 {
1129   struct gomp_task *parent = child_task->parent;
1130   if (parent == NULL)
1131     return;
1132 
1133   /* If this was the last task the parent was depending on,
1134      synchronize with gomp_task_maybe_wait_for_dependencies so it can
1135      clean up and return.  */
1136   if (__builtin_expect (child_task->parent_depends_on, 0)
1137       && --parent->taskwait->n_depend == 0
1138       && parent->taskwait->in_depend_wait)
1139     {
1140       parent->taskwait->in_depend_wait = false;
1141       gomp_sem_post (&parent->taskwait->taskwait_sem);
1142     }
1143 
1144   if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1145 			     child_task, MEMMODEL_RELEASE)
1146       && parent->taskwait && parent->taskwait->in_taskwait)
1147     {
1148       parent->taskwait->in_taskwait = false;
1149       gomp_sem_post (&parent->taskwait->taskwait_sem);
1150     }
1151   child_task->pnode[PQ_CHILDREN].next = NULL;
1152   child_task->pnode[PQ_CHILDREN].prev = NULL;
1153 }
1154 
1155 /* Remove CHILD_TASK from its taskgroup.  */
1156 
1157 static inline void
1158 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1159 {
1160   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1161   if (taskgroup == NULL)
1162     return;
1163   bool empty = priority_queue_remove (PQ_TASKGROUP,
1164 				      &taskgroup->taskgroup_queue,
1165 				      child_task, MEMMODEL_RELAXED);
1166   child_task->pnode[PQ_TASKGROUP].next = NULL;
1167   child_task->pnode[PQ_TASKGROUP].prev = NULL;
1168   if (taskgroup->num_children > 1)
1169     --taskgroup->num_children;
1170   else
1171     {
1172       /* We access taskgroup->num_children in GOMP_taskgroup_end
1173 	 outside of the task lock mutex region, so
1174 	 need a release barrier here to ensure memory
1175 	 written by child_task->fn above is flushed
1176 	 before the NULL is written.  */
1177       __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1178     }
1179   if (empty && taskgroup->in_taskgroup_wait)
1180     {
1181       taskgroup->in_taskgroup_wait = false;
1182       gomp_sem_post (&taskgroup->taskgroup_sem);
1183     }
1184 }
1185 
/* Run tasks from the current team's task queue on behalf of a thread
   whose barrier state is STATE.

   If the calling thread is the last one at the barrier (per
   gomp_barrier_last_thread) and team->task_count is zero, the barrier
   is marked done and waiters are woken immediately.  Otherwise the
   loop below repeatedly takes a task from team->task_queue, runs it
   with team->task_lock released, and then does the post-run
   bookkeeping with the lock held again.  The function returns once
   it finds the team queue empty.  */

void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* The task this thread was running on entry; restored into
     thr->task after each child has run.  */
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  /* A finished task whose destruction is deferred until the task
     lock has been dropped.  */
  struct gomp_task *to_free = NULL;
  /* Number of threads to wake once the task lock has been dropped.  */
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  /* No tasks outstanding: the barrier can complete now.  */
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  /* Each iteration starts with team->task_lock held.  */
  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      /* Skip running the task; go straight to the post-run
		 bookkeeping, still holding the lock.  */
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      /* With the lock released, perform the work deferred from the
	 previous iteration.  */
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* A NULL fn marks a target task, which is run through
	     gomp_target_task_fn instead.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  /* The target task has not completed yet: mark it
		     as running asynchronously and look for other
		     work.  */
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  team->task_running_count--;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* The team queue was empty: nothing left for this thread.  */
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  /* Post-run bookkeeping, done with the task lock held.  */
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  /* A cancelled task jumped here before task_running_count
	     was incremented, so only undo it for tasks that ran.  */
	  if (!cancelled)
	    team->task_running_count--;
	  if (new_tasks > 1)
	    {
	      /* Wake at most NEW_TASKS threads, and no more than the
		 number of threads not currently running a task.  */
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      /* That was the last outstanding task and the barrier
		 was waiting for it: complete the barrier.  The lock
		 is dropped around the wake-up and retaken for the
		 next iteration.  */
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}
1306 
/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  While waiting, the
   calling thread runs any child that is ready (GOMP_TASK_WAITING);
   when none is, it sleeps on a semaphore that is posted by the code
   that finishes or releases children.  Returns once the current
   task's children queue is empty.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  /* A finished task whose destruction is deferred until the task
     lock has been dropped.  */
  struct gomp_task *to_free = NULL;
  /* Wait state published through task->taskwait while this thread
     sleeps; it lives on this function's stack.  */
  struct gomp_taskwait taskwait;
  /* Number of threads to wake once the task lock has been dropped.  */
  int do_wake = 0;

  /* The acquire barrier on the emptiness check of
     task->children_queue here synchronizes with the release
     semantics of the queue removal in
     gomp_task_run_post_remove_parent.  It is not necessary that we
     synchronize with other writes at this point, but we must ensure
     that all writes to memory by a child thread task work function
     are seen before we exit from GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  /* Output of priority_queue_next_task below; when set, the finished
     task is removed from the children queue in the post-run section.
     NOTE(review): presumably it reports whether the task was picked
     from the children queue rather than the team queue -- confirm
     against priority_queue_next_task.  */
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  /* Each iteration starts with team->task_lock held.  */
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
	{
	  /* No children left: tear down and return.  The semaphore
	     only exists if task->taskwait was ever set below.  */
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      /* Skip running the task; go straight to the post-run
		 bookkeeping, still holding the lock.  */
	      goto finish_cancelled;
	    }
	}
      else
	{
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	  if (task->taskwait == NULL)
	    {
	      /* First time we need to sleep: set up the semaphore
		 and publish the wait state.  */
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      /* With the lock released, perform the work deferred from the
	 previous iteration.  */
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* A NULL fn marks a target task, which is run through
	     gomp_target_task_fn instead.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  /* The target task has not completed yet: mark it
		     as running asynchronously and look for other
		     work.  */
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: sleep until a child finishes or becomes
	   ready and the semaphore is posted.  */
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  /* Post-run bookkeeping, done with the task lock held.  */
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  if (child_q)
	    {
	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				     child_task, MEMMODEL_RELAXED);
	      child_task->pnode[PQ_CHILDREN].next = NULL;
	      child_task->pnode[PQ_CHILDREN].prev = NULL;
	    }

	  gomp_clear_parent (&child_task->children_queue);

	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* Wake at most NEW_TASKS threads; the current thread is
		 discounted when its task is not a tied task.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
1458 
/* An undeferred task is about to run.  Wait for all tasks that this
   undeferred task depends on.

   This is done by first putting all known ready dependencies
   (dependencies that have their own dependencies met) at the top of
   the scheduling queues.  Then we iterate through these imminently
   ready tasks (and possibly other high priority tasks), and run them.
   If we run out of ready dependencies to execute, we either wait for
   the remaining dependencies to finish, or wait for them to get
   scheduled so we can run them.

   DEPEND is as in GOMP_task.  As read here, DEPEND[0] is the number
   of dependence addresses, DEPEND[1] is the value compared against
   the index to decide whether an "in" entry may be skipped, and the
   addresses themselves start at DEPEND[2].  */

void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  /* Wait state published through task->taskwait; it lives on this
     function's stack.  */
  struct gomp_taskwait taskwait;
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  /* Number of sibling tasks newly marked parent_depends_on.  */
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  /* A finished task whose destruction is deferred until the task
     lock has been dropped.  */
  struct gomp_task *to_free = NULL;
  /* Number of threads to wake once the task lock has been dropped.  */
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  /* Walk the hash chain of every address in DEPEND and mark the
     tasks found there as ones the current task must wait for.  */
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + 2];
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	/* An address at index >= NOUT does not wait for entries that
	   are themselves marked is_in.  */
	if (i >= nout && ent->is_in)
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    /* Count each task only once, even if it is reachable
	       through several addresses.  */
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		/* If dependency TSK itself has no dependencies and is
		   ready to run, move it up front so that we run it as
		   soon as possible.  */
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  priority_queue_upgrade_task (tsk, task);
	      }
	  }
    }
  if (num_awaited == 0)
    {
      /* Nothing to wait for.  */
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  /* Each iteration starts with team->task_lock held.  */
  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  /* Every awaited task has finished: tear down and return.  */
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}

      /* Theoretically when we have multiple priorities, we should
	 choose between the highest priority item in
	 task->children_queue and team->task_queue here, so we should
	 use priority_queue_next_task().  However, since we are
	 running an undeferred task, perhaps that makes all tasks it
	 depends on undeferred, thus a priority of INF?  This would
	 make it unnecessary to take anything into account here,
	 but the dependencies.

	 On the other hand, if we want to use priority_queue_next_task(),
	 care should be taken to only use priority_queue_remove()
	 below if the task was actually removed from the children
	 queue.  */
      bool ignored;
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      /* Skip running the task; go straight to the post-run
		 bookkeeping, still holding the lock.  */
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      /* With the lock released, perform the work deferred from the
	 previous iteration.  */
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* A NULL fn marks a target task, which is run through
	     gomp_target_task_fn instead.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  /* The target task has not completed yet: mark it
		     as running asynchronously and look for other
		     work.  */
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: sleep until the semaphore is posted.  */
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  /* Post-run bookkeeping, done with the task lock held.  */
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  /* One fewer awaited dependency if this task was one.  */
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;

	  /* The task always came from the children queue here (see
	     the comment above priority_queue_next_task), so it is
	     removed unconditionally.  */
	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				 child_task, MEMMODEL_RELAXED);
	  child_task->pnode[PQ_CHILDREN].next = NULL;
	  child_task->pnode[PQ_CHILDREN].prev = NULL;

	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* Wake at most NEW_TASKS threads; the current thread is
		 discounted when its task is not a tied task.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
1648 
/* Entry point for the taskyield directive.  The current
   implementation does not attempt to switch to another task, so this
   is deliberately a no-op.  */

void
GOMP_taskyield (void)
{
  /* Intentionally empty.  */
}
1656 
1657 void
1658 GOMP_taskgroup_start (void)
1659 {
1660   struct gomp_thread *thr = gomp_thread ();
1661   struct gomp_team *team = thr->ts.team;
1662   struct gomp_task *task = thr->task;
1663   struct gomp_taskgroup *taskgroup;
1664 
1665   /* If team is NULL, all tasks are executed as
1666      GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
1667      taskgroup and their descendant tasks will be finished
1668      by the time GOMP_taskgroup_end is called.  */
1669   if (team == NULL)
1670     return;
1671   taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
1672   taskgroup->prev = task->taskgroup;
1673   priority_queue_init (&taskgroup->taskgroup_queue);
1674   taskgroup->in_taskgroup_wait = false;
1675   taskgroup->cancelled = false;
1676   taskgroup->num_children = 0;
1677   gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1678   task->taskgroup = taskgroup;
1679 }
1680 
/* Entry point for the end of a taskgroup region.  Wait until the
   current taskgroup has no children left (num_children is zero),
   running ready tasks from the taskgroup queue, or from the current
   task's children queue when the taskgroup queue is empty, while
   waiting.  Afterwards, pop the taskgroup off the current task and
   free it.  Does nothing when the thread has no team.  */

void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  /* A finished task whose destruction is deferred until the task
     lock has been dropped.  */
  struct gomp_task *to_free = NULL;
  /* Number of threads to wake once the task lock has been dropped.  */
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
	 thr->ts.team == NULL, but inside of the taskgroup there
	 is #pragma omp target nowait that creates an implicit
	 team with a single thread.  In this case, we want to wait
	 for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  /* Receives an output of priority_queue_next_task that is not
     needed here.  */
  bool unused;
  gomp_mutex_lock (&team->task_lock);
  /* Each iteration starts with team->task_lock held.  */
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
				  MEMMODEL_RELAXED))
	{
	  if (taskgroup->num_children)
	    {
	      /* The taskgroup still has children, but none is in its
		 queue.  Try the current task's own children instead,
		 or sleep if there are none either.  */
	      if (priority_queue_empty_p (&task->children_queue,
					  MEMMODEL_RELAXED))
		goto do_wait;
	      child_task
		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
					    PQ_TEAM, &team->task_queue,
					    &unused);
	    }
	  else
	    {
	      /* No children left: the taskgroup is complete.  */
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task
	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      /* Skip running the task; go straight to the post-run
		 bookkeeping, still holding the lock.  */
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	 do_wait:
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      /* With the lock released, perform the work deferred from the
	 previous iteration.  */
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* A NULL fn marks a target task, which is run through
	     gomp_target_task_fn instead.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  /* The target task has not completed yet: mark it
		     as running asynchronously and look for other
		     work.  */
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: sleep until the taskgroup semaphore is
	   posted.  */
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  /* Post-run bookkeeping, done with the task lock held.  */
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      /* Wake at most NEW_TASKS threads; the current thread is
		 discounted when its task is not a tied task.  */
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  /* Pop the taskgroup and release its resources.  */
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
1842 
1843 int
1844 omp_in_final (void)
1845 {
1846   struct gomp_thread *thr = gomp_thread ();
1847   return thr->task && thr->task->final_task;
1848 }
1849 
1850 ialias (omp_in_final)
1851