/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of tasks in response to task
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

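/* hashtab.h below is used as a pseudo-template header: as far as this
   file is concerned, the includer provides hash_entry_type and the
   htab_alloc/htab_free hooks before inclusion, and the htab_hash and
   htab_eq functions afterwards.  */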
typedef struct gomp_task_depend_entry *hash_entry_type;

static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}

#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}
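
/* A task's depend_hash maps each address named in a depend clause of a
   still outstanding child task to a chain of gomp_task_depend_entry
   structures, one per task mentioning that address.  As an
   illustrative sketch (user code, not part of this file):

     int x;
     #pragma omp task depend(out: x)  // registers an entry keyed by &x
       foo (&x);
     #pragma omp task depend(in: x)   // finds &x, records a dependency
       bar (&x);
*/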

/* Create a new task data structure.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
		struct gomp_task_icv *prev_icv)
{
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  task->children = NULL;
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}

/* Clean up a task, after completing it.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}

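/* Orphan all tasks on the circular CHILDREN list by clearing their
   parent pointers, so that children which outlive their parent no
   longer reference it.  */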
static inline void
gomp_clear_parent (struct gomp_task *children)
{
  struct gomp_task *task = children;

  if (task)
    do
      {
	task->parent = NULL;
	task = task->next_child;
      }
    while (task != children);
}

static void gomp_task_maybe_wait_for_dependencies (void **depend);

/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.  */

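/* When bit 3 (0x8) of FLAGS is set, DEPEND points to a vector laid out
   as: depend[0] is the number of depend clauses, depend[1] the number
   of out/inout clauses (which come first), and depend[2] onwards one
   address per clause.  As a rough sketch of the compiler-generated
   lowering (the exact call is emitted by GCC, not written by hand):

     // #pragma omp task depend(out: x) depend(in: y)
     void *depend[4] = { (void *) 2, (void *) 1, &x, &y };
     GOMP_task (fn, &data, NULL, sizeof data, __alignof__ (data),
		true, 8, depend);
*/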
void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
     might run on a different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  if (flags & 1)
    flags &= ~1;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check whether there are any dependencies
	 among them.  If there are, we need to wait for those tasks.
	 There is no need to handle depend clauses for non-deferred
	 tasks other than this, because the parent task is suspended
	 until the child task finishes and thus it can't start further
	 child tasks.  */
      if ((flags & 8) && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_IFFALSE;
      task.final_task = (thr->task && thr->task->final_task) || (flags & 2);
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (task.children != NULL)
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (task.children);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & 8)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->kind = GOMP_TASK_IFFALSE;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & 2) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  size_t ndepend = (uintptr_t) depend[0];
	  size_t nout = (uintptr_t) depend[1];
	  size_t i;
	  hash_entry_type ent;

	  task->depend_count = ndepend;
	  task->num_dependees = 0;
	  if (parent->depend_hash == NULL)
	    parent->depend_hash
	      = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
	  for (i = 0; i < ndepend; i++)
	    {
	      task->depend[i].addr = depend[2 + i];
	      task->depend[i].next = NULL;
	      task->depend[i].prev = NULL;
	      task->depend[i].task = task;
	      task->depend[i].is_in = i >= nout;
	      task->depend[i].redundant = false;
	      task->depend[i].redundant_out = false;

	      hash_entry_type *slot
		= htab_find_slot (&parent->depend_hash, &task->depend[i],
				  INSERT);
	      hash_entry_type out = NULL, last = NULL;
	      if (*slot)
		{
		  /* If multiple depend clauses of the same task
		     reference the same address, all but the first one
		     are redundant.  As inout/out come first, if any of
		     them is inout/out, it will win, which is the right
		     semantics.  */
		  if ((*slot)->task == task)
		    {
		      task->depend[i].redundant = true;
		      continue;
		    }
		  for (ent = *slot; ent; ent = ent->next)
		    {
		      if (ent->redundant_out)
			break;

		      last = ent;

		      /* depend(in:...) doesn't depend on earlier
			 depend(in:...).  */
		      if (i >= nout && ent->is_in)
			continue;

		      if (!ent->is_in)
			out = ent;

		      struct gomp_task *tsk = ent->task;
		      if (tsk->dependers == NULL)
			{
			  tsk->dependers
			    = gomp_malloc (sizeof (struct gomp_dependers_vec)
					   + 6 * sizeof (struct gomp_task *));
			  tsk->dependers->n_elem = 1;
			  tsk->dependers->allocated = 6;
			  tsk->dependers->elem[0] = task;
			  task->num_dependees++;
			  continue;
			}
		      /* We already have some other dependency on tsk
			 from an earlier depend clause.  */
		      else if (tsk->dependers->n_elem
			       && (tsk->dependers->elem[tsk->dependers->n_elem
							- 1]
				   == task))
			continue;
		      else if (tsk->dependers->n_elem
			       == tsk->dependers->allocated)
			{
			  tsk->dependers->allocated
			    = tsk->dependers->allocated * 2 + 2;
			  tsk->dependers
			    = gomp_realloc (tsk->dependers,
					    sizeof (struct gomp_dependers_vec)
					    + (tsk->dependers->allocated
					       * sizeof (struct gomp_task *)));
			}
		      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
		      task->num_dependees++;
		    }
		  task->depend[i].next = *slot;
		  (*slot)->prev = &task->depend[i];
		}
	      *slot = &task->depend[i];

	      /* There is no need to store more than one depend({,in}out:)
		 task per address in the hash table chain for the purpose
		 of creation of deferred tasks, because each out
		 depends on all earlier outs, thus it is enough to record
		 just the last depend({,in}out:).  For depend(in:), we need
		 to keep all of the previous ones not terminated yet, because
		 a later depend({,in}out:) might need to depend on all of
		 them.  So, if the new task's clause is depend({,in}out:),
		 we know there is at most one other depend({,in}out:) clause
		 in the list (out).  For non-deferred tasks we want to see
		 all outs, so they are moved to the end of the chain;
		 after the first redundant_out entry, all following
		 entries should be redundant_out.  */
	      if (!task->depend[i].is_in && out)
		{
		  if (out != last)
		    {
		      out->next->prev = out->prev;
		      out->prev->next = out->next;
		      out->next = last->next;
		      out->prev = last;
		      last->next = out;
		      if (out->next)
			out->next->prev = out;
		    }
		  out->redundant_out = true;
		}
	    }
	  if (task->num_dependees)
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}
      if (parent->children)
	{
	  task->next_child = parent->children;
	  task->prev_child = parent->children->prev_child;
	  task->next_child->prev_child = task;
	  task->prev_child->next_child = task;
	}
      else
	{
	  task->next_child = task;
	  task->prev_child = task;
	}
      parent->children = task;
      if (taskgroup)
	{
	  if (taskgroup->children)
	    {
	      task->next_taskgroup = taskgroup->children;
	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	      task->next_taskgroup->prev_taskgroup = task;
	      task->prev_taskgroup->next_taskgroup = task;
	    }
	  else
	    {
	      task->next_taskgroup = task;
	      task->prev_taskgroup = task;
	    }
	  taskgroup->children = task;
	}
      if (team->task_queue)
	{
	  task->next_queue = team->task_queue;
	  task->prev_queue = team->task_queue->prev_queue;
	  task->next_queue->prev_queue = task;
	  task->prev_queue->next_queue = task;
	}
      else
	{
	  task->next_queue = task;
	  task->prev_queue = task;
	  team->task_queue = task;
	}
      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}

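/* Remove CHILD_TASK from the team's task queue, advance PARENT's and
   TASKGROUP's children pointers past it if they point to it, and mark
   the task as tied to the calling thread.  Returns true if the task
   is effectively cancelled (the team barrier or TASKGROUP has been
   cancelled and no copy constructors have run), in which case the
   caller must not execute it.  */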
static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_taskgroup *taskgroup, struct gomp_team *team)
{
  if (parent)
    {
      if (parent->children == child_task)
	parent->children = child_task->next_child;
      if (__builtin_expect (child_task->parent_depends_on, 0)
	  && parent->taskwait->last_parent_depends_on == child_task)
	{
	  if (child_task->prev_child->kind == GOMP_TASK_WAITING
	      && child_task->prev_child->parent_depends_on)
	    parent->taskwait->last_parent_depends_on = child_task->prev_child;
	  else
	    parent->taskwait->last_parent_depends_on = NULL;
	}
    }
  if (taskgroup && taskgroup->children == child_task)
    taskgroup->children = child_task->next_taskgroup;
  child_task->prev_queue->next_queue = child_task->next_queue;
  child_task->next_queue->prev_queue = child_task->prev_queue;
  if (team->task_queue == child_task)
    {
      if (child_task->next_queue != child_task)
	team->task_queue = child_task->next_queue;
      else
	team->task_queue = NULL;
    }
  child_task->kind = GOMP_TASK_TIED;
  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  if ((gomp_team_barrier_cancelled (&team->barrier)
       || (taskgroup && taskgroup->cancelled))
      && !child_task->copy_ctors_done)
    return true;
  return false;
}

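/* After CHILD_TASK has completed, remove its non-redundant depend
   entries from the chains of its parent's dependency hash table.  */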
static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}

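/* For each task recorded as depending on CHILD_TASK, decrement its
   count of outstanding dependencies.  Tasks whose count drops to zero
   are queued on their parent's children list, on their taskgroup's
   children list and on the team's task queue, and any thread blocked
   in taskwait, depend wait or taskgroup wait is woken.  Returns the
   number of tasks that became runnable.  */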
static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  if (parent->children)
	    {
	      /* If the parent is in gomp_task_maybe_wait_for_dependencies
		 and does not need to wait for this task, put the task
		 after all the ready-to-run tasks it does wait for.  */
	      if (parent->taskwait && parent->taskwait->last_parent_depends_on
		  && !task->parent_depends_on)
		{
		  struct gomp_task *last_parent_depends_on
		    = parent->taskwait->last_parent_depends_on;
		  task->next_child = last_parent_depends_on->next_child;
		  task->prev_child = last_parent_depends_on;
		}
	      else
		{
		  task->next_child = parent->children;
		  task->prev_child = parent->children->prev_child;
		  parent->children = task;
		}
	      task->next_child->prev_child = task;
	      task->prev_child->next_child = task;
	    }
	  else
	    {
	      task->next_child = task;
	      task->prev_child = task;
	      parent->children = task;
	    }
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      if (parent->taskwait->last_parent_depends_on == NULL
		  && task->parent_depends_on)
		parent->taskwait->last_parent_depends_on = task;
	    }
	}
      if (taskgroup)
	{
	  if (taskgroup->children)
	    {
	      task->next_taskgroup = taskgroup->children;
	      task->prev_taskgroup = taskgroup->children->prev_taskgroup;
	      task->next_taskgroup->prev_taskgroup = task;
	      task->prev_taskgroup->next_taskgroup = task;
	    }
	  else
	    {
	      task->next_taskgroup = task;
	      task->prev_taskgroup = task;
	    }
	  taskgroup->children = task;
	  if (taskgroup->in_taskgroup_wait)
	    {
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      if (team->task_queue)
	{
	  task->next_queue = team->task_queue;
	  task->prev_queue = team->task_queue->prev_queue;
	  task->next_queue->prev_queue = task;
	  task->prev_queue->next_queue = task;
	}
      else
	{
	  task->next_queue = task;
	  task->prev_queue = task;
	  team->task_queue = task;
	}
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}

static inline size_t
gomp_task_run_post_handle_depend (struct gomp_task *child_task,
				  struct gomp_team *team)
{
  if (child_task->depend_count == 0)
    return 0;

  /* If the parent is already gone, the hash table has been freed and
     nothing will use it anymore, so there is no need to remove
     anything from it.  */
  if (child_task->parent != NULL)
    gomp_task_run_post_handle_depend_hash (child_task);

  if (child_task->dependers == NULL)
    return 0;

  return gomp_task_run_post_handle_dependers (child_task, team);
}

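/* Unlink CHILD_TASK from its parent's children list after it has run,
   waking the parent if this was the last child it was waiting for.  */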
static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->prev_child->next_child = child_task->next_child;
  child_task->next_child->prev_child = child_task->prev_child;
  if (parent->children != child_task)
    return;
  if (child_task->next_child != child_task)
    parent->children = child_task->next_child;
  else
    {
      /* We access task->children in GOMP_taskwait outside of the task
	 lock mutex region, so we need a release barrier here to ensure
	 memory written by child_task->fn above is flushed before the
	 NULL is written.  */
      __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE);
      if (parent->taskwait && parent->taskwait->in_taskwait)
	{
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
}

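/* Unlink CHILD_TASK from its taskgroup's children list after it has
   run, decrement the taskgroup's child count, and wake a thread
   blocked in GOMP_taskgroup_end if this was the last child.  */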
static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup;
  child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so we need a release
	 barrier here to ensure memory written by child_task->fn above
	 is flushed before the 0 is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (taskgroup->children != child_task)
    return;
  if (child_task->next_taskgroup != child_task)
    taskgroup->children = child_task->next_taskgroup;
  else
    {
      taskgroup->children = NULL;
      if (taskgroup->in_taskgroup_wait)
	{
	  taskgroup->in_taskgroup_wait = false;
	  gomp_sem_post (&taskgroup->taskgroup_sem);
	}
    }
}

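/* Called by threads that reach the team barrier: repeatedly take
   tasks off the team's task queue and run them; the last thread
   releases the barrier once all tasks have completed.  */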
void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (team->task_queue != NULL)
	{
	  child_task = team->task_queue;
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 child_task->taskgroup, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}

/* Called when encountering a taskwait directive.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL)
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (task->children == NULL)
	{
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      if (task->children->kind == GOMP_TASK_WAITING)
	{
	  child_task = task->children;
	  cancelled
	    = gomp_task_run_pre (child_task, task, child_task->taskgroup,
				 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are already running
	     in other threads.  Wait for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  child_task->prev_child->next_child = child_task->next_child;
	  child_task->next_child->prev_child = child_task->prev_child;
	  if (task->children == child_task)
	    {
	      if (child_task->next_child != child_task)
		task->children = child_task->next_child;
	      else
		task->children = NULL;
	    }
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}

/* This is like GOMP_taskwait, but we only wait for tasks that the
   upcoming task depends on.  */

static void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  struct gomp_task *last_parent_depends_on = NULL;
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + 2];
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	if (i >= nout && ent->is_in)
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  {
		    /* If a task we need to wait for is not already
		       running and is ready to be scheduled, move it to
		       the front, so that we run it as soon as
		       possible.  */
		    if (last_parent_depends_on)
		      {
			tsk->prev_child->next_child = tsk->next_child;
			tsk->next_child->prev_child = tsk->prev_child;
			tsk->prev_child = last_parent_depends_on;
			tsk->next_child = last_parent_depends_on->next_child;
			tsk->prev_child->next_child = tsk;
			tsk->next_child->prev_child = tsk;
		      }
		    else if (tsk != task->children)
		      {
			tsk->prev_child->next_child = tsk->next_child;
			tsk->next_child->prev_child = tsk->prev_child;
			tsk->prev_child = task->children;
			tsk->next_child = task->children->next_child;
			task->children = tsk;
			tsk->prev_child->next_child = tsk;
			tsk->next_child->prev_child = tsk;
		      }
		    last_parent_depends_on = tsk;
		  }
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  taskwait.last_parent_depends_on = last_parent_depends_on;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      if (task->children->kind == GOMP_TASK_WAITING)
	{
	  child_task = task->children;
	  cancelled
	    = gomp_task_run_pre (child_task, task, child_task->taskgroup,
				 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are already running
	   in other threads.  Wait for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;
	  child_task->prev_child->next_child = child_task->next_child;
	  child_task->next_child->prev_child = child_task->prev_child;
	  if (task->children == child_task)
	    {
	      if (child_task->next_child != child_task)
		task->children = child_task->next_child;
	      else
		task->children = NULL;
	    }
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}

/* Called when encountering a taskyield directive.  */

void
GOMP_taskyield (void)
{
  /* Nothing at the moment.  */
}

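/* Called when encountering the start of a taskgroup construct.  */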
void
GOMP_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;

  /* If team is NULL, all tasks are executed as GOMP_TASK_IFFALSE
     tasks and thus all child tasks of the taskgroup and their
     descendants will have finished by the time GOMP_taskgroup_end
     is called.  */
  if (team == NULL)
    return;
  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
  taskgroup->prev = task->taskgroup;
  taskgroup->children = NULL;
  taskgroup->in_taskgroup_wait = false;
  taskgroup->cancelled = false;
  taskgroup->num_children = 0;
  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
  task->taskgroup = taskgroup;
}

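/* Called at the end of a taskgroup construct; waits until all tasks
   created in the taskgroup, and their descendants, have completed.
   As a rough sketch of the compiler-generated lowering (emitted by
   GCC, not written by hand):

     // #pragma omp taskgroup
     // { ... body creating tasks ... }
     GOMP_taskgroup_start ();
     ...
     GOMP_taskgroup_end ();
*/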
void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (taskgroup->children == NULL)
	{
	  if (taskgroup->num_children)
	    {
	      if (task->children == NULL)
		goto do_wait;
	      child_task = task->children;
	    }
	  else
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task = taskgroup->children;
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, taskgroup,
				 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	 do_wait:
	  /* All tasks we are waiting for are already running
	     in other threads.  Wait for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (child_task->children);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}

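/* omp_in_final: return true when called from within a final task
   region.  */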
int
omp_in_final (void)
{
  struct gomp_thread *thr = gomp_thread ();
  return thr->task && thr->task->final_task;
}

ialias (omp_in_final)
1217