xref: /netbsd-src/external/gpl3/gcc/dist/libgomp/task.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Copyright (C) 2007-2022 Free Software Foundation, Inc.
2    Contributed by Richard Henderson <rth@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file handles the maintenance of tasks in response to task
27    creation and termination.  */
28 
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include "gomp-constants.h"
34 
35 typedef struct gomp_task_depend_entry *hash_entry_type;
36 
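/* "hashtab.h" below is a template-style header: it picks up the
   hash_entry_type, htab_alloc and htab_free definitions above and relies
   on the htab_hash and htab_eq functions defined after the inclusion.
   The resulting table maps a depended-on address (the ADDR field of a
   gomp_task_depend_entry) to the chain of dependence entries for that
   address.  */
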
37 static inline void *
38 htab_alloc (size_t size)
39 {
40   return gomp_malloc (size);
41 }
42 
43 static inline void
44 htab_free (void *ptr)
45 {
46   free (ptr);
47 }
48 
49 #include "hashtab.h"
50 
51 static inline hashval_t
52 htab_hash (hash_entry_type element)
53 {
54   return hash_pointer (element->addr);
55 }
56 
57 static inline bool
58 htab_eq (hash_entry_type x, hash_entry_type y)
59 {
60   return x->addr == y->addr;
61 }
62 
63 /* Create a new task data structure.  */
64 
65 void
66 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
67 		struct gomp_task_icv *prev_icv)
68 {
69   /* It would seem that using memset here would be a win, but it turns
70      out that partially filling gomp_task allows us to keep the
71      overhead of task creation low.  In the nqueens-1.c test, for a
72      sufficiently large N, we drop the overhead from 5-6% to 1%.
73 
74      Note, the nqueens-1.c test in serial mode is a good test to
75      benchmark the overhead of creating tasks as there are millions of
76      tiny tasks created that all run undeferred.  */
77   task->parent = parent_task;
78   priority_queue_init (&task->children_queue);
79   task->taskgroup = NULL;
80   task->dependers = NULL;
81   task->depend_hash = NULL;
82   task->taskwait = NULL;
83   task->depend_count = 0;
84   task->completion_sem = NULL;
85   task->deferred_p = false;
86   task->icv = *prev_icv;
87   task->kind = GOMP_TASK_IMPLICIT;
88   task->in_tied_task = false;
89   task->final_task = false;
90   task->copy_ctors_done = false;
91   task->parent_depends_on = false;
92 }
93 
94 /* Clean up a task, after completing it.  */
95 
96 void
97 gomp_end_task (void)
98 {
99   struct gomp_thread *thr = gomp_thread ();
100   struct gomp_task *task = thr->task;
101 
102   gomp_finish_task (task);
103   thr->task = task->parent;
104 }
105 
106 /* Clear the parent field of every task in LIST.  */
107 
108 static inline void
109 gomp_clear_parent_in_list (struct priority_list *list)
110 {
111   struct priority_node *p = list->tasks;
112   if (p)
113     do
114       {
115 	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
116 	p = p->next;
117       }
118     while (p != list->tasks);
119 }
120 
121 /* Splay tree version of gomp_clear_parent_in_list.
122 
123    Clear the parent field of every task in NODE within SP, and free
124    the node when done.  */
125 
126 static void
127 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
128 {
129   if (!node)
130     return;
131   prio_splay_tree_node left = node->left, right = node->right;
132   gomp_clear_parent_in_list (&node->key.l);
133 #if _LIBGOMP_CHECKING_
134   memset (node, 0xaf, sizeof (*node));
135 #endif
136   /* No need to remove the node from the tree.  We're nuking
137      everything, so just free the nodes and our caller can clear the
138      entire splay tree.  */
139   free (node);
140   gomp_clear_parent_in_tree (sp, left);
141   gomp_clear_parent_in_tree (sp, right);
142 }
143 
144 /* Clear the parent field of every task in Q and remove every task
145    from Q.  */
146 
147 static inline void
148 gomp_clear_parent (struct priority_queue *q)
149 {
150   if (priority_queue_multi_p (q))
151     {
152       gomp_clear_parent_in_tree (&q->t, q->t.root);
153       /* All the nodes have been cleared in gomp_clear_parent_in_tree.
154 	 No need to remove anything.  We can just nuke everything.  */
155       q->t.root = NULL;
156     }
157   else
158     gomp_clear_parent_in_list (&q->l);
159 }
160 
161 /* Helper function for GOMP_task and gomp_create_target_task.
162 
163    For a TASK with in/out dependencies, fill in the various dependency
164    queues.  PARENT is the parent of said task.  DEPEND is as in
165    GOMP_task.  */
166 
167 static void
168 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
169 			 void **depend)
170 {
171   size_t ndepend = (uintptr_t) depend[0];
172   size_t i;
173   hash_entry_type ent;
174 
175   if (ndepend)
176     {
177       /* depend[0] is total # */
178       size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
179       /* ndepend - nout is # of in: */
180       for (i = 0; i < ndepend; i++)
181 	{
182 	  task->depend[i].addr = depend[2 + i];
183 	  task->depend[i].is_in = i >= nout;
184 	}
185     }
186   else
187     {
188       ndepend = (uintptr_t) depend[1]; /* total # */
189       size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
190       size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
191       /* For now we treat mutexinoutset like out, which is compliant, but
192 	 inefficient.  */
193       size_t nin = (uintptr_t) depend[4]; /* # of in: */
194       /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
195       size_t normal = nout + nmutexinoutset + nin;
196       size_t n = 0;
197       for (i = normal; i < ndepend; i++)
198 	{
199 	  void **d = (void **) (uintptr_t) depend[5 + i];
200 	  switch ((uintptr_t) d[1])
201 	    {
202 	    case GOMP_DEPEND_OUT:
203 	    case GOMP_DEPEND_INOUT:
204 	    case GOMP_DEPEND_MUTEXINOUTSET:
205 	      break;
206 	    case GOMP_DEPEND_IN:
207 	      continue;
208 	    default:
209 	      gomp_fatal ("unknown omp_depend_t dependence type %d",
210 			  (int) (uintptr_t) d[1]);
211 	    }
212 	  task->depend[n].addr = d[0];
213 	  task->depend[n++].is_in = 0;
214 	}
215       for (i = 0; i < normal; i++)
216 	{
217 	  task->depend[n].addr = depend[5 + i];
218 	  task->depend[n++].is_in = i >= nout + nmutexinoutset;
219 	}
220       for (i = normal; i < ndepend; i++)
221 	{
222 	  void **d = (void **) (uintptr_t) depend[5 + i];
223 	  if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
224 	    continue;
225 	  task->depend[n].addr = d[0];
226 	  task->depend[n++].is_in = 1;
227 	}
228     }
229   task->depend_count = ndepend;
230   task->num_dependees = 0;
231   if (parent->depend_hash == NULL)
232     parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
233   for (i = 0; i < ndepend; i++)
234     {
235       task->depend[i].next = NULL;
236       task->depend[i].prev = NULL;
237       task->depend[i].task = task;
238       task->depend[i].redundant = false;
239       task->depend[i].redundant_out = false;
240 
241       hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
242 					      &task->depend[i], INSERT);
243       hash_entry_type out = NULL, last = NULL;
244       if (*slot)
245 	{
246 	  /* If several depend clauses of the same task name the same address,
247 	     all but the first one are redundant.  As inout/out come first, if
248 	     any of them is inout/out, it will win, which is the right semantics.  */
249 	  if ((*slot)->task == task)
250 	    {
251 	      task->depend[i].redundant = true;
252 	      continue;
253 	    }
254 	  for (ent = *slot; ent; ent = ent->next)
255 	    {
256 	      if (ent->redundant_out)
257 		break;
258 
259 	      last = ent;
260 
261 	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
262 	      if (task->depend[i].is_in && ent->is_in)
263 		continue;
264 
265 	      if (!ent->is_in)
266 		out = ent;
267 
268 	      struct gomp_task *tsk = ent->task;
269 	      if (tsk->dependers == NULL)
270 		{
271 		  tsk->dependers
272 		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
273 				   + 6 * sizeof (struct gomp_task *));
274 		  tsk->dependers->n_elem = 1;
275 		  tsk->dependers->allocated = 6;
276 		  tsk->dependers->elem[0] = task;
277 		  task->num_dependees++;
278 		  continue;
279 		}
280 	      /* We already have some other dependency on tsk from earlier
281 		 depend clause.  */
282 	      else if (tsk->dependers->n_elem
283 		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
284 			   == task))
285 		continue;
286 	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
287 		{
288 		  tsk->dependers->allocated
289 		    = tsk->dependers->allocated * 2 + 2;
290 		  tsk->dependers
291 		    = gomp_realloc (tsk->dependers,
292 				    sizeof (struct gomp_dependers_vec)
293 				    + (tsk->dependers->allocated
294 				       * sizeof (struct gomp_task *)));
295 		}
296 	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
297 	      task->num_dependees++;
298 	    }
299 	  task->depend[i].next = *slot;
300 	  (*slot)->prev = &task->depend[i];
301 	}
302       *slot = &task->depend[i];
303 
304       /* There is no need to store more than one depend({,in}out:) task per
305 	 address in the hash table chain for the purpose of creation of
306 	 deferred tasks, because each out depends on all earlier outs, thus it
307 	 is enough to record just the last depend({,in}out:).  For depend(in:),
308 	 we need to keep all of the previous ones not terminated yet, because
309 	 a later depend({,in}out:) might need to depend on all of them.  So, if
310 	 the new task's clause is depend({,in}out:), we know there is at most
311 	 one other depend({,in}out:) clause in the list (out).  For
312 	 non-deferred tasks we want to see all outs, so they are moved to the
313 	 end of the chain; after the first redundant_out entry, all following
314 	 entries should be redundant_out.  */
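      /* An illustrative (hypothetical) example: if sibling tasks were created
	 in the order T1 depend(out: a), T2 depend(in: a), T3 depend(in: a),
	 T4 depend(out: a), then registering T4 walks the chain T3 -> T2 -> T1,
	 records T4 as a depender of each of them, and marks T1's entry
	 redundant_out (moving it past the in: entries first if it is not
	 already the last entry scanned), leaving T4 as the only live
	 depend(out:) entry for a.  */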
315       if (!task->depend[i].is_in && out)
316 	{
317 	  if (out != last)
318 	    {
319 	      out->next->prev = out->prev;
320 	      out->prev->next = out->next;
321 	      out->next = last->next;
322 	      out->prev = last;
323 	      last->next = out;
324 	      if (out->next)
325 		out->next->prev = out;
326 	    }
327 	  out->redundant_out = true;
328 	}
329     }
330 }
331 
332 /* Called when encountering an explicit task directive.  If IF_CLAUSE is
333    false, then we must not delay in executing the task.  If UNTIED is true,
334    then the task may be executed by any member of the team.
335 
336    DEPEND is an array containing:
337      if depend[0] is non-zero, then:
338 	depend[0]: number of depend elements.
339 	depend[1]: number of depend elements of type "out/inout".
340 	depend[2..N+1]: address of [1..N]th depend element.
341      otherwise, when depend[0] is zero, then:
342 	depend[1]: number of depend elements.
343 	depend[2]: number of depend elements of type "out/inout".
344 	depend[3]: number of depend elements of type "mutexinoutset".
345 	depend[4]: number of depend elements of type "in".
346 	depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
347 	depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
348 		   omp_depend_t objects.  */
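/* As an illustration of the first encoding: for a hypothetical
   "#pragma omp task depend(out: x) depend(in: y, z)" the compiler would
   pass roughly:

	void *depend[5];
	depend[0] = (void *) 3;    three depend elements in total
	depend[1] = (void *) 1;    one of them is out/inout
	depend[2] = &x;            out/inout addresses come first
	depend[3] = &y;
	depend[4] = &z;

   The second encoding (depend[0] == 0) additionally carries counts for the
   mutexinoutset and omp_depend_t kinds described above.  */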
349 
350 void
351 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
352 	   long arg_size, long arg_align, bool if_clause, unsigned flags,
353 	   void **depend, int priority_arg, void *detach)
354 {
355   struct gomp_thread *thr = gomp_thread ();
356   struct gomp_team *team = thr->ts.team;
357   int priority = 0;
358 
359 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
360   /* If pthread_mutex_* is used for omp_*lock*, then each task must be
361      tied to one thread all the time.  This means UNTIED tasks must be
362      tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
363      might be running on a different thread than FN.  */
364   if (cpyfn)
365     if_clause = false;
366   flags &= ~GOMP_TASK_FLAG_UNTIED;
367 #endif
368 
369   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
370   if (__builtin_expect (gomp_cancel_var, 0) && team)
371     {
372       if (gomp_team_barrier_cancelled (&team->barrier))
373 	return;
374       if (thr->task->taskgroup)
375 	{
376 	  if (thr->task->taskgroup->cancelled)
377 	    return;
378 	  if (thr->task->taskgroup->workshare
379 	      && thr->task->taskgroup->prev
380 	      && thr->task->taskgroup->prev->cancelled)
381 	    return;
382 	}
383     }
384 
385   if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0))
386     {
387       priority = priority_arg;
388       if (priority > gomp_max_task_priority_var)
389 	priority = gomp_max_task_priority_var;
390     }
391 
392   if (!if_clause || team == NULL
393       || (thr->task && thr->task->final_task)
394       || team->task_count > 64 * team->nthreads)
395     {
396       struct gomp_task task;
397       gomp_sem_t completion_sem;
398 
399       /* If there are depend clauses and earlier deferred sibling tasks
400 	 with depend clauses, check whether this task depends on any of
401 	 them; if so, we need to wait for them.  There is no need to handle
402 	 depend clauses for non-deferred tasks other than this, because
403 	 the parent task is suspended until the child task finishes and thus
404 	 it can't start further child tasks.  */
405       if ((flags & GOMP_TASK_FLAG_DEPEND)
406 	  && thr->task && thr->task->depend_hash)
407 	gomp_task_maybe_wait_for_dependencies (depend);
408 
409       gomp_init_task (&task, thr->task, gomp_icv (false));
410       task.kind = GOMP_TASK_UNDEFERRED;
411       task.final_task = (thr->task && thr->task->final_task)
412 			|| (flags & GOMP_TASK_FLAG_FINAL);
413       task.priority = priority;
414 
415       if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
416 	{
417 	  gomp_sem_init (&completion_sem, 0);
418 	  task.completion_sem = &completion_sem;
419 	  *(void **) detach = &task;
420 	  if (data)
421 	    *(void **) data = &task;
422 
423 	  gomp_debug (0, "Thread %d: new event: %p\n",
424 		      thr->ts.team_id, &task);
425 	}
426 
427       if (thr->task)
428 	{
429 	  task.in_tied_task = thr->task->in_tied_task;
430 	  task.taskgroup = thr->task->taskgroup;
431 	}
432       thr->task = &task;
433       if (__builtin_expect (cpyfn != NULL, 0))
434 	{
435 	  char buf[arg_size + arg_align - 1];
436 	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
437 				& ~(uintptr_t) (arg_align - 1));
438 	  cpyfn (arg, data);
439 	  fn (arg);
440 	}
441       else
442 	fn (data);
443 
444       if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
445 	{
446 	  gomp_sem_wait (&completion_sem);
447 	  gomp_sem_destroy (&completion_sem);
448 	}
449 
450       /* Access to "children" is normally done inside a task_lock
451 	 mutex region, but the only way this particular task.children
452 	 can be set is if this thread's task work function (fn)
453 	 creates children.  So since the setter is *this* thread, we
454 	 need no barriers here when testing for non-NULL.  We can have
455 	 task.children set by the current thread then changed by a
456 	 child thread, but seeing a stale non-NULL value is not a
457 	 problem.  Once past the task_lock acquisition, this thread
458 	 will see the real value of task.children.  */
459       if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
460 	{
461 	  gomp_mutex_lock (&team->task_lock);
462 	  gomp_clear_parent (&task.children_queue);
463 	  gomp_mutex_unlock (&team->task_lock);
464 	}
465       gomp_end_task ();
466     }
467   else
468     {
469       struct gomp_task *task;
470       struct gomp_task *parent = thr->task;
471       struct gomp_taskgroup *taskgroup = parent->taskgroup;
472       char *arg;
473       bool do_wake;
474       size_t depend_size = 0;
475 
476       if (flags & GOMP_TASK_FLAG_DEPEND)
477 	depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
478 		       * sizeof (struct gomp_task_depend_entry));
479       task = gomp_malloc (sizeof (*task) + depend_size
480 			  + arg_size + arg_align - 1);
481       arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
482 		      & ~(uintptr_t) (arg_align - 1));
483       gomp_init_task (task, parent, gomp_icv (false));
484       task->priority = priority;
485       task->kind = GOMP_TASK_UNDEFERRED;
486       task->in_tied_task = parent->in_tied_task;
487       task->taskgroup = taskgroup;
488       task->deferred_p = true;
489       if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
490 	{
491 	  task->detach_team = team;
492 
493 	  *(void **) detach = task;
494 	  if (data)
495 	    *(void **) data = task;
496 
497 	  gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task);
498 	}
499       thr->task = task;
500       if (cpyfn)
501 	{
502 	  cpyfn (arg, data);
503 	  task->copy_ctors_done = true;
504 	}
505       else
506 	memcpy (arg, data, arg_size);
507       thr->task = parent;
508       task->kind = GOMP_TASK_WAITING;
509       task->fn = fn;
510       task->fn_data = arg;
511       task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
512       gomp_mutex_lock (&team->task_lock);
513       /* If parallel or taskgroup has been cancelled, don't start new
514 	 tasks.  */
515       if (__builtin_expect (gomp_cancel_var, 0)
516 	  && !task->copy_ctors_done)
517 	{
518 	  if (gomp_team_barrier_cancelled (&team->barrier))
519 	    {
520 	    do_cancel:
521 	      gomp_mutex_unlock (&team->task_lock);
522 	      gomp_finish_task (task);
523 	      free (task);
524 	      return;
525 	    }
526 	  if (taskgroup)
527 	    {
528 	      if (taskgroup->cancelled)
529 		goto do_cancel;
530 	      if (taskgroup->workshare
531 		  && taskgroup->prev
532 		  && taskgroup->prev->cancelled)
533 		goto do_cancel;
534 	    }
535 	}
536       if (taskgroup)
537 	taskgroup->num_children++;
538       if (depend_size)
539 	{
540 	  gomp_task_handle_depend (task, parent, depend);
541 	  if (task->num_dependees)
542 	    {
543 	      /* Tasks that depend on other tasks are not put into the
544 		 various waiting queues, so we are done for now.  Said
545 		 tasks are instead put into the queues via
546 		 gomp_task_run_post_handle_dependers() after their
547 		 dependencies have been satisfied.  After which, they
548 		 can be picked up by the various scheduling
549 		 points.  */
550 	      gomp_mutex_unlock (&team->task_lock);
551 	      return;
552 	    }
553 	}
554 
555       priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
556 			     task, priority,
557 			     PRIORITY_INSERT_BEGIN,
558 			     /*adjust_parent_depends_on=*/false,
559 			     task->parent_depends_on);
560       if (taskgroup)
561 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
562 			       task, priority,
563 			       PRIORITY_INSERT_BEGIN,
564 			       /*adjust_parent_depends_on=*/false,
565 			       task->parent_depends_on);
566 
567       priority_queue_insert (PQ_TEAM, &team->task_queue,
568 			     task, priority,
569 			     PRIORITY_INSERT_END,
570 			     /*adjust_parent_depends_on=*/false,
571 			     task->parent_depends_on);
572 
573       ++team->task_count;
574       ++team->task_queued_count;
575       gomp_team_barrier_set_task_pending (&team->barrier);
576       do_wake = team->task_running_count + !parent->in_tied_task
577 		< team->nthreads;
578       gomp_mutex_unlock (&team->task_lock);
579       if (do_wake)
580 	gomp_team_barrier_wake (&team->barrier, 1);
581     }
582 }
583 
584 ialias (GOMP_taskgroup_start)
585 ialias (GOMP_taskgroup_end)
586 ialias (GOMP_taskgroup_reduction_register)
587 
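/* Instantiate the taskloop implementation twice by textually including
   taskloop.c: first for TYPE == long (providing GOMP_taskloop), then for
   TYPE == unsigned long long (providing GOMP_taskloop_ull).  */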
588 #define TYPE long
589 #define UTYPE unsigned long
590 #define TYPE_is_long 1
591 #include "taskloop.c"
592 #undef TYPE
593 #undef UTYPE
594 #undef TYPE_is_long
595 
596 #define TYPE unsigned long long
597 #define UTYPE TYPE
598 #define GOMP_taskloop GOMP_taskloop_ull
599 #include "taskloop.c"
600 #undef TYPE
601 #undef UTYPE
602 #undef GOMP_taskloop
603 
604 static void inline
605 priority_queue_move_task_first (enum priority_queue_type type,
606 				struct priority_queue *head,
607 				struct gomp_task *task)
608 {
609 #if _LIBGOMP_CHECKING_
610   if (!priority_queue_task_in_queue_p (type, head, task))
611     gomp_fatal ("Attempt to move first missing task %p", task);
612 #endif
613   struct priority_list *list;
614   if (priority_queue_multi_p (head))
615     {
616       list = priority_queue_lookup_priority (head, task->priority);
617 #if _LIBGOMP_CHECKING_
618       if (!list)
619 	gomp_fatal ("Unable to find priority %d", task->priority);
620 #endif
621     }
622   else
623     list = &head->l;
624   priority_list_remove (list, task_to_priority_node (type, task), 0);
625   priority_list_insert (type, list, task, task->priority,
626 			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
627 			task->parent_depends_on);
628 }
629 
630 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
631    with team->task_lock held, or is executed in the thread that called
632    gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
633    run before it acquires team->task_lock.  */
634 
635 static void
636 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
637 {
638   struct gomp_task *parent = task->parent;
639   if (parent)
640     priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
641 				    task);
642 
643   struct gomp_taskgroup *taskgroup = task->taskgroup;
644   if (taskgroup)
645     priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
646 				    task);
647 
648   priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
649 			 PRIORITY_INSERT_BEGIN, false,
650 			 task->parent_depends_on);
651   task->kind = GOMP_TASK_WAITING;
652   if (parent && parent->taskwait)
653     {
654       if (parent->taskwait->in_taskwait)
655 	{
656 	  /* One more task has had its dependencies met.
657 	     Inform any waiters.  */
658 	  parent->taskwait->in_taskwait = false;
659 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
660 	}
661       else if (parent->taskwait->in_depend_wait)
662 	{
663 	  /* One more task has had its dependencies met.
664 	     Inform any waiters.  */
665 	  parent->taskwait->in_depend_wait = false;
666 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
667 	}
668     }
669   if (taskgroup && taskgroup->in_taskgroup_wait)
670     {
671       /* One more task has had its dependencies met.
672 	 Inform any waiters.  */
673       taskgroup->in_taskgroup_wait = false;
674       gomp_sem_post (&taskgroup->taskgroup_sem);
675     }
676 
677   ++team->task_queued_count;
678   gomp_team_barrier_set_task_pending (&team->barrier);
679   /* I'm afraid this can't be done after releasing team->task_lock,
680      as gomp_target_task_completion is run from an unrelated thread and
681      therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
682      the team could be gone already.  */
683   if (team->nthreads > team->task_running_count)
684     gomp_team_barrier_wake (&team->barrier, 1);
685 }
686 
687 /* Signal that a target task TTASK has completed the asynchronously
688    running phase and should be requeued as a task to handle the
689    variable unmapping.  */
690 
691 void
692 GOMP_PLUGIN_target_task_completion (void *data)
693 {
694   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
695   struct gomp_task *task = ttask->task;
696   struct gomp_team *team = ttask->team;
697 
698   gomp_mutex_lock (&team->task_lock);
699   if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
700     {
701       ttask->state = GOMP_TARGET_TASK_FINISHED;
702       gomp_mutex_unlock (&team->task_lock);
703       return;
704     }
705   ttask->state = GOMP_TARGET_TASK_FINISHED;
706   gomp_target_task_completion (team, task);
707   gomp_mutex_unlock (&team->task_lock);
708 }
709 
710 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
711 
712 /* Called for nowait target tasks.  */
713 
714 bool
715 gomp_create_target_task (struct gomp_device_descr *devicep,
716 			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
717 			 size_t *sizes, unsigned short *kinds,
718 			 unsigned int flags, void **depend, void **args,
719 			 enum gomp_target_task_state state)
720 {
721   struct gomp_thread *thr = gomp_thread ();
722   struct gomp_team *team = thr->ts.team;
723 
724   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
725   if (__builtin_expect (gomp_cancel_var, 0) && team)
726     {
727       if (gomp_team_barrier_cancelled (&team->barrier))
728 	return true;
729       if (thr->task->taskgroup)
730 	{
731 	  if (thr->task->taskgroup->cancelled)
732 	    return true;
733 	  if (thr->task->taskgroup->workshare
734 	      && thr->task->taskgroup->prev
735 	      && thr->task->taskgroup->prev->cancelled)
736 	    return true;
737 	}
738     }
739 
740   struct gomp_target_task *ttask;
741   struct gomp_task *task;
742   struct gomp_task *parent = thr->task;
743   struct gomp_taskgroup *taskgroup = parent->taskgroup;
744   bool do_wake;
745   size_t depend_size = 0;
746   uintptr_t depend_cnt = 0;
747   size_t tgt_align = 0, tgt_size = 0;
748   uintptr_t args_cnt = 0;
749 
750   if (depend != NULL)
751     {
752       depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
753       depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
754     }
755   if (fn)
756     {
757 	 GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
758 	 are firstprivate on the target task.  */
759       size_t i;
760       for (i = 0; i < mapnum; i++)
761 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
762 	  {
763 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
764 	    if (tgt_align < align)
765 	      tgt_align = align;
766 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
767 	    tgt_size += sizes[i];
768 	  }
769       if (tgt_align)
770 	tgt_size += tgt_align - 1;
771       else
772 	tgt_size = 0;
773       if (args)
774 	{
775 	  void **cargs = args;
776 	  while (*cargs)
777 	    {
778 	      intptr_t id = (intptr_t) *cargs++;
779 	      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
780 		cargs++;
781 	    }
782 	  args_cnt = cargs + 1 - args;
783 	}
784     }
785 
786   task = gomp_malloc (sizeof (*task) + depend_size
787 		      + sizeof (*ttask)
788 		      + args_cnt * sizeof (void *)
789 		      + mapnum * (sizeof (void *) + sizeof (size_t)
790 				  + sizeof (unsigned short))
791 		      + tgt_size);
792   gomp_init_task (task, parent, gomp_icv (false));
793   task->priority = 0;
794   task->kind = GOMP_TASK_WAITING;
795   task->in_tied_task = parent->in_tied_task;
796   task->taskgroup = taskgroup;
797   ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
798   ttask->devicep = devicep;
799   ttask->fn = fn;
800   ttask->mapnum = mapnum;
801   memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
802   if (args_cnt)
803     {
804       ttask->args = (void **) &ttask->hostaddrs[mapnum];
805       memcpy (ttask->args, args, args_cnt * sizeof (void *));
806       ttask->sizes = (size_t *) &ttask->args[args_cnt];
807     }
808   else
809     {
810       ttask->args = args;
811       ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
812     }
813   memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
814   ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
815   memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
816   if (tgt_align)
817     {
818       char *tgt = (char *) &ttask->kinds[mapnum];
819       size_t i;
820       uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
821       if (al)
822 	tgt += tgt_align - al;
823       tgt_size = 0;
824       for (i = 0; i < mapnum; i++)
825 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
826 	  {
827 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
828 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
829 	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
830 	    ttask->hostaddrs[i] = tgt + tgt_size;
831 	    tgt_size = tgt_size + sizes[i];
832 	  }
833     }
834   ttask->flags = flags;
835   ttask->state = state;
836   ttask->task = task;
837   ttask->team = team;
838   task->fn = NULL;
839   task->fn_data = ttask;
840   task->final_task = 0;
841   gomp_mutex_lock (&team->task_lock);
842   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
843   if (__builtin_expect (gomp_cancel_var, 0))
844     {
845       if (gomp_team_barrier_cancelled (&team->barrier))
846 	{
847 	do_cancel:
848 	  gomp_mutex_unlock (&team->task_lock);
849 	  gomp_finish_task (task);
850 	  free (task);
851 	  return true;
852 	}
853       if (taskgroup)
854 	{
855 	  if (taskgroup->cancelled)
856 	    goto do_cancel;
857 	  if (taskgroup->workshare
858 	      && taskgroup->prev
859 	      && taskgroup->prev->cancelled)
860 	    goto do_cancel;
861 	}
862     }
863   if (depend_size)
864     {
865       gomp_task_handle_depend (task, parent, depend);
866       if (task->num_dependees)
867 	{
868 	  if (taskgroup)
869 	    taskgroup->num_children++;
870 	  gomp_mutex_unlock (&team->task_lock);
871 	  return true;
872 	}
873     }
874   if (state == GOMP_TARGET_TASK_DATA)
875     {
876       gomp_task_run_post_handle_depend_hash (task);
877       gomp_mutex_unlock (&team->task_lock);
878       gomp_finish_task (task);
879       free (task);
880       return false;
881     }
882   if (taskgroup)
883     taskgroup->num_children++;
884   /* For async offloading, if we don't need to wait for dependencies,
885      run the gomp_target_task_fn right away, essentially schedule the
886      mapping part of the task in the current thread.  */
887   if (devicep != NULL
888       && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
889     {
890       priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
891 			     PRIORITY_INSERT_END,
892 			     /*adjust_parent_depends_on=*/false,
893 			     task->parent_depends_on);
894       if (taskgroup)
895 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
896 			       task, 0, PRIORITY_INSERT_END,
897 			       /*adjust_parent_depends_on=*/false,
898 			       task->parent_depends_on);
899       task->pnode[PQ_TEAM].next = NULL;
900       task->pnode[PQ_TEAM].prev = NULL;
901       task->kind = GOMP_TASK_TIED;
902       ++team->task_count;
903       gomp_mutex_unlock (&team->task_lock);
904 
905       thr->task = task;
906       gomp_target_task_fn (task->fn_data);
907       thr->task = parent;
908 
909       gomp_mutex_lock (&team->task_lock);
910       task->kind = GOMP_TASK_ASYNC_RUNNING;
911       /* If GOMP_PLUGIN_target_task_completion has run already
912 	 in between gomp_target_task_fn and the mutex lock,
913 	 perform the requeuing here.  */
914       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
915 	gomp_target_task_completion (team, task);
916       else
917 	ttask->state = GOMP_TARGET_TASK_RUNNING;
918       gomp_mutex_unlock (&team->task_lock);
919       return true;
920     }
921   priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
922 			 PRIORITY_INSERT_BEGIN,
923 			 /*adjust_parent_depends_on=*/false,
924 			 task->parent_depends_on);
925   if (taskgroup)
926     priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
927 			   PRIORITY_INSERT_BEGIN,
928 			   /*adjust_parent_depends_on=*/false,
929 			   task->parent_depends_on);
930   priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
931 			 PRIORITY_INSERT_END,
932 			 /*adjust_parent_depends_on=*/false,
933 			 task->parent_depends_on);
934   ++team->task_count;
935   ++team->task_queued_count;
936   gomp_team_barrier_set_task_pending (&team->barrier);
937   do_wake = team->task_running_count + !parent->in_tied_task
938 	    < team->nthreads;
939   gomp_mutex_unlock (&team->task_lock);
940   if (do_wake)
941     gomp_team_barrier_wake (&team->barrier, 1);
942   return true;
943 }
944 
945 /* Given a parent_depends_on task in LIST, move it to the front of its
946    priority so it is run as soon as possible.
947 
948    Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
949 
950    We rearrange the queue such that all parent_depends_on tasks are
951    first, and last_parent_depends_on points to the last such task we
952    rearranged.  For example, given the following tasks in a queue
953    where PD[123] are the parent_depends_on tasks:
954 
955 	task->children
956 	|
957 	V
958 	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
959 
960 	We rearrange such that:
961 
962 	task->children
963 	|	       +--- last_parent_depends_on
964 	|	       |
965 	V	       V
966 	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
967 
968 static void inline
969 priority_list_upgrade_task (struct priority_list *list,
970 			    struct priority_node *node)
971 {
972   struct priority_node *last_parent_depends_on
973     = list->last_parent_depends_on;
974   if (last_parent_depends_on)
975     {
976       node->prev->next = node->next;
977       node->next->prev = node->prev;
978       node->prev = last_parent_depends_on;
979       node->next = last_parent_depends_on->next;
980       node->prev->next = node;
981       node->next->prev = node;
982     }
983   else if (node != list->tasks)
984     {
985       node->prev->next = node->next;
986       node->next->prev = node->prev;
987       node->prev = list->tasks->prev;
988       node->next = list->tasks;
989       list->tasks = node;
990       node->prev->next = node;
991       node->next->prev = node;
992     }
993   list->last_parent_depends_on = node;
994 }
995 
996 /* Given a parent_depends_on TASK in its parent's children_queue, move
997    it to the front of its priority so it is run as soon as possible.
998 
999    PARENT is passed as an optimization.
1000 
1001    (This function could be defined in priority_queue.c, but we want it
1002    inlined, and putting it in priority_queue.h is not an option, given
1003    that gomp_task has not been properly defined at that point).  */
1004 
1005 static void inline
1006 priority_queue_upgrade_task (struct gomp_task *task,
1007 			     struct gomp_task *parent)
1008 {
1009   struct priority_queue *head = &parent->children_queue;
1010   struct priority_node *node = &task->pnode[PQ_CHILDREN];
1011 #if _LIBGOMP_CHECKING_
1012   if (!task->parent_depends_on)
1013     gomp_fatal ("priority_queue_upgrade_task: task must be a "
1014 		"parent_depends_on task");
1015   if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
1016     gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
1017 #endif
1018   if (priority_queue_multi_p (head))
1019     {
1020       struct priority_list *list
1021 	= priority_queue_lookup_priority (head, task->priority);
1022       priority_list_upgrade_task (list, node);
1023     }
1024   else
1025     priority_list_upgrade_task (&head->l, node);
1026 }
1027 
1028 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1029    the way in LIST so that other tasks can be considered for
1030    execution.  LIST contains tasks of type TYPE.
1031 
1032    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1033    if applicable.  */
1034 
1035 static void inline
1036 priority_list_downgrade_task (enum priority_queue_type type,
1037 			      struct priority_list *list,
1038 			      struct gomp_task *child_task)
1039 {
1040   struct priority_node *node = task_to_priority_node (type, child_task);
1041   if (list->tasks == node)
1042     list->tasks = node->next;
1043   else if (node->next != list->tasks)
1044     {
1045       /* The task in NODE is about to become TIED and TIED tasks
1046 	 cannot come before WAITING tasks.  If we're about to
1047 	 leave the queue in such an indeterminate state, rewire
1048 	 things appropriately.  However, a TIED task at the end is
1049 	 perfectly fine.  */
1050       struct gomp_task *next_task = priority_node_to_task (type, node->next);
1051       if (next_task->kind == GOMP_TASK_WAITING)
1052 	{
1053 	  /* Remove from list.  */
1054 	  node->prev->next = node->next;
1055 	  node->next->prev = node->prev;
1056 	  /* Rewire at the end.  */
1057 	  node->next = list->tasks;
1058 	  node->prev = list->tasks->prev;
1059 	  list->tasks->prev->next = node;
1060 	  list->tasks->prev = node;
1061 	}
1062     }
1063 
1064   /* If the current task is the last_parent_depends_on for its
1065      priority, adjust last_parent_depends_on appropriately.  */
1066   if (__builtin_expect (child_task->parent_depends_on, 0)
1067       && list->last_parent_depends_on == node)
1068     {
1069       struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1070       if (node->prev != node
1071 	  && prev_child->kind == GOMP_TASK_WAITING
1072 	  && prev_child->parent_depends_on)
1073 	list->last_parent_depends_on = node->prev;
1074       else
1075 	{
1076 	  /* There are no more parent_depends_on entries waiting
1077 	     to run, clear the list.  */
1078 	  list->last_parent_depends_on = NULL;
1079 	}
1080     }
1081 }
1082 
1083 /* Given a TASK in HEAD that is about to be executed, move it out of
1084    the way so that other tasks can be considered for execution.  HEAD
1085    contains tasks of type TYPE.
1086 
1087    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1088    if applicable.
1089 
1090    (This function could be defined in priority_queue.c, but we want it
1091    inlined, and putting it in priority_queue.h is not an option, given
1092    that gomp_task has not been properly defined at that point).  */
1093 
1094 static void inline
1095 priority_queue_downgrade_task (enum priority_queue_type type,
1096 			       struct priority_queue *head,
1097 			       struct gomp_task *task)
1098 {
1099 #if _LIBGOMP_CHECKING_
1100   if (!priority_queue_task_in_queue_p (type, head, task))
1101     gomp_fatal ("Attempt to downgrade missing task %p", task);
1102 #endif
1103   if (priority_queue_multi_p (head))
1104     {
1105       struct priority_list *list
1106 	= priority_queue_lookup_priority (head, task->priority);
1107       priority_list_downgrade_task (type, list, task);
1108     }
1109   else
1110     priority_list_downgrade_task (type, &head->l, task);
1111 }
1112 
1113 /* Set up CHILD_TASK to execute.  This is done by setting the task to
1114    TIED, and updating all relevant queues so that CHILD_TASK is no
1115    longer chosen for scheduling.  Also, remove CHILD_TASK from the
1116    overall team task queue entirely.
1117 
1118    Return TRUE if task or its containing taskgroup has been
1119    cancelled.  */
1120 
1121 static inline bool
1122 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1123 		   struct gomp_team *team)
1124 {
1125 #if _LIBGOMP_CHECKING_
1126   if (child_task->parent)
1127     priority_queue_verify (PQ_CHILDREN,
1128 			   &child_task->parent->children_queue, true);
1129   if (child_task->taskgroup)
1130     priority_queue_verify (PQ_TASKGROUP,
1131 			   &child_task->taskgroup->taskgroup_queue, false);
1132   priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1133 #endif
1134 
1135   /* Task is about to go tied, move it out of the way.  */
1136   if (parent)
1137     priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1138 				   child_task);
1139 
1140   /* Task is about to go tied, move it out of the way.  */
1141   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1142   if (taskgroup)
1143     priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1144 				   child_task);
1145 
1146   priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1147 			 MEMMODEL_RELAXED);
1148   child_task->pnode[PQ_TEAM].next = NULL;
1149   child_task->pnode[PQ_TEAM].prev = NULL;
1150   child_task->kind = GOMP_TASK_TIED;
1151 
1152   if (--team->task_queued_count == 0)
1153     gomp_team_barrier_clear_task_pending (&team->barrier);
1154   if (__builtin_expect (gomp_cancel_var, 0)
1155       && !child_task->copy_ctors_done)
1156     {
1157       if (gomp_team_barrier_cancelled (&team->barrier))
1158 	return true;
1159       if (taskgroup)
1160 	{
1161 	  if (taskgroup->cancelled)
1162 	    return true;
1163 	  if (taskgroup->workshare
1164 	      && taskgroup->prev
1165 	      && taskgroup->prev->cancelled)
1166 	    return true;
1167 	}
1168     }
1169   return false;
1170 }
1171 
1172 static void
1173 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1174 {
1175   struct gomp_task *parent = child_task->parent;
1176   size_t i;
1177 
1178   for (i = 0; i < child_task->depend_count; i++)
1179     if (!child_task->depend[i].redundant)
1180       {
1181 	if (child_task->depend[i].next)
1182 	  child_task->depend[i].next->prev = child_task->depend[i].prev;
1183 	if (child_task->depend[i].prev)
1184 	  child_task->depend[i].prev->next = child_task->depend[i].next;
1185 	else
1186 	  {
1187 	    hash_entry_type *slot
1188 	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1189 				NO_INSERT);
1190 	    if (*slot != &child_task->depend[i])
1191 	      abort ();
1192 	    if (child_task->depend[i].next)
1193 	      *slot = child_task->depend[i].next;
1194 	    else
1195 	      htab_clear_slot (parent->depend_hash, slot);
1196 	  }
1197       }
1198 }
1199 
1200 /* After a CHILD_TASK has been run, adjust the dependency queue for
1201    each task that depends on CHILD_TASK, to record the fact that there
1202    is one less dependency to worry about.  If a task that depended on
1203    CHILD_TASK now has no dependencies, place it in the various queues
1204    so it gets scheduled to run.
1205 
1206    TEAM is the team to which CHILD_TASK belongs.  */
1207 
1208 static size_t
1209 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1210 				     struct gomp_team *team)
1211 {
1212   struct gomp_task *parent = child_task->parent;
1213   size_t i, count = child_task->dependers->n_elem, ret = 0;
1214   for (i = 0; i < count; i++)
1215     {
1216       struct gomp_task *task = child_task->dependers->elem[i];
1217 
1218       /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
1219 	 TASK's remaining dependencies.  Once TASK has no other
1220 	 dependencies, put it into the various queues so it will get
1221 	 scheduled for execution.  */
1222       if (--task->num_dependees != 0)
1223 	continue;
1224 
1225       struct gomp_taskgroup *taskgroup = task->taskgroup;
1226       if (parent)
1227 	{
1228 	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1229 				 task, task->priority,
1230 				 PRIORITY_INSERT_BEGIN,
1231 				 /*adjust_parent_depends_on=*/true,
1232 				 task->parent_depends_on);
1233 	  if (parent->taskwait)
1234 	    {
1235 	      if (parent->taskwait->in_taskwait)
1236 		{
1237 		  /* One more task has had its dependencies met.
1238 		     Inform any waiters.  */
1239 		  parent->taskwait->in_taskwait = false;
1240 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1241 		}
1242 	      else if (parent->taskwait->in_depend_wait)
1243 		{
1244 		  /* One more task has had its dependencies met.
1245 		     Inform any waiters.  */
1246 		  parent->taskwait->in_depend_wait = false;
1247 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1248 		}
1249 	    }
1250 	}
1251       else
1252 	task->parent = NULL;
1253       if (taskgroup)
1254 	{
1255 	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1256 				 task, task->priority,
1257 				 PRIORITY_INSERT_BEGIN,
1258 				 /*adjust_parent_depends_on=*/false,
1259 				 task->parent_depends_on);
1260 	  if (taskgroup->in_taskgroup_wait)
1261 	    {
1262 	      /* One more task has had its dependencies met.
1263 		 Inform any waiters.  */
1264 	      taskgroup->in_taskgroup_wait = false;
1265 	      gomp_sem_post (&taskgroup->taskgroup_sem);
1266 	    }
1267 	}
1268       priority_queue_insert (PQ_TEAM, &team->task_queue,
1269 			     task, task->priority,
1270 			     PRIORITY_INSERT_END,
1271 			     /*adjust_parent_depends_on=*/false,
1272 			     task->parent_depends_on);
1273       ++team->task_count;
1274       ++team->task_queued_count;
1275       ++ret;
1276     }
1277   free (child_task->dependers);
1278   child_task->dependers = NULL;
1279   if (ret > 1)
1280     gomp_team_barrier_set_task_pending (&team->barrier);
1281   return ret;
1282 }
1283 
1284 static inline size_t
1285 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1286 				  struct gomp_team *team)
1287 {
1288   if (child_task->depend_count == 0)
1289     return 0;
1290 
1291   /* If the parent is gone already, the hash table is freed and nothing
1292      will use it anymore, so there is no need to remove anything from it.  */
1293   if (child_task->parent != NULL)
1294     gomp_task_run_post_handle_depend_hash (child_task);
1295 
1296   if (child_task->dependers == NULL)
1297     return 0;
1298 
1299   return gomp_task_run_post_handle_dependers (child_task, team);
1300 }
1301 
1302 /* Remove CHILD_TASK from its parent.  */
1303 
1304 static inline void
1305 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1306 {
1307   struct gomp_task *parent = child_task->parent;
1308   if (parent == NULL)
1309     return;
1310 
1311   /* If this was the last task the parent was depending on,
1312      synchronize with gomp_task_maybe_wait_for_dependencies so it can
1313      clean up and return.  */
1314   if (__builtin_expect (child_task->parent_depends_on, 0)
1315       && --parent->taskwait->n_depend == 0
1316       && parent->taskwait->in_depend_wait)
1317     {
1318       parent->taskwait->in_depend_wait = false;
1319       gomp_sem_post (&parent->taskwait->taskwait_sem);
1320     }
1321 
1322   if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1323 			     child_task, MEMMODEL_RELEASE)
1324       && parent->taskwait && parent->taskwait->in_taskwait)
1325     {
1326       parent->taskwait->in_taskwait = false;
1327       gomp_sem_post (&parent->taskwait->taskwait_sem);
1328     }
1329   child_task->pnode[PQ_CHILDREN].next = NULL;
1330   child_task->pnode[PQ_CHILDREN].prev = NULL;
1331 }
1332 
1333 /* Remove CHILD_TASK from its taskgroup.  */
1334 
1335 static inline void
1336 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1337 {
1338   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1339   if (taskgroup == NULL)
1340     return;
1341   bool empty = priority_queue_remove (PQ_TASKGROUP,
1342 				      &taskgroup->taskgroup_queue,
1343 				      child_task, MEMMODEL_RELAXED);
1344   child_task->pnode[PQ_TASKGROUP].next = NULL;
1345   child_task->pnode[PQ_TASKGROUP].prev = NULL;
1346   if (taskgroup->num_children > 1)
1347     --taskgroup->num_children;
1348   else
1349     {
1350       /* We access taskgroup->num_children in GOMP_taskgroup_end
1351 	 outside of the task lock mutex region, so we
1352 	 need a release barrier here to ensure memory
1353 	 written by child_task->fn above is flushed
1354 	 before the NULL is written.  */
1355       __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1356     }
1357   if (empty && taskgroup->in_taskgroup_wait)
1358     {
1359       taskgroup->in_taskgroup_wait = false;
1360       gomp_sem_post (&taskgroup->taskgroup_sem);
1361     }
1362 }
1363 
1364 void
1365 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1366 {
1367   struct gomp_thread *thr = gomp_thread ();
1368   struct gomp_team *team = thr->ts.team;
1369   struct gomp_task *task = thr->task;
1370   struct gomp_task *child_task = NULL;
1371   struct gomp_task *to_free = NULL;
1372   int do_wake = 0;
1373 
1374   gomp_mutex_lock (&team->task_lock);
1375   if (gomp_barrier_last_thread (state))
1376     {
1377       if (team->task_count == 0)
1378 	{
1379 	  gomp_team_barrier_done (&team->barrier, state);
1380 	  gomp_mutex_unlock (&team->task_lock);
1381 	  gomp_team_barrier_wake (&team->barrier, 0);
1382 	  return;
1383 	}
1384       gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1385     }
1386 
1387   while (1)
1388     {
1389       bool cancelled = false;
1390 
1391       if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1392 	{
1393 	  bool ignored;
1394 	  child_task
1395 	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1396 					PQ_IGNORED, NULL,
1397 					&ignored);
1398 	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
1399 					 team);
1400 	  if (__builtin_expect (cancelled, 0))
1401 	    {
1402 	      if (to_free)
1403 		{
1404 		  gomp_finish_task (to_free);
1405 		  free (to_free);
1406 		  to_free = NULL;
1407 		}
1408 	      goto finish_cancelled;
1409 	    }
1410 	  team->task_running_count++;
1411 	  child_task->in_tied_task = true;
1412 	}
1413       else if (team->task_count == 0
1414 	       && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1415 	{
1416 	  gomp_team_barrier_done (&team->barrier, state);
1417 	  gomp_mutex_unlock (&team->task_lock);
1418 	  gomp_team_barrier_wake (&team->barrier, 0);
1419 	  if (to_free)
1420 	    {
1421 	      gomp_finish_task (to_free);
1422 	      free (to_free);
1423 	    }
1424 	  return;
1425 	}
1426       gomp_mutex_unlock (&team->task_lock);
1427       if (do_wake)
1428 	{
1429 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1430 	  do_wake = 0;
1431 	}
1432       if (to_free)
1433 	{
1434 	  gomp_finish_task (to_free);
1435 	  free (to_free);
1436 	  to_free = NULL;
1437 	}
1438       if (child_task)
1439 	{
1440 	  thr->task = child_task;
1441 	  if (__builtin_expect (child_task->fn == NULL, 0))
1442 	    {
1443 	      if (gomp_target_task_fn (child_task->fn_data))
1444 		{
1445 		  thr->task = task;
1446 		  gomp_mutex_lock (&team->task_lock);
1447 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1448 		  team->task_running_count--;
1449 		  struct gomp_target_task *ttask
1450 		    = (struct gomp_target_task *) child_task->fn_data;
1451 		  /* If GOMP_PLUGIN_target_task_completion has run already
1452 		     in between gomp_target_task_fn and the mutex lock,
1453 		     perform the requeuing here.  */
1454 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1455 		    gomp_target_task_completion (team, child_task);
1456 		  else
1457 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1458 		  child_task = NULL;
1459 		  continue;
1460 		}
1461 	    }
1462 	  else
1463 	    child_task->fn (child_task->fn_data);
1464 	  thr->task = task;
1465 	}
1466       else
1467 	return;
1468       gomp_mutex_lock (&team->task_lock);
1469       if (child_task)
1470 	{
1471 	  if (child_task->detach_team)
1472 	    {
1473 	      assert (child_task->detach_team == team);
1474 	      child_task->kind = GOMP_TASK_DETACHED;
1475 	      ++team->task_detach_count;
1476 	      --team->task_running_count;
1477 	      gomp_debug (0,
1478 			  "thread %d: task with event %p finished without "
1479 			  "completion event fulfilled in team barrier\n",
1480 			  thr->ts.team_id, child_task);
1481 	      child_task = NULL;
1482 	      continue;
1483 	    }
1484 
1485 	 finish_cancelled:;
1486 	  size_t new_tasks
1487 	    = gomp_task_run_post_handle_depend (child_task, team);
1488 	  gomp_task_run_post_remove_parent (child_task);
1489 	  gomp_clear_parent (&child_task->children_queue);
1490 	  gomp_task_run_post_remove_taskgroup (child_task);
1491 	  to_free = child_task;
1492 	  if (!cancelled)
1493 	    team->task_running_count--;
1494 	  child_task = NULL;
1495 	  if (new_tasks > 1)
1496 	    {
1497 	      do_wake = team->nthreads - team->task_running_count;
1498 	      if (do_wake > new_tasks)
1499 		do_wake = new_tasks;
1500 	    }
1501 	  --team->task_count;
1502 	}
1503     }
1504 }
1505 
1506 /* Called when encountering a taskwait directive.
1507 
1508    Wait for all children of the current task.  */
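/* A plain "#pragma omp taskwait" is lowered to a call of this function;
   the form with depend clauses is handled by GOMP_taskwait_depend below.  */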
1509 
1510 void
1511 GOMP_taskwait (void)
1512 {
1513   struct gomp_thread *thr = gomp_thread ();
1514   struct gomp_team *team = thr->ts.team;
1515   struct gomp_task *task = thr->task;
1516   struct gomp_task *child_task = NULL;
1517   struct gomp_task *to_free = NULL;
1518   struct gomp_taskwait taskwait;
1519   int do_wake = 0;
1520 
1521   /* The acquire barrier on load of task->children here synchronizes
1522      with the write of a NULL in gomp_task_run_post_remove_parent.  It is
1523      not necessary that we synchronize with other non-NULL writes at
1524      this point, but we must ensure that all writes to memory by a
1525      child thread task work function are seen before we exit from
1526      GOMP_taskwait.  */
1527   if (task == NULL
1528       || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1529     return;
1530 
1531   memset (&taskwait, 0, sizeof (taskwait));
1532   bool child_q = false;
1533   gomp_mutex_lock (&team->task_lock);
1534   while (1)
1535     {
1536       bool cancelled = false;
1537       if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1538 	{
1539 	  bool destroy_taskwait = task->taskwait != NULL;
1540 	  task->taskwait = NULL;
1541 	  gomp_mutex_unlock (&team->task_lock);
1542 	  if (to_free)
1543 	    {
1544 	      gomp_finish_task (to_free);
1545 	      free (to_free);
1546 	    }
1547 	  if (destroy_taskwait)
1548 	    gomp_sem_destroy (&taskwait.taskwait_sem);
1549 	  return;
1550 	}
1551       struct gomp_task *next_task
1552 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1553 				    PQ_TEAM, &team->task_queue, &child_q);
1554       if (next_task->kind == GOMP_TASK_WAITING)
1555 	{
1556 	  child_task = next_task;
1557 	  cancelled
1558 	    = gomp_task_run_pre (child_task, task, team);
1559 	  if (__builtin_expect (cancelled, 0))
1560 	    {
1561 	      if (to_free)
1562 		{
1563 		  gomp_finish_task (to_free);
1564 		  free (to_free);
1565 		  to_free = NULL;
1566 		}
1567 	      goto finish_cancelled;
1568 	    }
1569 	}
1570       else
1571 	{
1572 	/* All tasks we are waiting for either are running in other
1573 	   threads, are detached and waiting for their completion event
1574 	   to be fulfilled, or have not had their dependencies met yet
1575 	   (so they're not even in the queue).  Wait
1576 	   for them.  */
1577 	  if (task->taskwait == NULL)
1578 	    {
1579 	      taskwait.in_depend_wait = false;
1580 	      gomp_sem_init (&taskwait.taskwait_sem, 0);
1581 	      task->taskwait = &taskwait;
1582 	    }
1583 	  taskwait.in_taskwait = true;
1584 	}
1585       gomp_mutex_unlock (&team->task_lock);
1586       if (do_wake)
1587 	{
1588 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1589 	  do_wake = 0;
1590 	}
1591       if (to_free)
1592 	{
1593 	  gomp_finish_task (to_free);
1594 	  free (to_free);
1595 	  to_free = NULL;
1596 	}
1597       if (child_task)
1598 	{
1599 	  thr->task = child_task;
1600 	  if (__builtin_expect (child_task->fn == NULL, 0))
1601 	    {
1602 	      if (gomp_target_task_fn (child_task->fn_data))
1603 		{
1604 		  thr->task = task;
1605 		  gomp_mutex_lock (&team->task_lock);
1606 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1607 		  struct gomp_target_task *ttask
1608 		    = (struct gomp_target_task *) child_task->fn_data;
1609 		  /* If GOMP_PLUGIN_target_task_completion has run already
1610 		     in between gomp_target_task_fn and the mutex lock,
1611 		     perform the requeuing here.  */
1612 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1613 		    gomp_target_task_completion (team, child_task);
1614 		  else
1615 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1616 		  child_task = NULL;
1617 		  continue;
1618 		}
1619 	    }
1620 	  else
1621 	    child_task->fn (child_task->fn_data);
1622 	  thr->task = task;
1623 	}
1624       else
1625 	gomp_sem_wait (&taskwait.taskwait_sem);
1626       gomp_mutex_lock (&team->task_lock);
1627       if (child_task)
1628 	{
1629 	  if (child_task->detach_team)
1630 	    {
1631 	      assert (child_task->detach_team == team);
1632 	      child_task->kind = GOMP_TASK_DETACHED;
1633 	      ++team->task_detach_count;
1634 	      gomp_debug (0,
1635 			  "thread %d: task with event %p finished without "
1636 			  "completion event fulfilled in taskwait\n",
1637 			  thr->ts.team_id, child_task);
1638 	      child_task = NULL;
1639 	      continue;
1640 	    }
1641 
1642 	 finish_cancelled:;
1643 	  size_t new_tasks
1644 	    = gomp_task_run_post_handle_depend (child_task, team);
1645 
1646 	  if (child_q)
1647 	    {
1648 	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1649 				     child_task, MEMMODEL_RELAXED);
1650 	      child_task->pnode[PQ_CHILDREN].next = NULL;
1651 	      child_task->pnode[PQ_CHILDREN].prev = NULL;
1652 	    }
1653 
1654 	  gomp_clear_parent (&child_task->children_queue);
1655 
1656 	  gomp_task_run_post_remove_taskgroup (child_task);
1657 
1658 	  to_free = child_task;
1659 	  child_task = NULL;
1660 	  team->task_count--;
1661 	  if (new_tasks > 1)
1662 	    {
1663 	      do_wake = team->nthreads - team->task_running_count
1664 			- !task->in_tied_task;
1665 	      if (do_wake > new_tasks)
1666 		do_wake = new_tasks;
1667 	    }
1668 	}
1669     }
1670 }
1671 
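/* Illustrative sketch, not part of libgomp: user-level code that reaches
   GOMP_taskwait above.  GCC lowers the taskwait directive to a call of
   this entry point; work_a and work_b are hypothetical helpers.  */
#if 0
void
example_taskwait (void)
{
  #pragma omp task
  work_a ();		/* child task 1 */
  #pragma omp task
  work_b ();		/* child task 2 */
  /* Returns only once both child tasks have completed, either by running
     them right here or by sleeping on taskwait.taskwait_sem.  */
  #pragma omp taskwait
}
#endif
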
1672 /* Called when encountering a taskwait directive with depend clause(s).
1673    Wait as if it were a mergeable included task construct with an empty body.  */
1674 
1675 void
1676 GOMP_taskwait_depend (void **depend)
1677 {
1678   struct gomp_thread *thr = gomp_thread ();
1679   struct gomp_team *team = thr->ts.team;
1680 
1681   /* If parallel or taskgroup has been cancelled, return early.  */
1682   if (__builtin_expect (gomp_cancel_var, 0) && team)
1683     {
1684       if (gomp_team_barrier_cancelled (&team->barrier))
1685 	return;
1686       if (thr->task->taskgroup)
1687 	{
1688 	  if (thr->task->taskgroup->cancelled)
1689 	    return;
1690 	  if (thr->task->taskgroup->workshare
1691 	      && thr->task->taskgroup->prev
1692 	      && thr->task->taskgroup->prev->cancelled)
1693 	    return;
1694 	}
1695     }
1696 
1697   if (thr->task && thr->task->depend_hash)
1698     gomp_task_maybe_wait_for_dependencies (depend);
1699 }
1700 
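/* Illustrative sketch, not part of libgomp: a taskwait with depend
   clauses (hypothetical user code).  GOMP_taskwait_depend above only
   waits for previously generated sibling tasks whose dependences
   conflict with the listed ones.  */
#if 0
void
example_taskwait_depend (int *x, int *y)
{
  #pragma omp task depend(out: x[0])
  x[0] = produce_x ();		/* hypothetical producer of x */
  #pragma omp task depend(out: y[0])
  y[0] = produce_y ();		/* hypothetical producer of y */
  /* Waits only for the task writing x[0]; the task writing y[0] may
     still be running afterwards.  */
  #pragma omp taskwait depend(in: x[0])
  consume_x (x[0]);		/* hypothetical consumer */
}
#endif
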
1701 /* An undeferred task is about to run.  Wait for all tasks that this
1702    undeferred task depends on.
1703 
1704    This is done by first putting all known ready dependencies
1705    (dependencies that have their own dependencies met) at the top of
1706    the scheduling queues.  Then we iterate through these imminently
1707    ready tasks (and possibly other high priority tasks), and run them.
1708    If we run out of ready dependencies to execute, we either wait for
1709    the remaining dependencies to finish, or wait for them to get
1710    scheduled so we can run them.
1711 
1712    DEPEND is as in GOMP_task.  */
1713 
1714 void
1715 gomp_task_maybe_wait_for_dependencies (void **depend)
1716 {
1717   struct gomp_thread *thr = gomp_thread ();
1718   struct gomp_task *task = thr->task;
1719   struct gomp_team *team = thr->ts.team;
1720   struct gomp_task_depend_entry elem, *ent = NULL;
1721   struct gomp_taskwait taskwait;
1722   size_t orig_ndepend = (uintptr_t) depend[0];
1723   size_t nout = (uintptr_t) depend[1];
1724   size_t ndepend = orig_ndepend;
1725   size_t normal = ndepend;
1726   size_t n = 2;
1727   size_t i;
1728   size_t num_awaited = 0;
1729   struct gomp_task *child_task = NULL;
1730   struct gomp_task *to_free = NULL;
1731   int do_wake = 0;
1732 
1733   if (ndepend == 0)
1734     {
1735       ndepend = nout;
1736       nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1737       normal = nout + (uintptr_t) depend[4];
1738       n = 5;
1739     }
1740   gomp_mutex_lock (&team->task_lock);
1741   for (i = 0; i < ndepend; i++)
1742     {
1743       elem.addr = depend[i + n];
1744       elem.is_in = i >= nout;
1745       if (__builtin_expect (i >= normal, 0))
1746 	{
1747 	  void **d = (void **) elem.addr;
1748 	  switch ((uintptr_t) d[1])
1749 	    {
1750 	    case GOMP_DEPEND_IN:
1751 	      break;
1752 	    case GOMP_DEPEND_OUT:
1753 	    case GOMP_DEPEND_INOUT:
1754 	    case GOMP_DEPEND_MUTEXINOUTSET:
1755 	      elem.is_in = 0;
1756 	      break;
1757 	    default:
1758 	      gomp_fatal ("unknown omp_depend_t dependence type %d",
1759 			  (int) (uintptr_t) d[1]);
1760 	    }
1761 	  elem.addr = d[0];
1762 	}
1763       ent = htab_find (task->depend_hash, &elem);
1764       for (; ent; ent = ent->next)
1765 	if (elem.is_in && ent->is_in)
1766 	  continue;
1767 	else
1768 	  {
1769 	    struct gomp_task *tsk = ent->task;
1770 	    if (!tsk->parent_depends_on)
1771 	      {
1772 		tsk->parent_depends_on = true;
1773 		++num_awaited;
1774 		/* If dependency TSK itself has no dependencies and is
1775 		   ready to run, move it up front so that we run it as
1776 		   soon as possible.  */
1777 		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1778 		  priority_queue_upgrade_task (tsk, task);
1779 	      }
1780 	  }
1781     }
1782   if (num_awaited == 0)
1783     {
1784       gomp_mutex_unlock (&team->task_lock);
1785       return;
1786     }
1787 
1788   memset (&taskwait, 0, sizeof (taskwait));
1789   taskwait.n_depend = num_awaited;
1790   gomp_sem_init (&taskwait.taskwait_sem, 0);
1791   task->taskwait = &taskwait;
1792 
1793   while (1)
1794     {
1795       bool cancelled = false;
1796       if (taskwait.n_depend == 0)
1797 	{
1798 	  task->taskwait = NULL;
1799 	  gomp_mutex_unlock (&team->task_lock);
1800 	  if (to_free)
1801 	    {
1802 	      gomp_finish_task (to_free);
1803 	      free (to_free);
1804 	    }
1805 	  gomp_sem_destroy (&taskwait.taskwait_sem);
1806 	  return;
1807 	}
1808 
1809       /* Theoretically when we have multiple priorities, we should
1810 	 choose between the highest priority item in
1811 	 task->children_queue and team->task_queue here, so we should
1812 	 use priority_queue_next_task().  However, since we are
1813 	 running an undeferred task, perhaps that makes all tasks it
1814 	 depends on undeferred, thus a priority of INF?  This would
1815 	 make it unnecessary to take anything but the dependencies
1816 	 into account here.
1817 
1818 	 On the other hand, if we want to use priority_queue_next_task(),
1819 	 care should be taken to only use priority_queue_remove()
1820 	 below if the task was actually removed from the children
1821 	 queue.  */
1822       bool ignored;
1823       struct gomp_task *next_task
1824 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1825 				    PQ_IGNORED, NULL, &ignored);
1826 
1827       if (next_task->kind == GOMP_TASK_WAITING)
1828 	{
1829 	  child_task = next_task;
1830 	  cancelled
1831 	    = gomp_task_run_pre (child_task, task, team);
1832 	  if (__builtin_expect (cancelled, 0))
1833 	    {
1834 	      if (to_free)
1835 		{
1836 		  gomp_finish_task (to_free);
1837 		  free (to_free);
1838 		  to_free = NULL;
1839 		}
1840 	      goto finish_cancelled;
1841 	    }
1842 	}
1843       else
1844 	/* All tasks we are waiting for are either running in other
1845 	   threads, or they are tasks that have not had their
1846 	   dependencies met (so they're not even in the queue).  Wait
1847 	   for them.  */
1848 	taskwait.in_depend_wait = true;
1849       gomp_mutex_unlock (&team->task_lock);
1850       if (do_wake)
1851 	{
1852 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1853 	  do_wake = 0;
1854 	}
1855       if (to_free)
1856 	{
1857 	  gomp_finish_task (to_free);
1858 	  free (to_free);
1859 	  to_free = NULL;
1860 	}
1861       if (child_task)
1862 	{
1863 	  thr->task = child_task;
1864 	  if (__builtin_expect (child_task->fn == NULL, 0))
1865 	    {
1866 	      if (gomp_target_task_fn (child_task->fn_data))
1867 		{
1868 		  thr->task = task;
1869 		  gomp_mutex_lock (&team->task_lock);
1870 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1871 		  struct gomp_target_task *ttask
1872 		    = (struct gomp_target_task *) child_task->fn_data;
1873 		  /* If GOMP_PLUGIN_target_task_completion has run already
1874 		     in between gomp_target_task_fn and the mutex lock,
1875 		     perform the requeuing here.  */
1876 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1877 		    gomp_target_task_completion (team, child_task);
1878 		  else
1879 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1880 		  child_task = NULL;
1881 		  continue;
1882 		}
1883 	    }
1884 	  else
1885 	    child_task->fn (child_task->fn_data);
1886 	  thr->task = task;
1887 	}
1888       else
1889 	gomp_sem_wait (&taskwait.taskwait_sem);
1890       gomp_mutex_lock (&team->task_lock);
1891       if (child_task)
1892 	{
1893 	 finish_cancelled:;
1894 	  size_t new_tasks
1895 	    = gomp_task_run_post_handle_depend (child_task, team);
1896 	  if (child_task->parent_depends_on)
1897 	    --taskwait.n_depend;
1898 
1899 	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1900 				 child_task, MEMMODEL_RELAXED);
1901 	  child_task->pnode[PQ_CHILDREN].next = NULL;
1902 	  child_task->pnode[PQ_CHILDREN].prev = NULL;
1903 
1904 	  gomp_clear_parent (&child_task->children_queue);
1905 	  gomp_task_run_post_remove_taskgroup (child_task);
1906 	  to_free = child_task;
1907 	  child_task = NULL;
1908 	  team->task_count--;
1909 	  if (new_tasks > 1)
1910 	    {
1911 	      do_wake = team->nthreads - team->task_running_count
1912 			- !task->in_tied_task;
1913 	      if (do_wake > new_tasks)
1914 		do_wake = new_tasks;
1915 	    }
1916 	}
1917     }
1918 }
1919 
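/* Illustrative sketch of the simplest DEPEND encoding handled above
   (depend[0] != 0): depend[0] is the number of dependence addresses,
   depend[1] how many of them are out/inout (those come first), and the
   addresses follow from depend[2] on.  This is roughly what the compiler
   might build for "#pragma omp taskwait depend(out: a[0]) depend(in: b[0])"
   before calling GOMP_taskwait_depend; the names are hypothetical.  */
#if 0
void
example_depend_vector (int *a, int *b)
{
  void *depend[4];
  depend[0] = (void *) (uintptr_t) 2;	/* two dependences in total */
  depend[1] = (void *) (uintptr_t) 1;	/* the first one is out/inout */
  depend[2] = a;			/* out dependence on a[0] */
  depend[3] = b;			/* in dependence on b[0] */
  GOMP_taskwait_depend (depend);
}
#endif
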
1920 /* Called when encountering a taskyield directive.  */
1921 
1922 void
1923 GOMP_taskyield (void)
1924 {
1925   /* Nothing at the moment.  */
1926 }
1927 
1928 static inline struct gomp_taskgroup *
1929 gomp_taskgroup_init (struct gomp_taskgroup *prev)
1930 {
1931   struct gomp_taskgroup *taskgroup
1932     = gomp_malloc (sizeof (struct gomp_taskgroup));
1933   taskgroup->prev = prev;
1934   priority_queue_init (&taskgroup->taskgroup_queue);
1935   taskgroup->reductions = prev ? prev->reductions : NULL;
1936   taskgroup->in_taskgroup_wait = false;
1937   taskgroup->cancelled = false;
1938   taskgroup->workshare = false;
1939   taskgroup->num_children = 0;
1940   gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1941   return taskgroup;
1942 }
1943 
1944 void
1945 GOMP_taskgroup_start (void)
1946 {
1947   struct gomp_thread *thr = gomp_thread ();
1948   struct gomp_team *team = thr->ts.team;
1949   struct gomp_task *task = thr->task;
1950 
1951   /* If team is NULL, all tasks are executed as
1952      GOMP_TASK_UNDEFERRED tasks and thus all child tasks of the
1953      taskgroup and their descendant tasks will be finished
1954      by the time GOMP_taskgroup_end is called.  */
1955   if (team == NULL)
1956     return;
1957   task->taskgroup = gomp_taskgroup_init (task->taskgroup);
1958 }
1959 
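/* Illustrative sketch, not part of libgomp: a taskgroup region in
   hypothetical user code.  GCC brackets the region with
   GOMP_taskgroup_start above and GOMP_taskgroup_end below, so leaving
   the region waits for the child task and all of its descendants.  */
#if 0
void
example_taskgroup (void)
{
  #pragma omp taskgroup
  {
    #pragma omp task
    {
      #pragma omp task
      grandchild_work ();	/* hypothetical; awaited as well */
      child_work ();		/* hypothetical */
    }
  }  /* GOMP_taskgroup_end blocks here until both tasks are done.  */
}
#endif
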
1960 void
1961 GOMP_taskgroup_end (void)
1962 {
1963   struct gomp_thread *thr = gomp_thread ();
1964   struct gomp_team *team = thr->ts.team;
1965   struct gomp_task *task = thr->task;
1966   struct gomp_taskgroup *taskgroup;
1967   struct gomp_task *child_task = NULL;
1968   struct gomp_task *to_free = NULL;
1969   int do_wake = 0;
1970 
1971   if (team == NULL)
1972     return;
1973   taskgroup = task->taskgroup;
1974   if (__builtin_expect (taskgroup == NULL, 0)
1975       && thr->ts.level == 0)
1976     {
1977       /* This can happen if GOMP_taskgroup_start is called when
1978 	 thr->ts.team == NULL, but inside of the taskgroup there
1979 	 is #pragma omp target nowait that creates an implicit
1980 	 team with a single thread.  In this case, we want to wait
1981 	 for all outstanding tasks in this team.  */
1982       gomp_team_barrier_wait (&team->barrier);
1983       return;
1984     }
1985 
1986   /* The acquire barrier on load of taskgroup->num_children here
1987      synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1988      It is not necessary that we synchronize with other non-0 writes at
1989      this point, but we must ensure that all writes to memory by a
1990      child thread task work function are seen before we exit from
1991      GOMP_taskgroup_end.  */
1992   if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1993     goto finish;
1994 
1995   bool unused;
1996   gomp_mutex_lock (&team->task_lock);
1997   while (1)
1998     {
1999       bool cancelled = false;
2000       if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
2001 				  MEMMODEL_RELAXED))
2002 	{
2003 	  if (taskgroup->num_children)
2004 	    {
2005 	      if (priority_queue_empty_p (&task->children_queue,
2006 					  MEMMODEL_RELAXED))
2007 		goto do_wait;
2008 	      child_task
2009 		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
2010 					    PQ_TEAM, &team->task_queue,
2011 					    &unused);
2012 	    }
2013 	  else
2014 	    {
2015 	      gomp_mutex_unlock (&team->task_lock);
2016 	      if (to_free)
2017 		{
2018 		  gomp_finish_task (to_free);
2019 		  free (to_free);
2020 		}
2021 	      goto finish;
2022 	    }
2023 	}
2024       else
2025 	child_task
2026 	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
2027 				      PQ_TEAM, &team->task_queue, &unused);
2028       if (child_task->kind == GOMP_TASK_WAITING)
2029 	{
2030 	  cancelled
2031 	    = gomp_task_run_pre (child_task, child_task->parent, team);
2032 	  if (__builtin_expect (cancelled, 0))
2033 	    {
2034 	      if (to_free)
2035 		{
2036 		  gomp_finish_task (to_free);
2037 		  free (to_free);
2038 		  to_free = NULL;
2039 		}
2040 	      goto finish_cancelled;
2041 	    }
2042 	}
2043       else
2044 	{
2045 	  child_task = NULL;
2046 	 do_wait:
2047 	/* All tasks we are waiting for are either running in other
2048 	   threads, or they are tasks that have not had their
2049 	   dependencies met (so they're not even in the queue).  Wait
2050 	   for them.  */
2051 	  taskgroup->in_taskgroup_wait = true;
2052 	}
2053       gomp_mutex_unlock (&team->task_lock);
2054       if (do_wake)
2055 	{
2056 	  gomp_team_barrier_wake (&team->barrier, do_wake);
2057 	  do_wake = 0;
2058 	}
2059       if (to_free)
2060 	{
2061 	  gomp_finish_task (to_free);
2062 	  free (to_free);
2063 	  to_free = NULL;
2064 	}
2065       if (child_task)
2066 	{
2067 	  thr->task = child_task;
2068 	  if (__builtin_expect (child_task->fn == NULL, 0))
2069 	    {
2070 	      if (gomp_target_task_fn (child_task->fn_data))
2071 		{
2072 		  thr->task = task;
2073 		  gomp_mutex_lock (&team->task_lock);
2074 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2075 		  struct gomp_target_task *ttask
2076 		    = (struct gomp_target_task *) child_task->fn_data;
2077 		  /* If GOMP_PLUGIN_target_task_completion has run already
2078 		     in between gomp_target_task_fn and the mutex lock,
2079 		     perform the requeuing here.  */
2080 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2081 		    gomp_target_task_completion (team, child_task);
2082 		  else
2083 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
2084 		  child_task = NULL;
2085 		  continue;
2086 		}
2087 	    }
2088 	  else
2089 	    child_task->fn (child_task->fn_data);
2090 	  thr->task = task;
2091 	}
2092       else
2093 	gomp_sem_wait (&taskgroup->taskgroup_sem);
2094       gomp_mutex_lock (&team->task_lock);
2095       if (child_task)
2096 	{
2097 	  if (child_task->detach_team)
2098 	    {
2099 	      assert (child_task->detach_team == team);
2100 	      child_task->kind = GOMP_TASK_DETACHED;
2101 	      ++team->task_detach_count;
2102 	      gomp_debug (0,
2103 			  "thread %d: task with event %p finished without "
2104 			  "completion event fulfilled in taskgroup\n",
2105 			  thr->ts.team_id, child_task);
2106 	      child_task = NULL;
2107 	      continue;
2108 	    }
2109 
2110 	 finish_cancelled:;
2111 	  size_t new_tasks
2112 	    = gomp_task_run_post_handle_depend (child_task, team);
2113 	  gomp_task_run_post_remove_parent (child_task);
2114 	  gomp_clear_parent (&child_task->children_queue);
2115 	  gomp_task_run_post_remove_taskgroup (child_task);
2116 	  to_free = child_task;
2117 	  child_task = NULL;
2118 	  team->task_count--;
2119 	  if (new_tasks > 1)
2120 	    {
2121 	      do_wake = team->nthreads - team->task_running_count
2122 			- !task->in_tied_task;
2123 	      if (do_wake > new_tasks)
2124 		do_wake = new_tasks;
2125 	    }
2126 	}
2127     }
2128 
2129  finish:
2130   task->taskgroup = taskgroup->prev;
2131   gomp_sem_destroy (&taskgroup->taskgroup_sem);
2132   free (taskgroup);
2133 }
2134 
2135 static inline __attribute__((always_inline)) void
2136 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2137 			 unsigned nthreads)
2138 {
2139   size_t total_cnt = 0;
2140   uintptr_t *d = data;
2141   struct htab *old_htab = NULL, *new_htab;
2142   do
2143     {
2144       if (__builtin_expect (orig != NULL, 0))
2145 	{
2146 	  /* For worksharing task reductions, memory has been allocated
2147 	     already by some other thread that encountered the construct
2148 	     earlier.  */
2149 	  d[2] = orig[2];
2150 	  d[6] = orig[6];
2151 	  orig = (uintptr_t *) orig[4];
2152 	}
2153       else
2154 	{
2155 	  size_t sz = d[1] * nthreads;
2156 	  /* Should use omp_alloc if d[3] is not -1.  */
2157 	  void *ptr = gomp_aligned_alloc (d[2], sz);
2158 	  memset (ptr, '\0', sz);
2159 	  d[2] = (uintptr_t) ptr;
2160 	  d[6] = d[2] + sz;
2161 	}
2162       d[5] = 0;
2163       total_cnt += d[0];
2164       if (d[4] == 0)
2165 	{
2166 	  d[4] = (uintptr_t) old;
2167 	  break;
2168 	}
2169       else
2170 	d = (uintptr_t *) d[4];
2171     }
2172   while (1);
2173   if (old && old[5])
2174     {
2175       old_htab = (struct htab *) old[5];
2176       total_cnt += htab_elements (old_htab);
2177     }
2178   new_htab = htab_create (total_cnt);
2179   if (old_htab)
2180     {
2181       /* Copy old hash table, like in htab_expand.  */
2182       hash_entry_type *p, *olimit;
2183       new_htab->n_elements = htab_elements (old_htab);
2184       olimit = old_htab->entries + old_htab->size;
2185       p = old_htab->entries;
2186       do
2187 	{
2188 	  hash_entry_type x = *p;
2189 	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2190 	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2191 	  p++;
2192 	}
2193       while (p < olimit);
2194     }
2195   d = data;
2196   do
2197     {
2198       size_t j;
2199       for (j = 0; j < d[0]; ++j)
2200 	{
2201 	  uintptr_t *p = d + 7 + j * 3;
2202 	  p[2] = (uintptr_t) d;
2203 	  /* Ugly hack, hash_entry_type is defined for the task dependencies,
2204 	     which hash on the first element which is a pointer.  We need
2205 	     to hash also on the first sizeof (uintptr_t) bytes which contain
2206 	     a pointer.  Hide the cast from the compiler.  */
2207 	  hash_entry_type n;
2208 	  __asm ("" : "=g" (n) : "0" (p));
2209 	  *htab_find_slot (&new_htab, n, INSERT) = n;
2210 	}
2211       if (d[4] == (uintptr_t) old)
2212 	break;
2213       else
2214 	d = (uintptr_t *) d[4];
2215     }
2216   while (1);
2217   d[5] = (uintptr_t) new_htab;
2218 }
2219 
2220 static void
2221 gomp_create_artificial_team (void)
2222 {
2223   struct gomp_thread *thr = gomp_thread ();
2224   struct gomp_task_icv *icv;
2225   struct gomp_team *team = gomp_new_team (1);
2226   struct gomp_task *task = thr->task;
2227   struct gomp_task **implicit_task = &task;
2228   icv = task ? &task->icv : &gomp_global_icv;
2229   team->prev_ts = thr->ts;
2230   thr->ts.team = team;
2231   thr->ts.team_id = 0;
2232   thr->ts.work_share = &team->work_shares[0];
2233   thr->ts.last_work_share = NULL;
2234 #ifdef HAVE_SYNC_BUILTINS
2235   thr->ts.single_count = 0;
2236 #endif
2237   thr->ts.static_trip = 0;
2238   thr->task = &team->implicit_task[0];
2239   gomp_init_task (thr->task, NULL, icv);
2240   while (*implicit_task
2241 	 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
2242     implicit_task = &(*implicit_task)->parent;
2243   if (*implicit_task)
2244     {
2245       thr->task = *implicit_task;
2246       gomp_end_task ();
2247       free (*implicit_task);
2248       thr->task = &team->implicit_task[0];
2249     }
2250 #ifdef LIBGOMP_USE_PTHREADS
2251   else
2252     pthread_setspecific (gomp_thread_destructor, thr);
2253 #endif
2254   if (implicit_task != &task)
2255     {
2256       *implicit_task = thr->task;
2257       thr->task = task;
2258     }
2259 }
2260 
2261 /* The format of data is:
2262    data[0]	cnt
2263    data[1]	size
2264    data[2]	alignment (on output array pointer)
2265    data[3]	allocator (-1 if malloc allocator)
2266    data[4]	next pointer
2267    data[5]	used internally (htab pointer)
2268    data[6]	used internally (end of array)
2269    cnt times
2270    ent[0]	address
2271    ent[1]	offset
2272    ent[2]	used internally (pointer to data[0])
2273    The entries are sorted by increasing offset, so that a binary
2274    search can be performed.  Normally, data[8] is 0; the exception is
2275    for worksharing construct task reductions in cancellable parallel,
2276    where at offset 0 there should be space for a pointer and an integer
2277    which are used internally.  */
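
/* Illustrative sketch of the layout described above, roughly what the
   compiler might emit for a single "task_reduction (+: x)" clause on an
   int x.  The exact size and alignment values are up to the compiler,
   and this sketch omits the surrounding taskgroup bookkeeping that is
   also emitted.  */
#if 0
static int x;				/* hypothetical reduction variable */

void
example_reduction_data (void)
{
  static uintptr_t data[7 + 3 * 1];
  data[0] = 1;				/* cnt: one reduction variable */
  data[1] = sizeof (int);		/* per-thread chunk size */
  data[2] = __alignof__ (int);		/* alignment; array pointer on output */
  data[3] = (uintptr_t) -1;		/* allocator: plain malloc */
  data[4] = 0;				/* next pointer: no further block */
  data[5] = 0;				/* htab pointer, filled in internally */
  data[6] = 0;				/* end of array, filled in internally */
  data[7] = (uintptr_t) &x;		/* ent[0]: address of the original */
  data[8] = 0;				/* ent[1]: offset within the chunk */
  data[9] = 0;				/* ent[2]: filled in internally */
  GOMP_taskgroup_reduction_register (data);
}
#endif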
2278 
2279 void
2280 GOMP_taskgroup_reduction_register (uintptr_t *data)
2281 {
2282   struct gomp_thread *thr = gomp_thread ();
2283   struct gomp_team *team = thr->ts.team;
2284   struct gomp_task *task;
2285   unsigned nthreads;
2286   if (__builtin_expect (team == NULL, 0))
2287     {
2288       /* The task reduction code needs a team and task, so for
2289 	 orphaned taskgroups just create the implicit team.  */
2290       gomp_create_artificial_team ();
2291       ialias_call (GOMP_taskgroup_start) ();
2292       team = thr->ts.team;
2293     }
2294   nthreads = team->nthreads;
2295   task = thr->task;
2296   gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2297   task->taskgroup->reductions = data;
2298 }
2299 
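/* Illustrative sketch, not part of libgomp: the user-level directives
   (hypothetical code) that lead to the register call above and the
   unregister call below via GOMP_taskgroup_start and friends.  */
#if 0
int
example_task_reduction (int n)
{
  int sum = 0;
  #pragma omp taskgroup task_reduction(+: sum)
  {
    for (int i = 0; i < n; i++)
      {
	#pragma omp task in_reduction(+: sum)
	sum += i;
      }
  }
  return sum;
}
#endif
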
2300 void
2301 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2302 {
2303   uintptr_t *d = data;
2304   htab_free ((struct htab *) data[5]);
2305   do
2306     {
2307       gomp_aligned_free ((void *) d[2]);
2308       d = (uintptr_t *) d[4];
2309     }
2310   while (d && !d[5]);
2311 }
2312 ialias (GOMP_taskgroup_reduction_unregister)
2313 
2314 /* For i = 0 to cnt-1, remap ptrs[i], which is either the address of the
2315    original list item or the address of a previously remapped original
2316    list item, to the address of the private copy and store that to ptrs[i].
2317    For i < cntorig, additionally set ptrs[cnt+i] to the address of
2318    the original list item.  */
2319 
2320 void
2321 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2322 {
2323   struct gomp_thread *thr = gomp_thread ();
2324   struct gomp_task *task = thr->task;
2325   unsigned id = thr->ts.team_id;
2326   uintptr_t *data = task->taskgroup->reductions;
2327   uintptr_t *d;
2328   struct htab *reduction_htab = (struct htab *) data[5];
2329   size_t i;
2330   for (i = 0; i < cnt; ++i)
2331     {
2332       hash_entry_type ent, n;
2333       __asm ("" : "=g" (ent) : "0" (ptrs + i));
2334       n = htab_find (reduction_htab, ent);
2335       if (n)
2336 	{
2337 	  uintptr_t *p;
2338 	  __asm ("" : "=g" (p) : "0" (n));
2339 	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2340 	     p[1] is the offset within the allocated chunk for each
2341 	     thread, p[2] is the array registered with
2342 	     GOMP_taskgroup_reduction_register, d[2] is the base of the
2343 	     allocated memory and d[1] is the size of the allocated chunk
2344 	     for one thread.  */
2345 	  d = (uintptr_t *) p[2];
2346 	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2347 	  if (__builtin_expect (i < cntorig, 0))
2348 	    ptrs[cnt + i] = (void *) p[0];
2349 	  continue;
2350 	}
2351       d = data;
2352       while (d != NULL)
2353 	{
2354 	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2355 	    break;
2356 	  d = (uintptr_t *) d[4];
2357 	}
2358       if (d == NULL)
2359 	gomp_fatal ("couldn't find matching task_reduction or reduction with "
2360 		    "task modifier for %p", ptrs[i]);
2361       uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2362       ptrs[i] = (void *) (d[2] + id * d[1] + off);
2363       if (__builtin_expect (i < cntorig, 0))
2364 	{
2365 	  size_t lo = 0, hi = d[0] - 1;
2366 	  while (lo <= hi)
2367 	    {
2368 	      size_t m = (lo + hi) / 2;
2369 	      if (d[7 + 3 * m + 1] < off)
2370 		lo = m + 1;
2371 	      else if (d[7 + 3 * m + 1] == off)
2372 		{
2373 		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
2374 		  break;
2375 		}
2376 	      else
2377 		hi = m - 1;
2378 	    }
2379 	  if (lo > hi)
2380 	    gomp_fatal ("couldn't find matching task_reduction or reduction "
2381 			"with task modifier for %p", ptrs[i]);
2382 	}
2383     }
2384 }
2385 
2386 struct gomp_taskgroup *
gomp_parallel_reduction_register(uintptr_t * data,unsigned nthreads)2387 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2388 {
2389   struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2390   gomp_reduction_register (data, NULL, NULL, nthreads);
2391   taskgroup->reductions = data;
2392   return taskgroup;
2393 }
2394 
2395 void
2396 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2397 {
2398   struct gomp_thread *thr = gomp_thread ();
2399   struct gomp_team *team = thr->ts.team;
2400   struct gomp_task *task = thr->task;
2401   unsigned nthreads = team->nthreads;
2402   gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2403   task->taskgroup->reductions = data;
2404 }
2405 
2406 void
2407 gomp_workshare_taskgroup_start (void)
2408 {
2409   struct gomp_thread *thr = gomp_thread ();
2410   struct gomp_team *team = thr->ts.team;
2411   struct gomp_task *task;
2412 
2413   if (team == NULL)
2414     {
2415       gomp_create_artificial_team ();
2416       team = thr->ts.team;
2417     }
2418   task = thr->task;
2419   task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2420   task->taskgroup->workshare = true;
2421 }
2422 
2423 void
2424 GOMP_workshare_task_reduction_unregister (bool cancelled)
2425 {
2426   struct gomp_thread *thr = gomp_thread ();
2427   struct gomp_task *task = thr->task;
2428   struct gomp_team *team = thr->ts.team;
2429   uintptr_t *data = task->taskgroup->reductions;
2430   ialias_call (GOMP_taskgroup_end) ();
2431   if (thr->ts.team_id == 0)
2432     ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2433   else
2434     htab_free ((struct htab *) data[5]);
2435 
2436   if (!cancelled)
2437     gomp_team_barrier_wait (&team->barrier);
2438 }
2439 
2440 int
2441 omp_in_final (void)
2442 {
2443   struct gomp_thread *thr = gomp_thread ();
2444   return thr->task && thr->task->final_task;
2445 }
2446 
2447 ialias (omp_in_final)
2448 
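/* Illustrative sketch, not part of libgomp: hypothetical user code using
   omp_in_final above to stop spawning work once it runs inside a final
   task (the final clause makes the task and all its descendants final).  */
#if 0
void
example_final (int depth)
{
  #pragma omp task final(depth > 8)
  {
    if (omp_in_final ())
      serial_fallback (depth);	/* hypothetical: no further task creation */
    else
      parallel_step (depth);	/* hypothetical: keeps creating tasks */
  }
}
#endif
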
2449 void
2450 omp_fulfill_event (omp_event_handle_t event)
2451 {
2452   struct gomp_task *task = (struct gomp_task *) event;
2453   if (!task->deferred_p)
2454     {
2455       if (gomp_sem_getcount (task->completion_sem) > 0)
2456 	gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task);
2457 
2458       gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
2459 		  task);
2460       gomp_sem_post (task->completion_sem);
2461       return;
2462     }
2463 
2464   struct gomp_team *team = __atomic_load_n (&task->detach_team,
2465 					    MEMMODEL_RELAXED);
2466   if (!team)
2467     gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
2468 		"been fulfilled!\n", task);
2469 
2470   gomp_mutex_lock (&team->task_lock);
2471   if (task->kind != GOMP_TASK_DETACHED)
2472     {
2473       /* The task has not finished running yet.  */
2474       gomp_debug (0,
2475 		  "omp_fulfill_event: %p event fulfilled for unfinished "
2476 		  "task\n", task);
2477       __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED);
2478       gomp_mutex_unlock (&team->task_lock);
2479       return;
2480     }
2481 
2482   gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
2483 	      task);
2484   size_t new_tasks = gomp_task_run_post_handle_depend (task, team);
2485   gomp_task_run_post_remove_parent (task);
2486   gomp_clear_parent (&task->children_queue);
2487   gomp_task_run_post_remove_taskgroup (task);
2488   team->task_count--;
2489   team->task_detach_count--;
2490 
2491   int do_wake = 0;
2492   bool shackled_thread_p = team == gomp_thread ()->ts.team;
2493   if (new_tasks > 0)
2494     {
2495       /* Wake up threads to run new tasks.  */
2496       gomp_team_barrier_set_task_pending (&team->barrier);
2497       do_wake = team->nthreads - team->task_running_count;
2498       if (do_wake > new_tasks)
2499 	do_wake = new_tasks;
2500     }
2501 
2502   if (!shackled_thread_p
2503       && !do_wake
2504       && team->task_detach_count == 0
2505       && gomp_team_barrier_waiting_for_tasks (&team->barrier))
2506     /* Ensure that at least one thread is woken up to signal that the
2507        barrier can finish.  */
2508     do_wake = 1;
2509 
2510   /* If we are running in an unshackled thread, the team might vanish before
2511      gomp_team_barrier_wake is run if we release the lock first, so keep the
2512      lock for the call in that case.  */
2513   if (shackled_thread_p)
2514     gomp_mutex_unlock (&team->task_lock);
2515   if (do_wake)
2516     gomp_team_barrier_wake (&team->barrier, do_wake);
2517   if (!shackled_thread_p)
2518     gomp_mutex_unlock (&team->task_lock);
2519 
2520   gomp_finish_task (task);
2521   free (task);
2522 }
2523 
2524 ialias (omp_fulfill_event)
2525
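
/* Illustrative sketch, not part of libgomp: a detachable task in
   hypothetical user code.  The task body finishes quickly, but the task
   only counts as complete once the completion event is fulfilled, which
   is the situation omp_fulfill_event above resolves (either before or
   after the task has reached GOMP_TASK_DETACHED).  */
#if 0
#include <omp.h>

void
example_detach (void)
{
  omp_event_handle_t ev;
  #pragma omp task detach(ev)
  start_async_io (ev);		/* hypothetical: the I/O completion callback
				   later calls omp_fulfill_event (ev) */
  /* Does not return until the event has been fulfilled and the detached
     task therefore counts as finished.  */
  #pragma omp taskwait
}
#endif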