xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/task.c (revision 23f5f46327e37e7811da3520f4bb933f9489322f)
1 /* Copyright (C) 2007-2020 Free Software Foundation, Inc.
2    Contributed by Richard Henderson <rth@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file handles the maintenance of tasks in response to task
27    creation and termination.  */
28 
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include "gomp-constants.h"
33 
34 typedef struct gomp_task_depend_entry *hash_entry_type;
35 
36 static inline void *
37 htab_alloc (size_t size)
38 {
39   return gomp_malloc (size);
40 }
41 
42 static inline void
43 htab_free (void *ptr)
44 {
45   free (ptr);
46 }
47 
48 #include "hashtab.h"
49 
50 static inline hashval_t
51 htab_hash (hash_entry_type element)
52 {
53   return hash_pointer (element->addr);
54 }
55 
56 static inline bool
57 htab_eq (hash_entry_type x, hash_entry_type y)
58 {
59   return x->addr == y->addr;
60 }
61 
62 /* Create a new task data structure.  */
63 
64 void
65 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
66 		struct gomp_task_icv *prev_icv)
67 {
68   /* It would seem that using memset here would be a win, but it turns
69      out that partially filling gomp_task allows us to keep the
70      overhead of task creation low.  In the nqueens-1.c test, for a
71      sufficiently large N, we drop the overhead from 5-6% to 1%.
72 
73      Note, the nqueens-1.c test in serial mode is a good test to
74      benchmark the overhead of creating tasks as there are millions of
75      tiny tasks created that all run undeferred.  */
76   task->parent = parent_task;
77   task->icv = *prev_icv;
78   task->kind = GOMP_TASK_IMPLICIT;
79   task->taskwait = NULL;
80   task->in_tied_task = false;
81   task->final_task = false;
82   task->copy_ctors_done = false;
83   task->parent_depends_on = false;
84   priority_queue_init (&task->children_queue);
85   task->taskgroup = NULL;
86   task->dependers = NULL;
87   task->depend_hash = NULL;
88   task->depend_count = 0;
89 }
90 
91 /* Clean up a task, after completing it.  */
92 
93 void
94 gomp_end_task (void)
95 {
96   struct gomp_thread *thr = gomp_thread ();
97   struct gomp_task *task = thr->task;
98 
99   gomp_finish_task (task);
100   thr->task = task->parent;
101 }
102 
103 /* Clear the parent field of every task in LIST.  */
104 
105 static inline void
106 gomp_clear_parent_in_list (struct priority_list *list)
107 {
108   struct priority_node *p = list->tasks;
109   if (p)
110     do
111       {
112 	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
113 	p = p->next;
114       }
115     while (p != list->tasks);
116 }
117 
118 /* Splay tree version of gomp_clear_parent_in_list.
119 
120    Clear the parent field of every task in NODE within SP, and free
121    the node when done.  */
122 
123 static void
124 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
125 {
126   if (!node)
127     return;
128   prio_splay_tree_node left = node->left, right = node->right;
129   gomp_clear_parent_in_list (&node->key.l);
130 #if _LIBGOMP_CHECKING_
131   memset (node, 0xaf, sizeof (*node));
132 #endif
133   /* No need to remove the node from the tree.  We're nuking
134      everything, so just free the nodes and our caller can clear the
135      entire splay tree.  */
136   free (node);
137   gomp_clear_parent_in_tree (sp, left);
138   gomp_clear_parent_in_tree (sp, right);
139 }
140 
141 /* Clear the parent field of every task in Q and remove every task
142    from Q.  */
143 
144 static inline void
145 gomp_clear_parent (struct priority_queue *q)
146 {
147   if (priority_queue_multi_p (q))
148     {
149       gomp_clear_parent_in_tree (&q->t, q->t.root);
150       /* All the nodes have been cleared in gomp_clear_parent_in_tree.
151 	 No need to remove anything.  We can just nuke everything.  */
152       q->t.root = NULL;
153     }
154   else
155     gomp_clear_parent_in_list (&q->l);
156 }
157 
158 /* Helper function for GOMP_task and gomp_create_target_task.
159 
160    For a TASK with in/out dependencies, fill in the various dependency
161    queues.  PARENT is the parent of said task.  DEPEND is as in
162    GOMP_task.  */
163 
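/* Illustration (x is a hypothetical variable, not taken from this file):
   given two sibling tasks created in this order,

	#pragma omp task depend(out: x)		// task A
	#pragma omp task depend(in: x)		// task B

   handling B's depend clause below finds A's entry for &x in PARENT's
   depend_hash, so B is appended to A's dependers vector and
   B->num_dependees becomes 1.  B is therefore not queued by GOMP_task
   until A completes and gomp_task_run_post_handle_dependers drops the
   count back to zero.  */
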
164 static void
165 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
166 			 void **depend)
167 {
168   size_t ndepend = (uintptr_t) depend[0];
169   size_t i;
170   hash_entry_type ent;
171 
172   if (ndepend)
173     {
174       /* depend[0] is total # */
175       size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
176       /* ndepend - nout is # of in: */
177       for (i = 0; i < ndepend; i++)
178 	{
179 	  task->depend[i].addr = depend[2 + i];
180 	  task->depend[i].is_in = i >= nout;
181 	}
182     }
183   else
184     {
185       ndepend = (uintptr_t) depend[1]; /* total # */
186       size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
187       size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
188       /* For now we treat mutexinoutset like out, which is compliant, but
189 	 inefficient.  */
190       size_t nin = (uintptr_t) depend[4]; /* # of in: */
191       /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
192       size_t normal = nout + nmutexinoutset + nin;
193       size_t n = 0;
194       for (i = normal; i < ndepend; i++)
195 	{
196 	  void **d = (void **) (uintptr_t) depend[5 + i];
197 	  switch ((uintptr_t) d[1])
198 	    {
199 	    case GOMP_DEPEND_OUT:
200 	    case GOMP_DEPEND_INOUT:
201 	    case GOMP_DEPEND_MUTEXINOUTSET:
202 	      break;
203 	    case GOMP_DEPEND_IN:
204 	      continue;
205 	    default:
206 	      gomp_fatal ("unknown omp_depend_t dependence type %d",
207 			  (int) (uintptr_t) d[1]);
208 	    }
209 	  task->depend[n].addr = d[0];
210 	  task->depend[n++].is_in = 0;
211 	}
212       for (i = 0; i < normal; i++)
213 	{
214 	  task->depend[n].addr = depend[5 + i];
215 	  task->depend[n++].is_in = i >= nout + nmutexinoutset;
216 	}
217       for (i = normal; i < ndepend; i++)
218 	{
219 	  void **d = (void **) (uintptr_t) depend[5 + i];
220 	  if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
221 	    continue;
222 	  task->depend[n].addr = d[0];
223 	  task->depend[n++].is_in = 1;
224 	}
225     }
226   task->depend_count = ndepend;
227   task->num_dependees = 0;
228   if (parent->depend_hash == NULL)
229     parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
230   for (i = 0; i < ndepend; i++)
231     {
232       task->depend[i].next = NULL;
233       task->depend[i].prev = NULL;
234       task->depend[i].task = task;
235       task->depend[i].redundant = false;
236       task->depend[i].redundant_out = false;
237 
238       hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
239 					      &task->depend[i], INSERT);
240       hash_entry_type out = NULL, last = NULL;
241       if (*slot)
242 	{
243 	  /* If multiple depend entries of the same task name the same address,
244 	     all but the first one are redundant.  As inout/out come first, if any
245 	     of them is inout/out, it will win, which is the right semantics.  */
246 	  if ((*slot)->task == task)
247 	    {
248 	      task->depend[i].redundant = true;
249 	      continue;
250 	    }
251 	  for (ent = *slot; ent; ent = ent->next)
252 	    {
253 	      if (ent->redundant_out)
254 		break;
255 
256 	      last = ent;
257 
258 	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
259 	      if (task->depend[i].is_in && ent->is_in)
260 		continue;
261 
262 	      if (!ent->is_in)
263 		out = ent;
264 
265 	      struct gomp_task *tsk = ent->task;
266 	      if (tsk->dependers == NULL)
267 		{
268 		  tsk->dependers
269 		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
270 				   + 6 * sizeof (struct gomp_task *));
271 		  tsk->dependers->n_elem = 1;
272 		  tsk->dependers->allocated = 6;
273 		  tsk->dependers->elem[0] = task;
274 		  task->num_dependees++;
275 		  continue;
276 		}
277 	      /* We already have some other dependency on tsk from an earlier
278 		 depend clause.  */
279 	      else if (tsk->dependers->n_elem
280 		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
281 			   == task))
282 		continue;
283 	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
284 		{
285 		  tsk->dependers->allocated
286 		    = tsk->dependers->allocated * 2 + 2;
287 		  tsk->dependers
288 		    = gomp_realloc (tsk->dependers,
289 				    sizeof (struct gomp_dependers_vec)
290 				    + (tsk->dependers->allocated
291 				       * sizeof (struct gomp_task *)));
292 		}
293 	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
294 	      task->num_dependees++;
295 	    }
296 	  task->depend[i].next = *slot;
297 	  (*slot)->prev = &task->depend[i];
298 	}
299       *slot = &task->depend[i];
300 
301       /* There is no need to store more than one depend({,in}out:) task per
302 	 address in the hash table chain for the purpose of creation of
303 	 deferred tasks, because each out depends on all earlier outs, thus it
304 	 is enough to record just the last depend({,in}out:).  For depend(in:),
305 	 we need to keep all of the previous ones not terminated yet, because
306 	 a later depend({,in}out:) might need to depend on all of them.  So, if
307 	 the new task's clause is depend({,in}out:), we know there is at most
308 	 one other depend({,in}out:) clause in the list (out).  For
309 	 non-deferred tasks we want to see all outs, so they are moved to the
310 	 end of the chain; after the first redundant_out entry, all following
311 	 entries should be redundant_out.  */
312       if (!task->depend[i].is_in && out)
313 	{
314 	  if (out != last)
315 	    {
316 	      out->next->prev = out->prev;
317 	      out->prev->next = out->next;
318 	      out->next = last->next;
319 	      out->prev = last;
320 	      last->next = out;
321 	      if (out->next)
322 		out->next->prev = out;
323 	    }
324 	  out->redundant_out = true;
325 	}
326     }
327 }
328 
329 /* Called when encountering an explicit task directive.  If IF_CLAUSE is
330    false, then we must not delay in executing the task.  If UNTIED is true,
331    then the task may be executed by any member of the team.
332 
333    DEPEND is an array containing:
334      if depend[0] is non-zero, then:
335 	depend[0]: number of depend elements.
336 	depend[1]: number of depend elements of type "out/inout".
337 	depend[2..N+1]: address of [1..N]th depend element.
338      otherwise, when depend[0] is zero, then:
339 	depend[1]: number of depend elements.
340 	depend[2]: number of depend elements of type "out/inout".
341 	depend[3]: number of depend elements of type "mutexinoutset".
342 	depend[4]: number of depend elements of type "in".
343 	depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
344 	depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
345 		   omp_depend_t objects.  */
346 
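/* For illustration (a, b and c are hypothetical variables), a task
   created as

	#pragma omp task depend(out: a) depend(in: b, c)

   would be expected to arrive here in the first form above:

	depend[0] = 3	(total number of depend elements)
	depend[1] = 1	(number of out/inout elements)
	depend[2] = &a	(out/inout addresses come first)
	depend[3] = &b
	depend[4] = &c

   The second form (depend[0] == 0) is used when omp_depend_t objects
   or mutexinoutset dependencies are involved, as laid out above.  */
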
347 void
348 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
349 	   long arg_size, long arg_align, bool if_clause, unsigned flags,
350 	   void **depend, int priority)
351 {
352   struct gomp_thread *thr = gomp_thread ();
353   struct gomp_team *team = thr->ts.team;
354 
355 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
356   /* If pthread_mutex_* is used for omp_*lock*, then each task must be
357      tied to one thread all the time.  This means UNTIED tasks must be
358      tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
359      might be running on a different thread than FN.  */
360   if (cpyfn)
361     if_clause = false;
362   flags &= ~GOMP_TASK_FLAG_UNTIED;
363 #endif
364 
365   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
366   if (__builtin_expect (gomp_cancel_var, 0) && team)
367     {
368       if (gomp_team_barrier_cancelled (&team->barrier))
369 	return;
370       if (thr->task->taskgroup)
371 	{
372 	  if (thr->task->taskgroup->cancelled)
373 	    return;
374 	  if (thr->task->taskgroup->workshare
375 	      && thr->task->taskgroup->prev
376 	      && thr->task->taskgroup->prev->cancelled)
377 	    return;
378 	}
379     }
380 
381   if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
382     priority = 0;
383   else if (priority > gomp_max_task_priority_var)
384     priority = gomp_max_task_priority_var;
385 
386   if (!if_clause || team == NULL
387       || (thr->task && thr->task->final_task)
388       || team->task_count > 64 * team->nthreads)
389     {
390       struct gomp_task task;
391 
392       /* If there are depend clauses and earlier deferred sibling tasks
393 	 with depend clauses, check if there isn't a dependency.  If there
394 	 is, we need to wait for them.  There is no need to handle
395 	 depend clauses for non-deferred tasks other than this, because
396 	 the parent task is suspended until the child task finishes and thus
397 	 it can't start further child tasks.  */
398       if ((flags & GOMP_TASK_FLAG_DEPEND)
399 	  && thr->task && thr->task->depend_hash)
400 	gomp_task_maybe_wait_for_dependencies (depend);
401 
402       gomp_init_task (&task, thr->task, gomp_icv (false));
403       task.kind = GOMP_TASK_UNDEFERRED;
404       task.final_task = (thr->task && thr->task->final_task)
405 			|| (flags & GOMP_TASK_FLAG_FINAL);
406       task.priority = priority;
407       if (thr->task)
408 	{
409 	  task.in_tied_task = thr->task->in_tied_task;
410 	  task.taskgroup = thr->task->taskgroup;
411 	}
412       thr->task = &task;
413       if (__builtin_expect (cpyfn != NULL, 0))
414 	{
415 	  char buf[arg_size + arg_align - 1];
416 	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
417 				& ~(uintptr_t) (arg_align - 1));
418 	  cpyfn (arg, data);
419 	  fn (arg);
420 	}
421       else
422 	fn (data);
423       /* Access to "children" is normally done inside a task_lock
424 	 mutex region, but the only way this particular task.children
425 	 can be set is if this thread's task work function (fn)
426 	 creates children.  So since the setter is *this* thread, we
427 	 need no barriers here when testing for non-NULL.  We can have
428 	 task.children set by the current thread then changed by a
429 	 child thread, but seeing a stale non-NULL value is not a
430 	 problem.  Once past the task_lock acquisition, this thread
431 	 will see the real value of task.children.  */
432       if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
433 	{
434 	  gomp_mutex_lock (&team->task_lock);
435 	  gomp_clear_parent (&task.children_queue);
436 	  gomp_mutex_unlock (&team->task_lock);
437 	}
438       gomp_end_task ();
439     }
440   else
441     {
442       struct gomp_task *task;
443       struct gomp_task *parent = thr->task;
444       struct gomp_taskgroup *taskgroup = parent->taskgroup;
445       char *arg;
446       bool do_wake;
447       size_t depend_size = 0;
448 
449       if (flags & GOMP_TASK_FLAG_DEPEND)
450 	depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
451 		       * sizeof (struct gomp_task_depend_entry));
452       task = gomp_malloc (sizeof (*task) + depend_size
453 			  + arg_size + arg_align - 1);
454       arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
455 		      & ~(uintptr_t) (arg_align - 1));
456       gomp_init_task (task, parent, gomp_icv (false));
457       task->priority = priority;
458       task->kind = GOMP_TASK_UNDEFERRED;
459       task->in_tied_task = parent->in_tied_task;
460       task->taskgroup = taskgroup;
461       thr->task = task;
462       if (cpyfn)
463 	{
464 	  cpyfn (arg, data);
465 	  task->copy_ctors_done = true;
466 	}
467       else
468 	memcpy (arg, data, arg_size);
469       thr->task = parent;
470       task->kind = GOMP_TASK_WAITING;
471       task->fn = fn;
472       task->fn_data = arg;
473       task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
474       gomp_mutex_lock (&team->task_lock);
475       /* If parallel or taskgroup has been cancelled, don't start new
476 	 tasks.  */
477       if (__builtin_expect (gomp_cancel_var, 0)
478 	  && !task->copy_ctors_done)
479 	{
480 	  if (gomp_team_barrier_cancelled (&team->barrier))
481 	    {
482 	    do_cancel:
483 	      gomp_mutex_unlock (&team->task_lock);
484 	      gomp_finish_task (task);
485 	      free (task);
486 	      return;
487 	    }
488 	  if (taskgroup)
489 	    {
490 	      if (taskgroup->cancelled)
491 		goto do_cancel;
492 	      if (taskgroup->workshare
493 		  && taskgroup->prev
494 		  && taskgroup->prev->cancelled)
495 		goto do_cancel;
496 	    }
497 	}
498       if (taskgroup)
499 	taskgroup->num_children++;
500       if (depend_size)
501 	{
502 	  gomp_task_handle_depend (task, parent, depend);
503 	  if (task->num_dependees)
504 	    {
505 	      /* Tasks that depend on other tasks are not put into the
506 		 various waiting queues, so we are done for now.  Said
507 		 tasks are instead put into the queues via
508 		 gomp_task_run_post_handle_dependers() after their
509 		 dependencies have been satisfied.  After which, they
510 		 can be picked up by the various scheduling
511 		 points.  */
512 	      gomp_mutex_unlock (&team->task_lock);
513 	      return;
514 	    }
515 	}
516 
517       priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
518 			     task, priority,
519 			     PRIORITY_INSERT_BEGIN,
520 			     /*adjust_parent_depends_on=*/false,
521 			     task->parent_depends_on);
522       if (taskgroup)
523 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
524 			       task, priority,
525 			       PRIORITY_INSERT_BEGIN,
526 			       /*adjust_parent_depends_on=*/false,
527 			       task->parent_depends_on);
528 
529       priority_queue_insert (PQ_TEAM, &team->task_queue,
530 			     task, priority,
531 			     PRIORITY_INSERT_END,
532 			     /*adjust_parent_depends_on=*/false,
533 			     task->parent_depends_on);
534 
535       ++team->task_count;
536       ++team->task_queued_count;
537       gomp_team_barrier_set_task_pending (&team->barrier);
538       do_wake = team->task_running_count + !parent->in_tied_task
539 		< team->nthreads;
540       gomp_mutex_unlock (&team->task_lock);
541       if (do_wake)
542 	gomp_team_barrier_wake (&team->barrier, 1);
543     }
544 }
545 
546 ialias (GOMP_taskgroup_start)
547 ialias (GOMP_taskgroup_end)
548 ialias (GOMP_taskgroup_reduction_register)
549 
550 #define TYPE long
551 #define UTYPE unsigned long
552 #define TYPE_is_long 1
553 #include "taskloop.c"
554 #undef TYPE
555 #undef UTYPE
556 #undef TYPE_is_long
557 
558 #define TYPE unsigned long long
559 #define UTYPE TYPE
560 #define GOMP_taskloop GOMP_taskloop_ull
561 #include "taskloop.c"
562 #undef TYPE
563 #undef UTYPE
564 #undef GOMP_taskloop
565 
566 static void inline
567 priority_queue_move_task_first (enum priority_queue_type type,
568 				struct priority_queue *head,
569 				struct gomp_task *task)
570 {
571 #if _LIBGOMP_CHECKING_
572   if (!priority_queue_task_in_queue_p (type, head, task))
573     gomp_fatal ("Attempt to move first missing task %p", task);
574 #endif
575   struct priority_list *list;
576   if (priority_queue_multi_p (head))
577     {
578       list = priority_queue_lookup_priority (head, task->priority);
579 #if _LIBGOMP_CHECKING_
580       if (!list)
581 	gomp_fatal ("Unable to find priority %d", task->priority);
582 #endif
583     }
584   else
585     list = &head->l;
586   priority_list_remove (list, task_to_priority_node (type, task), 0);
587   priority_list_insert (type, list, task, task->priority,
588 			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
589 			task->parent_depends_on);
590 }
591 
592 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
593    with team->task_lock held, or is executed in the thread that called
594    gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
595    run before it acquires team->task_lock.  */
596 
597 static void
598 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
599 {
600   struct gomp_task *parent = task->parent;
601   if (parent)
602     priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
603 				    task);
604 
605   struct gomp_taskgroup *taskgroup = task->taskgroup;
606   if (taskgroup)
607     priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
608 				    task);
609 
610   priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
611 			 PRIORITY_INSERT_BEGIN, false,
612 			 task->parent_depends_on);
613   task->kind = GOMP_TASK_WAITING;
614   if (parent && parent->taskwait)
615     {
616       if (parent->taskwait->in_taskwait)
617 	{
618 	  /* One more task has had its dependencies met.
619 	     Inform any waiters.  */
620 	  parent->taskwait->in_taskwait = false;
621 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
622 	}
623       else if (parent->taskwait->in_depend_wait)
624 	{
625 	  /* One more task has had its dependencies met.
626 	     Inform any waiters.  */
627 	  parent->taskwait->in_depend_wait = false;
628 	  gomp_sem_post (&parent->taskwait->taskwait_sem);
629 	}
630     }
631   if (taskgroup && taskgroup->in_taskgroup_wait)
632     {
633       /* One more task has had its dependencies met.
634 	 Inform any waiters.  */
635       taskgroup->in_taskgroup_wait = false;
636       gomp_sem_post (&taskgroup->taskgroup_sem);
637     }
638 
639   ++team->task_queued_count;
640   gomp_team_barrier_set_task_pending (&team->barrier);
641   /* I'm afraid this can't be done after releasing team->task_lock,
642      as gomp_target_task_completion is run from an unrelated thread and
643      therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
644      the team could be gone already.  */
645   if (team->nthreads > team->task_running_count)
646     gomp_team_barrier_wake (&team->barrier, 1);
647 }
648 
649 /* Signal that a target task TTASK has completed the asynchronously
650    running phase and should be requeued as a task to handle the
651    variable unmapping.  */
652 
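/* A sketch of the expected call pattern (plugin details vary by device
   and are not shown in this file): gomp_target_task_fn hands the plugin
   a pointer to the struct gomp_target_task when it starts the
   asynchronous region, and the plugin's completion callback later passes
   that same pointer back, roughly

	GOMP_PLUGIN_target_task_completion (ttask);

   which requeues the task so a host thread can run the unmapping part.  */
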
653 void
654 GOMP_PLUGIN_target_task_completion (void *data)
655 {
656   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
657   struct gomp_task *task = ttask->task;
658   struct gomp_team *team = ttask->team;
659 
660   gomp_mutex_lock (&team->task_lock);
661   if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
662     {
663       ttask->state = GOMP_TARGET_TASK_FINISHED;
664       gomp_mutex_unlock (&team->task_lock);
665       return;
666     }
667   ttask->state = GOMP_TARGET_TASK_FINISHED;
668   gomp_target_task_completion (team, task);
669   gomp_mutex_unlock (&team->task_lock);
670 }
671 
672 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
673 
674 /* Called for nowait target tasks.  */
675 
676 bool
677 gomp_create_target_task (struct gomp_device_descr *devicep,
678 			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
679 			 size_t *sizes, unsigned short *kinds,
680 			 unsigned int flags, void **depend, void **args,
681 			 enum gomp_target_task_state state)
682 {
683   struct gomp_thread *thr = gomp_thread ();
684   struct gomp_team *team = thr->ts.team;
685 
686   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
687   if (__builtin_expect (gomp_cancel_var, 0) && team)
688     {
689       if (gomp_team_barrier_cancelled (&team->barrier))
690 	return true;
691       if (thr->task->taskgroup)
692 	{
693 	  if (thr->task->taskgroup->cancelled)
694 	    return true;
695 	  if (thr->task->taskgroup->workshare
696 	      && thr->task->taskgroup->prev
697 	      && thr->task->taskgroup->prev->cancelled)
698 	    return true;
699 	}
700     }
701 
702   struct gomp_target_task *ttask;
703   struct gomp_task *task;
704   struct gomp_task *parent = thr->task;
705   struct gomp_taskgroup *taskgroup = parent->taskgroup;
706   bool do_wake;
707   size_t depend_size = 0;
708   uintptr_t depend_cnt = 0;
709   size_t tgt_align = 0, tgt_size = 0;
710 
711   if (depend != NULL)
712     {
713       depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
714       depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
715     }
716   if (fn)
717     {
718       /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they are
719 	 firstprivate on the target task.  */
720       size_t i;
721       for (i = 0; i < mapnum; i++)
722 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
723 	  {
724 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
725 	    if (tgt_align < align)
726 	      tgt_align = align;
727 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
728 	    tgt_size += sizes[i];
729 	  }
730       if (tgt_align)
731 	tgt_size += tgt_align - 1;
732       else
733 	tgt_size = 0;
734     }
735 
736   task = gomp_malloc (sizeof (*task) + depend_size
737 		      + sizeof (*ttask)
738 		      + mapnum * (sizeof (void *) + sizeof (size_t)
739 				  + sizeof (unsigned short))
740 		      + tgt_size);
741   gomp_init_task (task, parent, gomp_icv (false));
742   task->priority = 0;
743   task->kind = GOMP_TASK_WAITING;
744   task->in_tied_task = parent->in_tied_task;
745   task->taskgroup = taskgroup;
746   ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
747   ttask->devicep = devicep;
748   ttask->fn = fn;
749   ttask->mapnum = mapnum;
750   ttask->args = args;
751   memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
752   ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
753   memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
754   ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
755   memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
756   if (tgt_align)
757     {
758       char *tgt = (char *) &ttask->kinds[mapnum];
759       size_t i;
760       uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
761       if (al)
762 	tgt += tgt_align - al;
763       tgt_size = 0;
764       for (i = 0; i < mapnum; i++)
765 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
766 	  {
767 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
768 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
769 	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
770 	    ttask->hostaddrs[i] = tgt + tgt_size;
771 	    tgt_size = tgt_size + sizes[i];
772 	  }
773     }
774   ttask->flags = flags;
775   ttask->state = state;
776   ttask->task = task;
777   ttask->team = team;
778   task->fn = NULL;
779   task->fn_data = ttask;
780   task->final_task = 0;
781   gomp_mutex_lock (&team->task_lock);
782   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
783   if (__builtin_expect (gomp_cancel_var, 0))
784     {
785       if (gomp_team_barrier_cancelled (&team->barrier))
786 	{
787 	do_cancel:
788 	  gomp_mutex_unlock (&team->task_lock);
789 	  gomp_finish_task (task);
790 	  free (task);
791 	  return true;
792 	}
793       if (taskgroup)
794 	{
795 	  if (taskgroup->cancelled)
796 	    goto do_cancel;
797 	  if (taskgroup->workshare
798 	      && taskgroup->prev
799 	      && taskgroup->prev->cancelled)
800 	    goto do_cancel;
801 	}
802     }
803   if (depend_size)
804     {
805       gomp_task_handle_depend (task, parent, depend);
806       if (task->num_dependees)
807 	{
808 	  if (taskgroup)
809 	    taskgroup->num_children++;
810 	  gomp_mutex_unlock (&team->task_lock);
811 	  return true;
812 	}
813     }
814   if (state == GOMP_TARGET_TASK_DATA)
815     {
816       gomp_task_run_post_handle_depend_hash (task);
817       gomp_mutex_unlock (&team->task_lock);
818       gomp_finish_task (task);
819       free (task);
820       return false;
821     }
822   if (taskgroup)
823     taskgroup->num_children++;
824   /* For async offloading, if we don't need to wait for dependencies,
825      run the gomp_target_task_fn right away, essentially schedule the
826      mapping part of the task in the current thread.  */
827   if (devicep != NULL
828       && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
829     {
830       priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
831 			     PRIORITY_INSERT_END,
832 			     /*adjust_parent_depends_on=*/false,
833 			     task->parent_depends_on);
834       if (taskgroup)
835 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
836 			       task, 0, PRIORITY_INSERT_END,
837 			       /*adjust_parent_depends_on=*/false,
838 			       task->parent_depends_on);
839       task->pnode[PQ_TEAM].next = NULL;
840       task->pnode[PQ_TEAM].prev = NULL;
841       task->kind = GOMP_TASK_TIED;
842       ++team->task_count;
843       gomp_mutex_unlock (&team->task_lock);
844 
845       thr->task = task;
846       gomp_target_task_fn (task->fn_data);
847       thr->task = parent;
848 
849       gomp_mutex_lock (&team->task_lock);
850       task->kind = GOMP_TASK_ASYNC_RUNNING;
851       /* If GOMP_PLUGIN_target_task_completion has run already
852 	 in between gomp_target_task_fn and the mutex lock,
853 	 perform the requeuing here.  */
854       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
855 	gomp_target_task_completion (team, task);
856       else
857 	ttask->state = GOMP_TARGET_TASK_RUNNING;
858       gomp_mutex_unlock (&team->task_lock);
859       return true;
860     }
861   priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
862 			 PRIORITY_INSERT_BEGIN,
863 			 /*adjust_parent_depends_on=*/false,
864 			 task->parent_depends_on);
865   if (taskgroup)
866     priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
867 			   PRIORITY_INSERT_BEGIN,
868 			   /*adjust_parent_depends_on=*/false,
869 			   task->parent_depends_on);
870   priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
871 			 PRIORITY_INSERT_END,
872 			 /*adjust_parent_depends_on=*/false,
873 			 task->parent_depends_on);
874   ++team->task_count;
875   ++team->task_queued_count;
876   gomp_team_barrier_set_task_pending (&team->barrier);
877   do_wake = team->task_running_count + !parent->in_tied_task
878 	    < team->nthreads;
879   gomp_mutex_unlock (&team->task_lock);
880   if (do_wake)
881     gomp_team_barrier_wake (&team->barrier, 1);
882   return true;
883 }
884 
885 /* Given a parent_depends_on task in LIST, move it to the front of its
886    priority so it is run as soon as possible.
887 
888    Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
889 
890    We rearrange the queue such that all parent_depends_on tasks are
891    first, and last_parent_depends_on points to the last such task we
892    rearranged.  For example, given the following tasks in a queue
893    where PD[123] are the parent_depends_on tasks:
894 
895 	task->children
896 	|
897 	V
898 	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
899 
900 	We rearrange such that:
901 
902 	task->children
903 	|	       +--- last_parent_depends_on
904 	|	       |
905 	V	       V
906 	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
907 
908 static void inline
909 priority_list_upgrade_task (struct priority_list *list,
910 			    struct priority_node *node)
911 {
912   struct priority_node *last_parent_depends_on
913     = list->last_parent_depends_on;
914   if (last_parent_depends_on)
915     {
916       node->prev->next = node->next;
917       node->next->prev = node->prev;
918       node->prev = last_parent_depends_on;
919       node->next = last_parent_depends_on->next;
920       node->prev->next = node;
921       node->next->prev = node;
922     }
923   else if (node != list->tasks)
924     {
925       node->prev->next = node->next;
926       node->next->prev = node->prev;
927       node->prev = list->tasks->prev;
928       node->next = list->tasks;
929       list->tasks = node;
930       node->prev->next = node;
931       node->next->prev = node;
932     }
933   list->last_parent_depends_on = node;
934 }
935 
936 /* Given a parent_depends_on TASK in its parent's children_queue, move
937    it to the front of its priority so it is run as soon as possible.
938 
939    PARENT is passed as an optimization.
940 
941    (This function could be defined in priority_queue.c, but we want it
942    inlined, and putting it in priority_queue.h is not an option, given
943    that gomp_task has not been properly defined at that point).  */
944 
945 static void inline
946 priority_queue_upgrade_task (struct gomp_task *task,
947 			     struct gomp_task *parent)
948 {
949   struct priority_queue *head = &parent->children_queue;
950   struct priority_node *node = &task->pnode[PQ_CHILDREN];
951 #if _LIBGOMP_CHECKING_
952   if (!task->parent_depends_on)
953     gomp_fatal ("priority_queue_upgrade_task: task must be a "
954 		"parent_depends_on task");
955   if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
956     gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
957 #endif
958   if (priority_queue_multi_p (head))
959     {
960       struct priority_list *list
961 	= priority_queue_lookup_priority (head, task->priority);
962       priority_list_upgrade_task (list, node);
963     }
964   else
965     priority_list_upgrade_task (&head->l, node);
966 }
967 
968 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
969    the way in LIST so that other tasks can be considered for
970    execution.  LIST contains tasks of type TYPE.
971 
972    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
973    if applicable.  */
974 
975 static void inline
976 priority_list_downgrade_task (enum priority_queue_type type,
977 			      struct priority_list *list,
978 			      struct gomp_task *child_task)
979 {
980   struct priority_node *node = task_to_priority_node (type, child_task);
981   if (list->tasks == node)
982     list->tasks = node->next;
983   else if (node->next != list->tasks)
984     {
985       /* The task in NODE is about to become TIED and TIED tasks
986 	 cannot come before WAITING tasks.  If we're about to
987 	 leave the queue in such an indeterminate state, rewire
988 	 things appropriately.  However, a TIED task at the end is
989 	 perfectly fine.  */
990       struct gomp_task *next_task = priority_node_to_task (type, node->next);
991       if (next_task->kind == GOMP_TASK_WAITING)
992 	{
993 	  /* Remove from list.  */
994 	  node->prev->next = node->next;
995 	  node->next->prev = node->prev;
996 	  /* Rewire at the end.  */
997 	  node->next = list->tasks;
998 	  node->prev = list->tasks->prev;
999 	  list->tasks->prev->next = node;
1000 	  list->tasks->prev = node;
1001 	}
1002     }
1003 
1004   /* If the current task is the last_parent_depends_on for its
1005      priority, adjust last_parent_depends_on appropriately.  */
1006   if (__builtin_expect (child_task->parent_depends_on, 0)
1007       && list->last_parent_depends_on == node)
1008     {
1009       struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1010       if (node->prev != node
1011 	  && prev_child->kind == GOMP_TASK_WAITING
1012 	  && prev_child->parent_depends_on)
1013 	list->last_parent_depends_on = node->prev;
1014       else
1015 	{
1016 	  /* There are no more parent_depends_on entries waiting
1017 	     to run, clear the list.  */
1018 	  list->last_parent_depends_on = NULL;
1019 	}
1020     }
1021 }
1022 
1023 /* Given a TASK in HEAD that is about to be executed, move it out of
1024    the way so that other tasks can be considered for execution.  HEAD
1025    contains tasks of type TYPE.
1026 
1027    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1028    if applicable.
1029 
1030    (This function could be defined in priority_queue.c, but we want it
1031    inlined, and putting it in priority_queue.h is not an option, given
1032    that gomp_task has not been properly defined at that point).  */
1033 
1034 static void inline
1035 priority_queue_downgrade_task (enum priority_queue_type type,
1036 			       struct priority_queue *head,
1037 			       struct gomp_task *task)
1038 {
1039 #if _LIBGOMP_CHECKING_
1040   if (!priority_queue_task_in_queue_p (type, head, task))
1041     gomp_fatal ("Attempt to downgrade missing task %p", task);
1042 #endif
1043   if (priority_queue_multi_p (head))
1044     {
1045       struct priority_list *list
1046 	= priority_queue_lookup_priority (head, task->priority);
1047       priority_list_downgrade_task (type, list, task);
1048     }
1049   else
1050     priority_list_downgrade_task (type, &head->l, task);
1051 }
1052 
1053 /* Setup CHILD_TASK to execute.  This is done by setting the task to
1054    TIED, and updating all relevant queues so that CHILD_TASK is no
1055    longer chosen for scheduling.  Also, remove CHILD_TASK from the
1056    overall team task queue entirely.
1057 
1058    Return TRUE if task or its containing taskgroup has been
1059    cancelled.  */
1060 
1061 static inline bool
1062 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1063 		   struct gomp_team *team)
1064 {
1065 #if _LIBGOMP_CHECKING_
1066   if (child_task->parent)
1067     priority_queue_verify (PQ_CHILDREN,
1068 			   &child_task->parent->children_queue, true);
1069   if (child_task->taskgroup)
1070     priority_queue_verify (PQ_TASKGROUP,
1071 			   &child_task->taskgroup->taskgroup_queue, false);
1072   priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1073 #endif
1074 
1075   /* Task is about to go tied, move it out of the way.  */
1076   if (parent)
1077     priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1078 				   child_task);
1079 
1080   /* Task is about to go tied, move it out of the way.  */
1081   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1082   if (taskgroup)
1083     priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1084 				   child_task);
1085 
1086   priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1087 			 MEMMODEL_RELAXED);
1088   child_task->pnode[PQ_TEAM].next = NULL;
1089   child_task->pnode[PQ_TEAM].prev = NULL;
1090   child_task->kind = GOMP_TASK_TIED;
1091 
1092   if (--team->task_queued_count == 0)
1093     gomp_team_barrier_clear_task_pending (&team->barrier);
1094   if (__builtin_expect (gomp_cancel_var, 0)
1095       && !child_task->copy_ctors_done)
1096     {
1097       if (gomp_team_barrier_cancelled (&team->barrier))
1098 	return true;
1099       if (taskgroup)
1100 	{
1101 	  if (taskgroup->cancelled)
1102 	    return true;
1103 	  if (taskgroup->workshare
1104 	      && taskgroup->prev
1105 	      && taskgroup->prev->cancelled)
1106 	    return true;
1107 	}
1108     }
1109   return false;
1110 }
1111 
1112 static void
1113 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1114 {
1115   struct gomp_task *parent = child_task->parent;
1116   size_t i;
1117 
1118   for (i = 0; i < child_task->depend_count; i++)
1119     if (!child_task->depend[i].redundant)
1120       {
1121 	if (child_task->depend[i].next)
1122 	  child_task->depend[i].next->prev = child_task->depend[i].prev;
1123 	if (child_task->depend[i].prev)
1124 	  child_task->depend[i].prev->next = child_task->depend[i].next;
1125 	else
1126 	  {
1127 	    hash_entry_type *slot
1128 	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1129 				NO_INSERT);
1130 	    if (*slot != &child_task->depend[i])
1131 	      abort ();
1132 	    if (child_task->depend[i].next)
1133 	      *slot = child_task->depend[i].next;
1134 	    else
1135 	      htab_clear_slot (parent->depend_hash, slot);
1136 	  }
1137       }
1138 }
1139 
1140 /* After a CHILD_TASK has been run, adjust the dependency queue for
1141    each task that depends on CHILD_TASK, to record the fact that there
1142    is one less dependency to worry about.  If a task that depended on
1143    CHILD_TASK now has no dependencies, place it in the various queues
1144    so it gets scheduled to run.
1145 
1146    TEAM is the team to which CHILD_TASK belongs.  */
1147 
1148 static size_t
1149 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1150 				     struct gomp_team *team)
1151 {
1152   struct gomp_task *parent = child_task->parent;
1153   size_t i, count = child_task->dependers->n_elem, ret = 0;
1154   for (i = 0; i < count; i++)
1155     {
1156       struct gomp_task *task = child_task->dependers->elem[i];
1157 
1158       /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
1159 	 TASK's remaining dependencies.  Once TASK has no other
1160 	 dependencies, put it into the various queues so it will get
1161 	 scheduled for execution.  */
1162       if (--task->num_dependees != 0)
1163 	continue;
1164 
1165       struct gomp_taskgroup *taskgroup = task->taskgroup;
1166       if (parent)
1167 	{
1168 	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1169 				 task, task->priority,
1170 				 PRIORITY_INSERT_BEGIN,
1171 				 /*adjust_parent_depends_on=*/true,
1172 				 task->parent_depends_on);
1173 	  if (parent->taskwait)
1174 	    {
1175 	      if (parent->taskwait->in_taskwait)
1176 		{
1177 		  /* One more task has had its dependencies met.
1178 		     Inform any waiters.  */
1179 		  parent->taskwait->in_taskwait = false;
1180 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1181 		}
1182 	      else if (parent->taskwait->in_depend_wait)
1183 		{
1184 		  /* One more task has had its dependencies met.
1185 		     Inform any waiters.  */
1186 		  parent->taskwait->in_depend_wait = false;
1187 		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1188 		}
1189 	    }
1190 	}
1191       else
1192 	task->parent = NULL;
1193       if (taskgroup)
1194 	{
1195 	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1196 				 task, task->priority,
1197 				 PRIORITY_INSERT_BEGIN,
1198 				 /*adjust_parent_depends_on=*/false,
1199 				 task->parent_depends_on);
1200 	  if (taskgroup->in_taskgroup_wait)
1201 	    {
1202 	      /* One more task has had its dependencies met.
1203 		 Inform any waiters.  */
1204 	      taskgroup->in_taskgroup_wait = false;
1205 	      gomp_sem_post (&taskgroup->taskgroup_sem);
1206 	    }
1207 	}
1208       priority_queue_insert (PQ_TEAM, &team->task_queue,
1209 			     task, task->priority,
1210 			     PRIORITY_INSERT_END,
1211 			     /*adjust_parent_depends_on=*/false,
1212 			     task->parent_depends_on);
1213       ++team->task_count;
1214       ++team->task_queued_count;
1215       ++ret;
1216     }
1217   free (child_task->dependers);
1218   child_task->dependers = NULL;
1219   if (ret > 1)
1220     gomp_team_barrier_set_task_pending (&team->barrier);
1221   return ret;
1222 }
1223 
1224 static inline size_t
1225 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1226 				  struct gomp_team *team)
1227 {
1228   if (child_task->depend_count == 0)
1229     return 0;
1230 
1231   /* If the parent is gone already, the hash table has been freed and
1232      nothing will use it anymore, so no need to remove anything from it.  */
1233   if (child_task->parent != NULL)
1234     gomp_task_run_post_handle_depend_hash (child_task);
1235 
1236   if (child_task->dependers == NULL)
1237     return 0;
1238 
1239   return gomp_task_run_post_handle_dependers (child_task, team);
1240 }
1241 
1242 /* Remove CHILD_TASK from its parent.  */
1243 
1244 static inline void
1245 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1246 {
1247   struct gomp_task *parent = child_task->parent;
1248   if (parent == NULL)
1249     return;
1250 
1251   /* If this was the last task the parent was depending on,
1252      synchronize with gomp_task_maybe_wait_for_dependencies so it can
1253      clean up and return.  */
1254   if (__builtin_expect (child_task->parent_depends_on, 0)
1255       && --parent->taskwait->n_depend == 0
1256       && parent->taskwait->in_depend_wait)
1257     {
1258       parent->taskwait->in_depend_wait = false;
1259       gomp_sem_post (&parent->taskwait->taskwait_sem);
1260     }
1261 
1262   if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1263 			     child_task, MEMMODEL_RELEASE)
1264       && parent->taskwait && parent->taskwait->in_taskwait)
1265     {
1266       parent->taskwait->in_taskwait = false;
1267       gomp_sem_post (&parent->taskwait->taskwait_sem);
1268     }
1269   child_task->pnode[PQ_CHILDREN].next = NULL;
1270   child_task->pnode[PQ_CHILDREN].prev = NULL;
1271 }
1272 
1273 /* Remove CHILD_TASK from its taskgroup.  */
1274 
1275 static inline void
1276 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1277 {
1278   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1279   if (taskgroup == NULL)
1280     return;
1281   bool empty = priority_queue_remove (PQ_TASKGROUP,
1282 				      &taskgroup->taskgroup_queue,
1283 				      child_task, MEMMODEL_RELAXED);
1284   child_task->pnode[PQ_TASKGROUP].next = NULL;
1285   child_task->pnode[PQ_TASKGROUP].prev = NULL;
1286   if (taskgroup->num_children > 1)
1287     --taskgroup->num_children;
1288   else
1289     {
1290       /* We access taskgroup->num_children in GOMP_taskgroup_end
1291 	 outside of the task lock mutex region, so
1292 	 need a release barrier here to ensure memory
1293 	 written by child_task->fn above is flushed
1294 	 before the NULL is written.  */
1295       __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1296     }
1297   if (empty && taskgroup->in_taskgroup_wait)
1298     {
1299       taskgroup->in_taskgroup_wait = false;
1300       gomp_sem_post (&taskgroup->taskgroup_sem);
1301     }
1302 }
1303 
1304 void
1305 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1306 {
1307   struct gomp_thread *thr = gomp_thread ();
1308   struct gomp_team *team = thr->ts.team;
1309   struct gomp_task *task = thr->task;
1310   struct gomp_task *child_task = NULL;
1311   struct gomp_task *to_free = NULL;
1312   int do_wake = 0;
1313 
1314   gomp_mutex_lock (&team->task_lock);
1315   if (gomp_barrier_last_thread (state))
1316     {
1317       if (team->task_count == 0)
1318 	{
1319 	  gomp_team_barrier_done (&team->barrier, state);
1320 	  gomp_mutex_unlock (&team->task_lock);
1321 	  gomp_team_barrier_wake (&team->barrier, 0);
1322 	  return;
1323 	}
1324       gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1325     }
1326 
1327   while (1)
1328     {
1329       bool cancelled = false;
1330       if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1331 	{
1332 	  bool ignored;
1333 	  child_task
1334 	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1335 					PQ_IGNORED, NULL,
1336 					&ignored);
1337 	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
1338 					 team);
1339 	  if (__builtin_expect (cancelled, 0))
1340 	    {
1341 	      if (to_free)
1342 		{
1343 		  gomp_finish_task (to_free);
1344 		  free (to_free);
1345 		  to_free = NULL;
1346 		}
1347 	      goto finish_cancelled;
1348 	    }
1349 	  team->task_running_count++;
1350 	  child_task->in_tied_task = true;
1351 	}
1352       gomp_mutex_unlock (&team->task_lock);
1353       if (do_wake)
1354 	{
1355 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1356 	  do_wake = 0;
1357 	}
1358       if (to_free)
1359 	{
1360 	  gomp_finish_task (to_free);
1361 	  free (to_free);
1362 	  to_free = NULL;
1363 	}
1364       if (child_task)
1365 	{
1366 	  thr->task = child_task;
1367 	  if (__builtin_expect (child_task->fn == NULL, 0))
1368 	    {
1369 	      if (gomp_target_task_fn (child_task->fn_data))
1370 		{
1371 		  thr->task = task;
1372 		  gomp_mutex_lock (&team->task_lock);
1373 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1374 		  team->task_running_count--;
1375 		  struct gomp_target_task *ttask
1376 		    = (struct gomp_target_task *) child_task->fn_data;
1377 		  /* If GOMP_PLUGIN_target_task_completion has run already
1378 		     in between gomp_target_task_fn and the mutex lock,
1379 		     perform the requeuing here.  */
1380 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1381 		    gomp_target_task_completion (team, child_task);
1382 		  else
1383 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1384 		  child_task = NULL;
1385 		  continue;
1386 		}
1387 	    }
1388 	  else
1389 	    child_task->fn (child_task->fn_data);
1390 	  thr->task = task;
1391 	}
1392       else
1393 	return;
1394       gomp_mutex_lock (&team->task_lock);
1395       if (child_task)
1396 	{
1397 	 finish_cancelled:;
1398 	  size_t new_tasks
1399 	    = gomp_task_run_post_handle_depend (child_task, team);
1400 	  gomp_task_run_post_remove_parent (child_task);
1401 	  gomp_clear_parent (&child_task->children_queue);
1402 	  gomp_task_run_post_remove_taskgroup (child_task);
1403 	  to_free = child_task;
1404 	  child_task = NULL;
1405 	  if (!cancelled)
1406 	    team->task_running_count--;
1407 	  if (new_tasks > 1)
1408 	    {
1409 	      do_wake = team->nthreads - team->task_running_count;
1410 	      if (do_wake > new_tasks)
1411 		do_wake = new_tasks;
1412 	    }
1413 	  if (--team->task_count == 0
1414 	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1415 	    {
1416 	      gomp_team_barrier_done (&team->barrier, state);
1417 	      gomp_mutex_unlock (&team->task_lock);
1418 	      gomp_team_barrier_wake (&team->barrier, 0);
1419 	      gomp_mutex_lock (&team->task_lock);
1420 	    }
1421 	}
1422     }
1423 }
1424 
1425 /* Called when encountering a taskwait directive.
1426 
1427    Wait for all children of the current task.  */
1428 
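/* For example, in user code such as

	#pragma omp task
	  f (i);
	#pragma omp taskwait

   the taskwait directive is expected to be lowered to a call to
   GOMP_taskwait (), which returns only after the child task running
   f (i) (and any other children of the current task) has completed.  */
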
1429 void
1430 GOMP_taskwait (void)
1431 {
1432   struct gomp_thread *thr = gomp_thread ();
1433   struct gomp_team *team = thr->ts.team;
1434   struct gomp_task *task = thr->task;
1435   struct gomp_task *child_task = NULL;
1436   struct gomp_task *to_free = NULL;
1437   struct gomp_taskwait taskwait;
1438   int do_wake = 0;
1439 
1440   /* The acquire barrier on load of task->children here synchronizes
1441      with the write of a NULL in gomp_task_run_post_remove_parent.  It is
1442      not necessary that we synchronize with other non-NULL writes at
1443      this point, but we must ensure that all writes to memory by a
1444      child thread task work function are seen before we exit from
1445      GOMP_taskwait.  */
1446   if (task == NULL
1447       || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1448     return;
1449 
1450   memset (&taskwait, 0, sizeof (taskwait));
1451   bool child_q = false;
1452   gomp_mutex_lock (&team->task_lock);
1453   while (1)
1454     {
1455       bool cancelled = false;
1456       if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1457 	{
1458 	  bool destroy_taskwait = task->taskwait != NULL;
1459 	  task->taskwait = NULL;
1460 	  gomp_mutex_unlock (&team->task_lock);
1461 	  if (to_free)
1462 	    {
1463 	      gomp_finish_task (to_free);
1464 	      free (to_free);
1465 	    }
1466 	  if (destroy_taskwait)
1467 	    gomp_sem_destroy (&taskwait.taskwait_sem);
1468 	  return;
1469 	}
1470       struct gomp_task *next_task
1471 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1472 				    PQ_TEAM, &team->task_queue, &child_q);
1473       if (next_task->kind == GOMP_TASK_WAITING)
1474 	{
1475 	  child_task = next_task;
1476 	  cancelled
1477 	    = gomp_task_run_pre (child_task, task, team);
1478 	  if (__builtin_expect (cancelled, 0))
1479 	    {
1480 	      if (to_free)
1481 		{
1482 		  gomp_finish_task (to_free);
1483 		  free (to_free);
1484 		  to_free = NULL;
1485 		}
1486 	      goto finish_cancelled;
1487 	    }
1488 	}
1489       else
1490 	{
1491 	/* All tasks we are waiting for are either running in other
1492 	   threads, or they are tasks that have not had their
1493 	   dependencies met (so they're not even in the queue).  Wait
1494 	   for them.  */
1495 	  if (task->taskwait == NULL)
1496 	    {
1497 	      taskwait.in_depend_wait = false;
1498 	      gomp_sem_init (&taskwait.taskwait_sem, 0);
1499 	      task->taskwait = &taskwait;
1500 	    }
1501 	  taskwait.in_taskwait = true;
1502 	}
1503       gomp_mutex_unlock (&team->task_lock);
1504       if (do_wake)
1505 	{
1506 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1507 	  do_wake = 0;
1508 	}
1509       if (to_free)
1510 	{
1511 	  gomp_finish_task (to_free);
1512 	  free (to_free);
1513 	  to_free = NULL;
1514 	}
1515       if (child_task)
1516 	{
1517 	  thr->task = child_task;
1518 	  if (__builtin_expect (child_task->fn == NULL, 0))
1519 	    {
1520 	      if (gomp_target_task_fn (child_task->fn_data))
1521 		{
1522 		  thr->task = task;
1523 		  gomp_mutex_lock (&team->task_lock);
1524 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1525 		  struct gomp_target_task *ttask
1526 		    = (struct gomp_target_task *) child_task->fn_data;
1527 		  /* If GOMP_PLUGIN_target_task_completion has run already
1528 		     in between gomp_target_task_fn and the mutex lock,
1529 		     perform the requeuing here.  */
1530 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1531 		    gomp_target_task_completion (team, child_task);
1532 		  else
1533 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1534 		  child_task = NULL;
1535 		  continue;
1536 		}
1537 	    }
1538 	  else
1539 	    child_task->fn (child_task->fn_data);
1540 	  thr->task = task;
1541 	}
1542       else
1543 	gomp_sem_wait (&taskwait.taskwait_sem);
1544       gomp_mutex_lock (&team->task_lock);
1545       if (child_task)
1546 	{
1547 	 finish_cancelled:;
1548 	  size_t new_tasks
1549 	    = gomp_task_run_post_handle_depend (child_task, team);
1550 
1551 	  if (child_q)
1552 	    {
1553 	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1554 				     child_task, MEMMODEL_RELAXED);
1555 	      child_task->pnode[PQ_CHILDREN].next = NULL;
1556 	      child_task->pnode[PQ_CHILDREN].prev = NULL;
1557 	    }
1558 
1559 	  gomp_clear_parent (&child_task->children_queue);
1560 
1561 	  gomp_task_run_post_remove_taskgroup (child_task);
1562 
1563 	  to_free = child_task;
1564 	  child_task = NULL;
1565 	  team->task_count--;
1566 	  if (new_tasks > 1)
1567 	    {
1568 	      do_wake = team->nthreads - team->task_running_count
1569 			- !task->in_tied_task;
1570 	      if (do_wake > new_tasks)
1571 		do_wake = new_tasks;
1572 	    }
1573 	}
1574     }
1575 }
1576 
1577 /* Called when encountering a taskwait directive with depend clause(s).
1578    Wait as if it were a mergeable included task construct with an empty body.  */
1579 
1580 void
1581 GOMP_taskwait_depend (void **depend)
1582 {
1583   struct gomp_thread *thr = gomp_thread ();
1584   struct gomp_team *team = thr->ts.team;
1585 
1586   /* If parallel or taskgroup has been cancelled, return early.  */
1587   if (__builtin_expect (gomp_cancel_var, 0) && team)
1588     {
1589       if (gomp_team_barrier_cancelled (&team->barrier))
1590 	return;
1591       if (thr->task->taskgroup)
1592 	{
1593 	  if (thr->task->taskgroup->cancelled)
1594 	    return;
1595 	  if (thr->task->taskgroup->workshare
1596 	      && thr->task->taskgroup->prev
1597 	      && thr->task->taskgroup->prev->cancelled)
1598 	    return;
1599 	}
1600     }
1601 
1602   if (thr->task && thr->task->depend_hash)
1603     gomp_task_maybe_wait_for_dependencies (depend);
1604 }
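
/* A source-level sketch, for illustration (compute () is just a placeholder):

     #pragma omp task depend(out: x)
       x = compute ();
     #pragma omp taskwait depend(in: x)

   a taskwait with depend clauses only waits for the previously generated
   sibling tasks that the listed addresses depend on, rather than for all
   child tasks as a plain taskwait would.  */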
1605 
1606 /* An undeferred task is about to run.  Wait for all tasks that this
1607    undeferred task depends on.
1608 
1609    This is done by first putting all known ready dependencies
1610    (dependencies that have their own dependencies met) at the top of
1611    the scheduling queues.  Then we iterate through these imminently
1612    ready tasks (and possibly other high priority tasks), and run them.
1613    If we run out of ready dependencies to execute, we either wait for
1614    the remaining dependencies to finish, or wait for them to get
1615    scheduled so we can run them.
1616 
1617    DEPEND is as in GOMP_task.  */
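
/* A rough picture of the two DEPEND encodings, as implied by the decoding
   code below and spelled out here only for orientation:

     short form (depend[0] != 0):
       depend[0]	number of depend clauses
       depend[1]	number of out/inout clauses among them
       depend[2]...	the depended-on addresses, out/inout entries first

     extended form (depend[0] == 0):
       depend[1]	total number of depend clauses
       depend[2]	number of out/inout clauses
       depend[3]	number of mutexinoutset clauses
       depend[4]	number of plain in clauses
       depend[5]...	the addresses; entries beyond the "normal" ones point
			at omp_depend_t objects holding the address and the
			dependence type.  */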
1618 
1619 void
1620 gomp_task_maybe_wait_for_dependencies (void **depend)
1621 {
1622   struct gomp_thread *thr = gomp_thread ();
1623   struct gomp_task *task = thr->task;
1624   struct gomp_team *team = thr->ts.team;
1625   struct gomp_task_depend_entry elem, *ent = NULL;
1626   struct gomp_taskwait taskwait;
1627   size_t orig_ndepend = (uintptr_t) depend[0];
1628   size_t nout = (uintptr_t) depend[1];
1629   size_t ndepend = orig_ndepend;
1630   size_t normal = ndepend;
1631   size_t n = 2;
1632   size_t i;
1633   size_t num_awaited = 0;
1634   struct gomp_task *child_task = NULL;
1635   struct gomp_task *to_free = NULL;
1636   int do_wake = 0;
1637 
1638   if (ndepend == 0)
1639     {
1640       ndepend = nout;
1641       nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1642       normal = nout + (uintptr_t) depend[4];
1643       n = 5;
1644     }
1645   gomp_mutex_lock (&team->task_lock);
1646   for (i = 0; i < ndepend; i++)
1647     {
1648       elem.addr = depend[i + n];
1649       elem.is_in = i >= nout;
1650       if (__builtin_expect (i >= normal, 0))
1651 	{
1652 	  void **d = (void **) elem.addr;
1653 	  switch ((uintptr_t) d[1])
1654 	    {
1655 	    case GOMP_DEPEND_IN:
1656 	      break;
1657 	    case GOMP_DEPEND_OUT:
1658 	    case GOMP_DEPEND_INOUT:
1659 	    case GOMP_DEPEND_MUTEXINOUTSET:
1660 	      elem.is_in = 0;
1661 	      break;
1662 	    default:
1663 	      gomp_fatal ("unknown omp_depend_t dependence type %d",
1664 			  (int) (uintptr_t) d[1]);
1665 	    }
1666 	  elem.addr = d[0];
1667 	}
1668       ent = htab_find (task->depend_hash, &elem);
1669       for (; ent; ent = ent->next)
1670 	if (elem.is_in && ent->is_in)
1671 	  continue;
1672 	else
1673 	  {
1674 	    struct gomp_task *tsk = ent->task;
1675 	    if (!tsk->parent_depends_on)
1676 	      {
1677 		tsk->parent_depends_on = true;
1678 		++num_awaited;
1679 		/* If dependency TSK itself has no dependencies and is
1680 		   ready to run, move it up front so that we run it as
1681 		   soon as possible.  */
1682 		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1683 		  priority_queue_upgrade_task (tsk, task);
1684 	      }
1685 	  }
1686     }
1687   if (num_awaited == 0)
1688     {
1689       gomp_mutex_unlock (&team->task_lock);
1690       return;
1691     }
1692 
1693   memset (&taskwait, 0, sizeof (taskwait));
1694   taskwait.n_depend = num_awaited;
1695   gomp_sem_init (&taskwait.taskwait_sem, 0);
1696   task->taskwait = &taskwait;
1697 
1698   while (1)
1699     {
1700       bool cancelled = false;
1701       if (taskwait.n_depend == 0)
1702 	{
1703 	  task->taskwait = NULL;
1704 	  gomp_mutex_unlock (&team->task_lock);
1705 	  if (to_free)
1706 	    {
1707 	      gomp_finish_task (to_free);
1708 	      free (to_free);
1709 	    }
1710 	  gomp_sem_destroy (&taskwait.taskwait_sem);
1711 	  return;
1712 	}
1713 
1714       /* Theoretically when we have multiple priorities, we should
1715 	 choose between the highest priority item in
1716 	 task->children_queue and team->task_queue here, so we should
1717 	 use priority_queue_next_task().  However, since we are
1718 	 running an undeferred task, perhaps that makes all tasks it
1719 	 depends on undeferred, thus a priority of INF?  This would
1720 	 make it unnecessary to take anything but the dependencies
1721 	 into account here.
1722 
1723 	 On the other hand, if we want to use priority_queue_next_task(),
1724 	 care should be taken to only use priority_queue_remove()
1725 	 below if the task was actually removed from the children
1726 	 queue.  */
1727       bool ignored;
1728       struct gomp_task *next_task
1729 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1730 				    PQ_IGNORED, NULL, &ignored);
1731 
1732       if (next_task->kind == GOMP_TASK_WAITING)
1733 	{
1734 	  child_task = next_task;
1735 	  cancelled
1736 	    = gomp_task_run_pre (child_task, task, team);
1737 	  if (__builtin_expect (cancelled, 0))
1738 	    {
1739 	      if (to_free)
1740 		{
1741 		  gomp_finish_task (to_free);
1742 		  free (to_free);
1743 		  to_free = NULL;
1744 		}
1745 	      goto finish_cancelled;
1746 	    }
1747 	}
1748       else
1749 	/* All tasks we are waiting for are either running in other
1750 	   threads, or they are tasks that have not had their
1751 	   dependencies met (so they're not even in the queue).  Wait
1752 	   for them.  */
1753 	taskwait.in_depend_wait = true;
1754       gomp_mutex_unlock (&team->task_lock);
1755       if (do_wake)
1756 	{
1757 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1758 	  do_wake = 0;
1759 	}
1760       if (to_free)
1761 	{
1762 	  gomp_finish_task (to_free);
1763 	  free (to_free);
1764 	  to_free = NULL;
1765 	}
1766       if (child_task)
1767 	{
1768 	  thr->task = child_task;
1769 	  if (__builtin_expect (child_task->fn == NULL, 0))
1770 	    {
1771 	      if (gomp_target_task_fn (child_task->fn_data))
1772 		{
1773 		  thr->task = task;
1774 		  gomp_mutex_lock (&team->task_lock);
1775 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1776 		  struct gomp_target_task *ttask
1777 		    = (struct gomp_target_task *) child_task->fn_data;
1778 		  /* If GOMP_PLUGIN_target_task_completion has run already
1779 		     in between gomp_target_task_fn and the mutex lock,
1780 		     perform the requeuing here.  */
1781 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1782 		    gomp_target_task_completion (team, child_task);
1783 		  else
1784 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1785 		  child_task = NULL;
1786 		  continue;
1787 		}
1788 	    }
1789 	  else
1790 	    child_task->fn (child_task->fn_data);
1791 	  thr->task = task;
1792 	}
1793       else
1794 	gomp_sem_wait (&taskwait.taskwait_sem);
1795       gomp_mutex_lock (&team->task_lock);
1796       if (child_task)
1797 	{
1798 	 finish_cancelled:;
1799 	  size_t new_tasks
1800 	    = gomp_task_run_post_handle_depend (child_task, team);
1801 	  if (child_task->parent_depends_on)
1802 	    --taskwait.n_depend;
1803 
1804 	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1805 				 child_task, MEMMODEL_RELAXED);
1806 	  child_task->pnode[PQ_CHILDREN].next = NULL;
1807 	  child_task->pnode[PQ_CHILDREN].prev = NULL;
1808 
1809 	  gomp_clear_parent (&child_task->children_queue);
1810 	  gomp_task_run_post_remove_taskgroup (child_task);
1811 	  to_free = child_task;
1812 	  child_task = NULL;
1813 	  team->task_count--;
1814 	  if (new_tasks > 1)
1815 	    {
1816 	      do_wake = team->nthreads - team->task_running_count
1817 			- !task->in_tied_task;
1818 	      if (do_wake > new_tasks)
1819 		do_wake = new_tasks;
1820 	    }
1821 	}
1822     }
1823 }
1824 
1825 /* Called when encountering a taskyield directive.  */
1826 
1827 void
1828 GOMP_taskyield (void)
1829 {
1830   /* Nothing at the moment.  */
1831 }
1832 
1833 static inline struct gomp_taskgroup *
1834 gomp_taskgroup_init (struct gomp_taskgroup *prev)
1835 {
1836   struct gomp_taskgroup *taskgroup
1837     = gomp_malloc (sizeof (struct gomp_taskgroup));
1838   taskgroup->prev = prev;
1839   priority_queue_init (&taskgroup->taskgroup_queue);
1840   taskgroup->reductions = prev ? prev->reductions : NULL;
1841   taskgroup->in_taskgroup_wait = false;
1842   taskgroup->cancelled = false;
1843   taskgroup->workshare = false;
1844   taskgroup->num_children = 0;
1845   gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1846   return taskgroup;
1847 }
1848 
1849 void
1850 GOMP_taskgroup_start (void)
1851 {
1852   struct gomp_thread *thr = gomp_thread ();
1853   struct gomp_team *team = thr->ts.team;
1854   struct gomp_task *task = thr->task;
1855 
1856   /* If team is NULL, all tasks are executed as
1857      GOMP_TASK_UNDEFERRED tasks and thus all child tasks of the
1858      taskgroup and their descendant tasks will be finished
1859      by the time GOMP_taskgroup_end is called.  */
1860   if (team == NULL)
1861     return;
1862   task->taskgroup = gomp_taskgroup_init (task->taskgroup);
1863 }
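
/* For orientation: the compiler brackets a #pragma omp taskgroup region with
   roughly

     GOMP_taskgroup_start ();
     ... body of the taskgroup, typically creating tasks ...
     GOMP_taskgroup_end ();

   so GOMP_taskgroup_end below is where the encountering thread waits for,
   and helps to run, the tasks generated inside the region.  */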
1864 
1865 void
1866 GOMP_taskgroup_end (void)
1867 {
1868   struct gomp_thread *thr = gomp_thread ();
1869   struct gomp_team *team = thr->ts.team;
1870   struct gomp_task *task = thr->task;
1871   struct gomp_taskgroup *taskgroup;
1872   struct gomp_task *child_task = NULL;
1873   struct gomp_task *to_free = NULL;
1874   int do_wake = 0;
1875 
1876   if (team == NULL)
1877     return;
1878   taskgroup = task->taskgroup;
1879   if (__builtin_expect (taskgroup == NULL, 0)
1880       && thr->ts.level == 0)
1881     {
1882       /* This can happen if GOMP_taskgroup_start is called when
1883 	 thr->ts.team == NULL, but inside of the taskgroup there
1884 	 is #pragma omp target nowait that creates an implicit
1885 	 team with a single thread.  In this case, we want to wait
1886 	 for all outstanding tasks in this team.  */
1887       gomp_team_barrier_wait (&team->barrier);
1888       return;
1889     }
1890 
1891   /* The acquire barrier on load of taskgroup->num_children here
1892      synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1893      It is not necessary that we synchronize with other non-0 writes at
1894      this point, but we must ensure that all writes to memory by a
1895      child thread task work function are seen before we exit from
1896      GOMP_taskgroup_end.  */
1897   if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1898     goto finish;
1899 
1900   bool unused;
1901   gomp_mutex_lock (&team->task_lock);
1902   while (1)
1903     {
1904       bool cancelled = false;
1905       if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1906 				  MEMMODEL_RELAXED))
1907 	{
1908 	  if (taskgroup->num_children)
1909 	    {
1910 	      if (priority_queue_empty_p (&task->children_queue,
1911 					  MEMMODEL_RELAXED))
1912 		goto do_wait;
1913 	      child_task
1914 		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1915 					    PQ_TEAM, &team->task_queue,
1916 					    &unused);
1917 	    }
1918 	  else
1919 	    {
1920 	      gomp_mutex_unlock (&team->task_lock);
1921 	      if (to_free)
1922 		{
1923 		  gomp_finish_task (to_free);
1924 		  free (to_free);
1925 		}
1926 	      goto finish;
1927 	    }
1928 	}
1929       else
1930 	child_task
1931 	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1932 				      PQ_TEAM, &team->task_queue, &unused);
1933       if (child_task->kind == GOMP_TASK_WAITING)
1934 	{
1935 	  cancelled
1936 	    = gomp_task_run_pre (child_task, child_task->parent, team);
1937 	  if (__builtin_expect (cancelled, 0))
1938 	    {
1939 	      if (to_free)
1940 		{
1941 		  gomp_finish_task (to_free);
1942 		  free (to_free);
1943 		  to_free = NULL;
1944 		}
1945 	      goto finish_cancelled;
1946 	    }
1947 	}
1948       else
1949 	{
1950 	  child_task = NULL;
1951 	 do_wait:
1952 	/* All tasks we are waiting for are either running in other
1953 	   threads, or they are tasks that have not had their
1954 	   dependencies met (so they're not even in the queue).  Wait
1955 	   for them.  */
1956 	  taskgroup->in_taskgroup_wait = true;
1957 	}
1958       gomp_mutex_unlock (&team->task_lock);
1959       if (do_wake)
1960 	{
1961 	  gomp_team_barrier_wake (&team->barrier, do_wake);
1962 	  do_wake = 0;
1963 	}
1964       if (to_free)
1965 	{
1966 	  gomp_finish_task (to_free);
1967 	  free (to_free);
1968 	  to_free = NULL;
1969 	}
1970       if (child_task)
1971 	{
1972 	  thr->task = child_task;
1973 	  if (__builtin_expect (child_task->fn == NULL, 0))
1974 	    {
1975 	      if (gomp_target_task_fn (child_task->fn_data))
1976 		{
1977 		  thr->task = task;
1978 		  gomp_mutex_lock (&team->task_lock);
1979 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1980 		  struct gomp_target_task *ttask
1981 		    = (struct gomp_target_task *) child_task->fn_data;
1982 		  /* If GOMP_PLUGIN_target_task_completion has run already
1983 		     in between gomp_target_task_fn and the mutex lock,
1984 		     perform the requeuing here.  */
1985 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1986 		    gomp_target_task_completion (team, child_task);
1987 		  else
1988 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1989 		  child_task = NULL;
1990 		  continue;
1991 		}
1992 	    }
1993 	  else
1994 	    child_task->fn (child_task->fn_data);
1995 	  thr->task = task;
1996 	}
1997       else
1998 	gomp_sem_wait (&taskgroup->taskgroup_sem);
1999       gomp_mutex_lock (&team->task_lock);
2000       if (child_task)
2001 	{
2002 	 finish_cancelled:;
2003 	  size_t new_tasks
2004 	    = gomp_task_run_post_handle_depend (child_task, team);
2005 	  gomp_task_run_post_remove_parent (child_task);
2006 	  gomp_clear_parent (&child_task->children_queue);
2007 	  gomp_task_run_post_remove_taskgroup (child_task);
2008 	  to_free = child_task;
2009 	  child_task = NULL;
2010 	  team->task_count--;
2011 	  if (new_tasks > 1)
2012 	    {
2013 	      do_wake = team->nthreads - team->task_running_count
2014 			- !task->in_tied_task;
2015 	      if (do_wake > new_tasks)
2016 		do_wake = new_tasks;
2017 	    }
2018 	}
2019     }
2020 
2021  finish:
2022   task->taskgroup = taskgroup->prev;
2023   gomp_sem_destroy (&taskgroup->taskgroup_sem);
2024   free (taskgroup);
2025 }
2026 
2027 static inline __attribute__((always_inline)) void
2028 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2029 			 unsigned nthreads)
2030 {
2031   size_t total_cnt = 0;
2032   uintptr_t *d = data;
2033   struct htab *old_htab = NULL, *new_htab;
2034   do
2035     {
2036       if (__builtin_expect (orig != NULL, 0))
2037 	{
2038 	  /* For worksharing task reductions, memory has been allocated
2039 	     already by some other thread that encountered the construct
2040 	     earlier.  */
2041 	  d[2] = orig[2];
2042 	  d[6] = orig[6];
2043 	  orig = (uintptr_t *) orig[4];
2044 	}
2045       else
2046 	{
2047 	  size_t sz = d[1] * nthreads;
2048 	  /* Should use omp_alloc if d[3] is not -1.  */
2049 	  void *ptr = gomp_aligned_alloc (d[2], sz);
2050 	  memset (ptr, '\0', sz);
2051 	  d[2] = (uintptr_t) ptr;
2052 	  d[6] = d[2] + sz;
2053 	}
2054       d[5] = 0;
2055       total_cnt += d[0];
2056       if (d[4] == 0)
2057 	{
2058 	  d[4] = (uintptr_t) old;
2059 	  break;
2060 	}
2061       else
2062 	d = (uintptr_t *) d[4];
2063     }
2064   while (1);
2065   if (old && old[5])
2066     {
2067       old_htab = (struct htab *) old[5];
2068       total_cnt += htab_elements (old_htab);
2069     }
2070   new_htab = htab_create (total_cnt);
2071   if (old_htab)
2072     {
2073       /* Copy old hash table, like in htab_expand.  */
2074       hash_entry_type *p, *olimit;
2075       new_htab->n_elements = htab_elements (old_htab);
2076       olimit = old_htab->entries + old_htab->size;
2077       p = old_htab->entries;
2078       do
2079 	{
2080 	  hash_entry_type x = *p;
2081 	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2082 	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2083 	  p++;
2084 	}
2085       while (p < olimit);
2086     }
2087   d = data;
2088   do
2089     {
2090       size_t j;
2091       for (j = 0; j < d[0]; ++j)
2092 	{
2093 	  uintptr_t *p = d + 7 + j * 3;
2094 	  p[2] = (uintptr_t) d;
2095 	  /* Ugly hack: hash_entry_type is defined for the task dependencies,
2096 	     which hash on their first element, a pointer.  Here we likewise
2097 	     need to hash on the first sizeof (uintptr_t) bytes, which contain
2098 	     a pointer.  Hide the cast from the compiler.  */
2099 	  hash_entry_type n;
2100 	  __asm ("" : "=g" (n) : "0" (p));
2101 	  *htab_find_slot (&new_htab, n, INSERT) = n;
2102 	}
2103       if (d[4] == (uintptr_t) old)
2104 	break;
2105       else
2106 	d = (uintptr_t *) d[4];
2107     }
2108   while (1);
2109   d[5] = (uintptr_t) new_htab;
2110 }
2111 
2112 static void
2113 gomp_create_artificial_team (void)
2114 {
2115   struct gomp_thread *thr = gomp_thread ();
2116   struct gomp_task_icv *icv;
2117   struct gomp_team *team = gomp_new_team (1);
2118   struct gomp_task *task = thr->task;
2119   struct gomp_task **implicit_task = &task;
2120   icv = task ? &task->icv : &gomp_global_icv;
2121   team->prev_ts = thr->ts;
2122   thr->ts.team = team;
2123   thr->ts.team_id = 0;
2124   thr->ts.work_share = &team->work_shares[0];
2125   thr->ts.last_work_share = NULL;
2126 #ifdef HAVE_SYNC_BUILTINS
2127   thr->ts.single_count = 0;
2128 #endif
2129   thr->ts.static_trip = 0;
2130   thr->task = &team->implicit_task[0];
2131   gomp_init_task (thr->task, NULL, icv);
2132   while (*implicit_task
2133 	 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
2134     implicit_task = &(*implicit_task)->parent;
2135   if (*implicit_task)
2136     {
2137       thr->task = *implicit_task;
2138       gomp_end_task ();
2139       free (*implicit_task);
2140       thr->task = &team->implicit_task[0];
2141     }
2142 #ifdef LIBGOMP_USE_PTHREADS
2143   else
2144     pthread_setspecific (gomp_thread_destructor, thr);
2145 #endif
2146   if (implicit_task != &task)
2147     {
2148       *implicit_task = thr->task;
2149       thr->task = task;
2150     }
2151 }
2152 
2153 /* The format of data is:
2154    data[0]	cnt
2155    data[1]	size
2156    data[2]	alignment (on output array pointer)
2157    data[3]	allocator (-1 if malloc allocator)
2158    data[4]	next pointer
2159    data[5]	used internally (htab pointer)
2160    data[6]	used internally (end of array)
2161    cnt times
2162    ent[0]	address
2163    ent[1]	offset
2164    ent[2]	used internally (pointer to data[0])
2165    The entries are sorted by increasing offset, so that a binary
2166    search can be performed.  Normally, data[8] is 0; the exception is
2167    worksharing construct task reductions in a cancellable parallel,
2168    where at offset 0 there should be space for a pointer and an integer
2169    which are used internally.  */
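
/* A worked example of the layout above (a minimal sketch; the exact values
   the compiler emits may differ): a taskgroup with task_reduction (+: sum)
   over a single int, registered from a team of 4 threads, could arrive as

     data[0] = 1		one reduction variable
     data[1] = sizeof (int)	per-thread chunk size
     data[2] = alignment	replaced below by the array pointer
     data[3] = -1		malloc allocator
     data[4] = 0		no further blocks chained
     data[7] = &sum		ent[0]: address of the original list item
     data[8] = 0		ent[1]: offset of sum within the chunk

   gomp_reduction_register then allocates 4 * sizeof (int) bytes, stores the
   pointer into data[2] and the end of the array into data[6], points ent[2]
   (data[9]) back at data[0], and records the hash table in data[5].  */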
2170 
2171 void
2172 GOMP_taskgroup_reduction_register (uintptr_t *data)
2173 {
2174   struct gomp_thread *thr = gomp_thread ();
2175   struct gomp_team *team = thr->ts.team;
2176   struct gomp_task *task;
2177   unsigned nthreads;
2178   if (__builtin_expect (team == NULL, 0))
2179     {
2180       /* The task reduction code needs a team and task, so for
2181 	 orphaned taskgroups just create the implicit team.  */
2182       gomp_create_artificial_team ();
2183       ialias_call (GOMP_taskgroup_start) ();
2184       team = thr->ts.team;
2185     }
2186   nthreads = team->nthreads;
2187   task = thr->task;
2188   gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2189   task->taskgroup->reductions = data;
2190 }
2191 
2192 void
2193 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2194 {
2195   uintptr_t *d = data;
2196   htab_free ((struct htab *) data[5]);
2197   do
2198     {
2199       gomp_aligned_free ((void *) d[2]);
2200       d = (uintptr_t *) d[4];
2201     }
2202   while (d && !d[5]);
2203 }
2204 ialias (GOMP_taskgroup_reduction_unregister)
2205 
2206 /* For i = 0 to cnt-1, remap ptrs[i], which is either the address of the
2207    original list item or the address of a previously remapped original
2208    list item, to the address of the private copy, and store that to ptrs[i].
2209    For i < cntorig, additionally set ptrs[cnt+i] to the address of
2210    the original list item.  */
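
/* Continuing the sketch above, for thread id 2 the remapping below works out
   to roughly

     if ptrs[i] is the original &sum (found in the hash table):
       ptrs[i] = d[2] + 2 * d[1] + p[1];

     if ptrs[i] already points into the per-team array:
       off     = ((uintptr_t) ptrs[i] - d[2]) % d[1];
       ptrs[i] = d[2] + 2 * d[1] + off;

   i.e. each thread ends up with the address of its own private copy inside
   the array allocated by gomp_reduction_register.  */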
2211 
2212 void
2213 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2214 {
2215   struct gomp_thread *thr = gomp_thread ();
2216   struct gomp_task *task = thr->task;
2217   unsigned id = thr->ts.team_id;
2218   uintptr_t *data = task->taskgroup->reductions;
2219   uintptr_t *d;
2220   struct htab *reduction_htab = (struct htab *) data[5];
2221   size_t i;
2222   for (i = 0; i < cnt; ++i)
2223     {
2224       hash_entry_type ent, n;
2225       __asm ("" : "=g" (ent) : "0" (ptrs + i));
2226       n = htab_find (reduction_htab, ent);
2227       if (n)
2228 	{
2229 	  uintptr_t *p;
2230 	  __asm ("" : "=g" (p) : "0" (n));
2231 	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2232 	     p[1] is the offset within the allocated chunk for each
2233 	     thread, p[2] is the array registered with
2234 	     GOMP_taskgroup_reduction_register, d[2] is the base of the
2235 	     allocated memory and d[1] is the size of the allocated chunk
2236 	     for one thread.  */
2237 	  d = (uintptr_t *) p[2];
2238 	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2239 	  if (__builtin_expect (i < cntorig, 0))
2240 	    ptrs[cnt + i] = (void *) p[0];
2241 	  continue;
2242 	}
2243       d = data;
2244       while (d != NULL)
2245 	{
2246 	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2247 	    break;
2248 	  d = (uintptr_t *) d[4];
2249 	}
2250       if (d == NULL)
2251 	gomp_fatal ("couldn't find matching task_reduction or reduction with "
2252 		    "task modifier for %p", ptrs[i]);
2253       uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2254       ptrs[i] = (void *) (d[2] + id * d[1] + off);
2255       if (__builtin_expect (i < cntorig, 0))
2256 	{
2257 	  size_t lo = 0, hi = d[0] - 1;
2258 	  while (lo <= hi)
2259 	    {
2260 	      size_t m = (lo + hi) / 2;
2261 	      if (d[7 + 3 * m + 1] < off)
2262 		lo = m + 1;
2263 	      else if (d[7 + 3 * m + 1] == off)
2264 		{
2265 		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
2266 		  break;
2267 		}
2268 	      else
2269 		hi = m - 1;
2270 	    }
2271 	  if (lo > hi)
2272 	    gomp_fatal ("couldn't find matching task_reduction or reduction "
2273 			"with task modifier for %p", ptrs[i]);
2274 	}
2275     }
2276 }
2277 
2278 struct gomp_taskgroup *
2279 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2280 {
2281   struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2282   gomp_reduction_register (data, NULL, NULL, nthreads);
2283   taskgroup->reductions = data;
2284   return taskgroup;
2285 }
2286 
2287 void
2288 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2289 {
2290   struct gomp_thread *thr = gomp_thread ();
2291   struct gomp_team *team = thr->ts.team;
2292   struct gomp_task *task = thr->task;
2293   unsigned nthreads = team->nthreads;
2294   gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2295   task->taskgroup->reductions = data;
2296 }
2297 
2298 void
2299 gomp_workshare_taskgroup_start (void)
2300 {
2301   struct gomp_thread *thr = gomp_thread ();
2302   struct gomp_team *team = thr->ts.team;
2303   struct gomp_task *task;
2304 
2305   if (team == NULL)
2306     {
2307       gomp_create_artificial_team ();
2308       team = thr->ts.team;
2309     }
2310   task = thr->task;
2311   task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2312   task->taskgroup->workshare = true;
2313 }
2314 
2315 void
2316 GOMP_workshare_task_reduction_unregister (bool cancelled)
2317 {
2318   struct gomp_thread *thr = gomp_thread ();
2319   struct gomp_task *task = thr->task;
2320   struct gomp_team *team = thr->ts.team;
2321   uintptr_t *data = task->taskgroup->reductions;
2322   ialias_call (GOMP_taskgroup_end) ();
2323   if (thr->ts.team_id == 0)
2324     ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2325   else
2326     htab_free ((struct htab *) data[5]);
2327 
2328   if (!cancelled)
2329     gomp_team_barrier_wait (&team->barrier);
2330 }
2331 
2332 int
2333 omp_in_final (void)
2334 {
2335   struct gomp_thread *thr = gomp_thread ();
2336   return thr->task && thr->task->final_task;
2337 }
2338 
2339 ialias (omp_in_final)
2340