xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/task.c (revision 23f5f46327e37e7811da3520f4bb933f9489322f)
/* Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of tasks in response to task
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#include "gomp-constants.h"

typedef struct gomp_task_depend_entry *hash_entry_type;

static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}

#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}
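
/* Added commentary (not in the upstream sources): hashtab.h is an
   include-style template rather than an ordinary header.  It picks up
   the hash_entry_type typedef and the htab_alloc/htab_free hooks
   defined above, and its inline lookup routines call the htab_hash and
   htab_eq callbacks.  Entries hash on the dependency address, so every
   depend clause naming the same address chains into the same slot.  */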

/* Create a new task data structure.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
		struct gomp_task_icv *prev_icv)
{
  /* It would seem that using memset here would be a win, but it turns
     out that partially filling gomp_task allows us to keep the
     overhead of task creation low.  In the nqueens-1.c test, for a
     sufficiently large N, we drop the overhead from 5-6% to 1%.

     Note, the nqueens-1.c test in serial mode is a good test to
     benchmark the overhead of creating tasks as there are millions of
     tiny tasks created that all run undeferred.  */
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  priority_queue_init (&task->children_queue);
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}

/* Clean up a task, after completing it.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}

/* Clear the parent field of every task in LIST.  */

static inline void
gomp_clear_parent_in_list (struct priority_list *list)
{
  struct priority_node *p = list->tasks;
  if (p)
    do
      {
	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
	p = p->next;
      }
    while (p != list->tasks);
}

/* Splay tree version of gomp_clear_parent_in_list.

   Clear the parent field of every task in NODE within SP, and free
   the node when done.  */

static void
gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
{
  if (!node)
    return;
  prio_splay_tree_node left = node->left, right = node->right;
  gomp_clear_parent_in_list (&node->key.l);
#if _LIBGOMP_CHECKING_
  memset (node, 0xaf, sizeof (*node));
#endif
  /* No need to remove the node from the tree.  We're nuking
     everything, so just free the nodes and our caller can clear the
     entire splay tree.  */
  free (node);
  gomp_clear_parent_in_tree (sp, left);
  gomp_clear_parent_in_tree (sp, right);
}

/* Clear the parent field of every task in Q and remove every task
   from Q.  */

static inline void
gomp_clear_parent (struct priority_queue *q)
{
  if (priority_queue_multi_p (q))
    {
      gomp_clear_parent_in_tree (&q->t, q->t.root);
      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
	 No need to remove anything.  We can just nuke everything.  */
      q->t.root = NULL;
    }
  else
    gomp_clear_parent_in_list (&q->l);
}

/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
			 void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t i;
  hash_entry_type ent;

  if (ndepend)
    {
      /* depend[0] is total # */
      size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
      /* ndepend - nout is # of in: */
      for (i = 0; i < ndepend; i++)
	{
	  task->depend[i].addr = depend[2 + i];
	  task->depend[i].is_in = i >= nout;
	}
    }
  else
    {
      ndepend = (uintptr_t) depend[1]; /* total # */
      size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
      size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
      /* For now we treat mutexinoutset like out, which is compliant, but
	 inefficient.  */
      size_t nin = (uintptr_t) depend[4]; /* # of in: */
      /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
      size_t normal = nout + nmutexinoutset + nin;
      size_t n = 0;
      for (i = normal; i < ndepend; i++)
	{
	  void **d = (void **) (uintptr_t) depend[5 + i];
	  switch ((uintptr_t) d[1])
	    {
	    case GOMP_DEPEND_OUT:
	    case GOMP_DEPEND_INOUT:
	    case GOMP_DEPEND_MUTEXINOUTSET:
	      break;
	    case GOMP_DEPEND_IN:
	      continue;
	    default:
	      gomp_fatal ("unknown omp_depend_t dependence type %d",
			  (int) (uintptr_t) d[1]);
	    }
	  task->depend[n].addr = d[0];
	  task->depend[n++].is_in = 0;
	}
      for (i = 0; i < normal; i++)
	{
	  task->depend[n].addr = depend[5 + i];
	  task->depend[n++].is_in = i >= nout + nmutexinoutset;
	}
      for (i = normal; i < ndepend; i++)
	{
	  void **d = (void **) (uintptr_t) depend[5 + i];
	  if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
	    continue;
	  task->depend[n].addr = d[0];
	  task->depend[n++].is_in = 1;
	}
    }
  task->depend_count = ndepend;
  task->num_dependees = 0;
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
					      &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
	{
	  /* If multiple depends on the same task are the same, all but the
	     first one are redundant.  As inout/out come first, if any of them
	     is inout/out, it will win, which is the right semantics.  */
	  if ((*slot)->task == task)
	    {
	      task->depend[i].redundant = true;
	      continue;
	    }
	  for (ent = *slot; ent; ent = ent->next)
	    {
	      if (ent->redundant_out)
		break;

	      last = ent;

	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
	      if (task->depend[i].is_in && ent->is_in)
		continue;

	      if (!ent->is_in)
		out = ent;

	      struct gomp_task *tsk = ent->task;
	      if (tsk->dependers == NULL)
		{
		  tsk->dependers
		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
				   + 6 * sizeof (struct gomp_task *));
		  tsk->dependers->n_elem = 1;
		  tsk->dependers->allocated = 6;
		  tsk->dependers->elem[0] = task;
		  task->num_dependees++;
		  continue;
		}
	      /* We already have some other dependency on tsk from earlier
		 depend clause.  */
	      else if (tsk->dependers->n_elem
		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
			   == task))
		continue;
	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
		{
		  tsk->dependers->allocated
		    = tsk->dependers->allocated * 2 + 2;
		  tsk->dependers
		    = gomp_realloc (tsk->dependers,
				    sizeof (struct gomp_dependers_vec)
				    + (tsk->dependers->allocated
				       * sizeof (struct gomp_task *)));
		}
	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
	      task->num_dependees++;
	    }
	  task->depend[i].next = *slot;
	  (*slot)->prev = &task->depend[i];
	}
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
	 address in the hash table chain for the purpose of creation of
	 deferred tasks, because each out depends on all earlier outs, thus it
	 is enough to record just the last depend({,in}out:).  For depend(in:),
	 we need to keep all of the previous ones not terminated yet, because
	 a later depend({,in}out:) might need to depend on all of them.  So, if
	 the new task's clause is depend({,in}out:), we know there is at most
	 one other depend({,in}out:) clause in the list (out).  For
	 non-deferred tasks we want to see all outs, so they are moved to the
	 end of the chain, after first redundant_out entry all following
	 entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
	{
	  if (out != last)
	    {
	      out->next->prev = out->prev;
	      out->prev->next = out->next;
	      out->next = last->next;
	      out->prev = last;
	      last->next = out;
	      if (out->next)
		out->next->prev = out;
	    }
	  out->redundant_out = true;
	}
    }
}

/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   DEPEND is an array containing:
     if depend[0] is non-zero, then:
	depend[0]: number of depend elements.
	depend[1]: number of depend elements of type "out/inout".
	depend[2..N+1]: address of [1..N]th depend element.
     otherwise, when depend[0] is zero, then:
	depend[1]: number of depend elements.
	depend[2]: number of depend elements of type "out/inout".
	depend[3]: number of depend elements of type "mutexinoutset".
	depend[4]: number of depend elements of type "in".
	depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
	depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
		   omp_depend_t objects.  */
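
/* Added commentary (not in the upstream sources): as a concrete
   illustration of the first format above, a hypothetical
	#pragma omp task depend(out: x) depend(in: y, z)
   would pass a DEPEND array of
	depend[0] = 3 (total), depend[1] = 1 (out/inout),
	depend[2] = &x, depend[3] = &y, depend[4] = &z
   with the out/inout addresses placed before the in addresses, which
   is what gomp_task_handle_depend relies on when it sets is_in for
   indexes >= depend[1].  */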

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (&task.children_queue);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
		       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && !task->copy_ctors_done)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_finish_task (task);
	      free (task);
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      /* Tasks that depend on other tasks are not put into the
		 various waiting queues, so we are done for now.  Said
		 tasks are instead put into the queues via
		 gomp_task_run_post_handle_dependers() after their
		 dependencies have been satisfied.  After which, they
		 can be picked up by the various scheduling
		 points.  */
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
			     task, priority,
			     PRIORITY_INSERT_BEGIN,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, priority,
			       PRIORITY_INSERT_BEGIN,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
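
/* Added commentary (not in the upstream sources): a minimal sketch of
   a call into the entry point above, assuming a hypothetical outlined
   task body TASK_FN and closure struct TASK_DATA; the exact lowering
   of "#pragma omp task" is compiler-dependent.

     struct task_data { int *arr; int i; };	// hypothetical closure
     extern void task_fn (void *);		// outlined task body

     void
     spawn (struct task_data *d)
     {
       // if_clause=true, no copy constructor, no flags, no depend
       // array, default priority.
       GOMP_task (task_fn, d, NULL, sizeof (*d), __alignof__ (*d),
		  true, 0, NULL, 0);
     }
*/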

ialias (GOMP_taskgroup_start)
ialias (GOMP_taskgroup_end)
ialias (GOMP_taskgroup_reduction_register)

#define TYPE long
#define UTYPE unsigned long
#define TYPE_is_long 1
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef TYPE_is_long

#define TYPE unsigned long long
#define UTYPE TYPE
#define GOMP_taskloop GOMP_taskloop_ull
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef GOMP_taskloop

static void inline
priority_queue_move_task_first (enum priority_queue_type type,
				struct priority_queue *head,
				struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to move first missing task %p", task);
#endif
  struct priority_list *list;
  if (priority_queue_multi_p (head))
    {
      list = priority_queue_lookup_priority (head, task->priority);
#if _LIBGOMP_CHECKING_
      if (!list)
	gomp_fatal ("Unable to find priority %d", task->priority);
#endif
    }
  else
    list = &head->l;
  priority_list_remove (list, task_to_priority_node (type, task), 0);
  priority_list_insert (type, list, task, task->priority,
			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
			task->parent_depends_on);
}

/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
				    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
			 PRIORITY_INSERT_BEGIN, false,
			 task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
      else if (parent->taskwait->in_depend_wait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_depend_wait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
	 Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}

/* Signal that a target task TTASK has completed the asynchronously
   running phase and should be requeued as a task to handle the
   variable unmapping.  */

void
GOMP_PLUGIN_target_task_completion (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_task *task = ttask->task;
  struct gomp_team *team = ttask->team;

  gomp_mutex_lock (&team->task_lock);
  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
    {
      ttask->state = GOMP_TARGET_TASK_FINISHED;
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  ttask->state = GOMP_TARGET_TASK_FINISHED;
  gomp_target_task_completion (team, task);
  gomp_mutex_unlock (&team->task_lock);
}
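
/* Added commentary (not in the upstream sources): together with the
   code at the end of gomp_create_target_task below, the above
   implements a small handshake on ttask->state.  Whichever side
   observes that the other has already run performs the requeue: if
   the plugin callback fires while the state is still
   GOMP_TARGET_TASK_READY_TO_RUN, it only records
   GOMP_TARGET_TASK_FINISHED and lets the spawning thread requeue the
   task once it reacquires task_lock; otherwise the callback requeues
   the task itself via gomp_target_task_completion.  */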
6711debfc3dSmrg 
6721debfc3dSmrg static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
6731debfc3dSmrg 
6741debfc3dSmrg /* Called for nowait target tasks.  */
6751debfc3dSmrg 
6761debfc3dSmrg bool
gomp_create_target_task(struct gomp_device_descr * devicep,void (* fn)(void *),size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,unsigned int flags,void ** depend,void ** args,enum gomp_target_task_state state)6771debfc3dSmrg gomp_create_target_task (struct gomp_device_descr *devicep,
6781debfc3dSmrg 			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
6791debfc3dSmrg 			 size_t *sizes, unsigned short *kinds,
6801debfc3dSmrg 			 unsigned int flags, void **depend, void **args,
6811debfc3dSmrg 			 enum gomp_target_task_state state)
6821debfc3dSmrg {
6831debfc3dSmrg   struct gomp_thread *thr = gomp_thread ();
6841debfc3dSmrg   struct gomp_team *team = thr->ts.team;
6851debfc3dSmrg 
6861debfc3dSmrg   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
687c0a68be4Smrg   if (__builtin_expect (gomp_cancel_var, 0) && team)
688c0a68be4Smrg     {
689c0a68be4Smrg       if (gomp_team_barrier_cancelled (&team->barrier))
6901debfc3dSmrg 	return true;
691c0a68be4Smrg       if (thr->task->taskgroup)
692c0a68be4Smrg 	{
693c0a68be4Smrg 	  if (thr->task->taskgroup->cancelled)
694c0a68be4Smrg 	    return true;
695c0a68be4Smrg 	  if (thr->task->taskgroup->workshare
696c0a68be4Smrg 	      && thr->task->taskgroup->prev
697c0a68be4Smrg 	      && thr->task->taskgroup->prev->cancelled)
698c0a68be4Smrg 	    return true;
699c0a68be4Smrg 	}
700c0a68be4Smrg     }
7011debfc3dSmrg 
7021debfc3dSmrg   struct gomp_target_task *ttask;
7031debfc3dSmrg   struct gomp_task *task;
7041debfc3dSmrg   struct gomp_task *parent = thr->task;
7051debfc3dSmrg   struct gomp_taskgroup *taskgroup = parent->taskgroup;
7061debfc3dSmrg   bool do_wake;
7071debfc3dSmrg   size_t depend_size = 0;
7081debfc3dSmrg   uintptr_t depend_cnt = 0;
7091debfc3dSmrg   size_t tgt_align = 0, tgt_size = 0;
7101debfc3dSmrg 
7111debfc3dSmrg   if (depend != NULL)
7121debfc3dSmrg     {
713c0a68be4Smrg       depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
7141debfc3dSmrg       depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
7151debfc3dSmrg     }
7161debfc3dSmrg   if (fn)
7171debfc3dSmrg     {
7181debfc3dSmrg       /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are
7191debfc3dSmrg 	 firstprivate on the target task.  */
7201debfc3dSmrg       size_t i;
7211debfc3dSmrg       for (i = 0; i < mapnum; i++)
7221debfc3dSmrg 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
7231debfc3dSmrg 	  {
7241debfc3dSmrg 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
7251debfc3dSmrg 	    if (tgt_align < align)
7261debfc3dSmrg 	      tgt_align = align;
7271debfc3dSmrg 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
7281debfc3dSmrg 	    tgt_size += sizes[i];
7291debfc3dSmrg 	  }
7301debfc3dSmrg       if (tgt_align)
7311debfc3dSmrg 	tgt_size += tgt_align - 1;
7321debfc3dSmrg       else
7331debfc3dSmrg 	tgt_size = 0;
7341debfc3dSmrg     }
7351debfc3dSmrg 
7361debfc3dSmrg   task = gomp_malloc (sizeof (*task) + depend_size
7371debfc3dSmrg 		      + sizeof (*ttask)
7381debfc3dSmrg 		      + mapnum * (sizeof (void *) + sizeof (size_t)
7391debfc3dSmrg 				  + sizeof (unsigned short))
7401debfc3dSmrg 		      + tgt_size);
7411debfc3dSmrg   gomp_init_task (task, parent, gomp_icv (false));
7421debfc3dSmrg   task->priority = 0;
7431debfc3dSmrg   task->kind = GOMP_TASK_WAITING;
7441debfc3dSmrg   task->in_tied_task = parent->in_tied_task;
7451debfc3dSmrg   task->taskgroup = taskgroup;
7461debfc3dSmrg   ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
7471debfc3dSmrg   ttask->devicep = devicep;
7481debfc3dSmrg   ttask->fn = fn;
7491debfc3dSmrg   ttask->mapnum = mapnum;
7501debfc3dSmrg   ttask->args = args;
7511debfc3dSmrg   memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
7521debfc3dSmrg   ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
7531debfc3dSmrg   memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
7541debfc3dSmrg   ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
7551debfc3dSmrg   memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
7561debfc3dSmrg   if (tgt_align)
7571debfc3dSmrg     {
7581debfc3dSmrg       char *tgt = (char *) &ttask->kinds[mapnum];
7591debfc3dSmrg       size_t i;
7601debfc3dSmrg       uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
7611debfc3dSmrg       if (al)
7621debfc3dSmrg 	tgt += tgt_align - al;
7631debfc3dSmrg       tgt_size = 0;
7641debfc3dSmrg       for (i = 0; i < mapnum; i++)
7651debfc3dSmrg 	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
7661debfc3dSmrg 	  {
7671debfc3dSmrg 	    size_t align = (size_t) 1 << (kinds[i] >> 8);
7681debfc3dSmrg 	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
7691debfc3dSmrg 	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
7701debfc3dSmrg 	    ttask->hostaddrs[i] = tgt + tgt_size;
7711debfc3dSmrg 	    tgt_size = tgt_size + sizes[i];
7721debfc3dSmrg 	  }
7731debfc3dSmrg     }
7741debfc3dSmrg   ttask->flags = flags;
7751debfc3dSmrg   ttask->state = state;
7761debfc3dSmrg   ttask->task = task;
7771debfc3dSmrg   ttask->team = team;
7781debfc3dSmrg   task->fn = NULL;
7791debfc3dSmrg   task->fn_data = ttask;
7801debfc3dSmrg   task->final_task = 0;
7811debfc3dSmrg   gomp_mutex_lock (&team->task_lock);
7821debfc3dSmrg   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
783c0a68be4Smrg   if (__builtin_expect (gomp_cancel_var, 0))
7841debfc3dSmrg     {
785c0a68be4Smrg       if (gomp_team_barrier_cancelled (&team->barrier))
786c0a68be4Smrg 	{
787c0a68be4Smrg 	do_cancel:
7881debfc3dSmrg 	  gomp_mutex_unlock (&team->task_lock);
7891debfc3dSmrg 	  gomp_finish_task (task);
7901debfc3dSmrg 	  free (task);
7911debfc3dSmrg 	  return true;
7921debfc3dSmrg 	}
793c0a68be4Smrg       if (taskgroup)
794c0a68be4Smrg 	{
795c0a68be4Smrg 	  if (taskgroup->cancelled)
796c0a68be4Smrg 	    goto do_cancel;
797c0a68be4Smrg 	  if (taskgroup->workshare
798c0a68be4Smrg 	      && taskgroup->prev
799c0a68be4Smrg 	      && taskgroup->prev->cancelled)
800c0a68be4Smrg 	    goto do_cancel;
801c0a68be4Smrg 	}
802c0a68be4Smrg     }
8031debfc3dSmrg   if (depend_size)
8041debfc3dSmrg     {
8051debfc3dSmrg       gomp_task_handle_depend (task, parent, depend);
8061debfc3dSmrg       if (task->num_dependees)
8071debfc3dSmrg 	{
8081debfc3dSmrg 	  if (taskgroup)
8091debfc3dSmrg 	    taskgroup->num_children++;
8101debfc3dSmrg 	  gomp_mutex_unlock (&team->task_lock);
8111debfc3dSmrg 	  return true;
8121debfc3dSmrg 	}
8131debfc3dSmrg     }
8141debfc3dSmrg   if (state == GOMP_TARGET_TASK_DATA)
8151debfc3dSmrg     {
8161debfc3dSmrg       gomp_task_run_post_handle_depend_hash (task);
8171debfc3dSmrg       gomp_mutex_unlock (&team->task_lock);
8181debfc3dSmrg       gomp_finish_task (task);
8191debfc3dSmrg       free (task);
8201debfc3dSmrg       return false;
8211debfc3dSmrg     }
8221debfc3dSmrg   if (taskgroup)
8231debfc3dSmrg     taskgroup->num_children++;
8241debfc3dSmrg   /* For async offloading, if we don't need to wait for dependencies,
8251debfc3dSmrg      run the gomp_target_task_fn right away, essentially schedule the
8261debfc3dSmrg      mapping part of the task in the current thread.  */
8271debfc3dSmrg   if (devicep != NULL
8281debfc3dSmrg       && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
8291debfc3dSmrg     {
8301debfc3dSmrg       priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
8311debfc3dSmrg 			     PRIORITY_INSERT_END,
8321debfc3dSmrg 			     /*adjust_parent_depends_on=*/false,
8331debfc3dSmrg 			     task->parent_depends_on);
8341debfc3dSmrg       if (taskgroup)
8351debfc3dSmrg 	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
8361debfc3dSmrg 			       task, 0, PRIORITY_INSERT_END,
8371debfc3dSmrg 			       /*adjust_parent_depends_on=*/false,
8381debfc3dSmrg 			       task->parent_depends_on);
8391debfc3dSmrg       task->pnode[PQ_TEAM].next = NULL;
8401debfc3dSmrg       task->pnode[PQ_TEAM].prev = NULL;
8411debfc3dSmrg       task->kind = GOMP_TASK_TIED;
8421debfc3dSmrg       ++team->task_count;
8431debfc3dSmrg       gomp_mutex_unlock (&team->task_lock);
8441debfc3dSmrg 
8451debfc3dSmrg       thr->task = task;
8461debfc3dSmrg       gomp_target_task_fn (task->fn_data);
8471debfc3dSmrg       thr->task = parent;
8481debfc3dSmrg 
8491debfc3dSmrg       gomp_mutex_lock (&team->task_lock);
8501debfc3dSmrg       task->kind = GOMP_TASK_ASYNC_RUNNING;
8511debfc3dSmrg       /* If GOMP_PLUGIN_target_task_completion has run already
8521debfc3dSmrg 	 in between gomp_target_task_fn and the mutex lock,
8531debfc3dSmrg 	 perform the requeuing here.  */
8541debfc3dSmrg       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
8551debfc3dSmrg 	gomp_target_task_completion (team, task);
8561debfc3dSmrg       else
8571debfc3dSmrg 	ttask->state = GOMP_TARGET_TASK_RUNNING;
8581debfc3dSmrg       gomp_mutex_unlock (&team->task_lock);
8591debfc3dSmrg       return true;
8601debfc3dSmrg     }
8611debfc3dSmrg   priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
8621debfc3dSmrg 			 PRIORITY_INSERT_BEGIN,
8631debfc3dSmrg 			 /*adjust_parent_depends_on=*/false,
8641debfc3dSmrg 			 task->parent_depends_on);
8651debfc3dSmrg   if (taskgroup)
8661debfc3dSmrg     priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
8671debfc3dSmrg 			   PRIORITY_INSERT_BEGIN,
8681debfc3dSmrg 			   /*adjust_parent_depends_on=*/false,
8691debfc3dSmrg 			   task->parent_depends_on);
8701debfc3dSmrg   priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
8711debfc3dSmrg 			 PRIORITY_INSERT_END,
8721debfc3dSmrg 			 /*adjust_parent_depends_on=*/false,
8731debfc3dSmrg 			 task->parent_depends_on);
8741debfc3dSmrg   ++team->task_count;
8751debfc3dSmrg   ++team->task_queued_count;
8761debfc3dSmrg   gomp_team_barrier_set_task_pending (&team->barrier);
8771debfc3dSmrg   do_wake = team->task_running_count + !parent->in_tied_task
8781debfc3dSmrg 	    < team->nthreads;
8791debfc3dSmrg   gomp_mutex_unlock (&team->task_lock);
8801debfc3dSmrg   if (do_wake)
8811debfc3dSmrg     gomp_team_barrier_wake (&team->barrier, 1);
8821debfc3dSmrg   return true;
8831debfc3dSmrg }
8841debfc3dSmrg 
8851debfc3dSmrg /* Given a parent_depends_on task in LIST, move it to the front of its
8861debfc3dSmrg    priority so it is run as soon as possible.
8871debfc3dSmrg 
8881debfc3dSmrg    Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
8891debfc3dSmrg 
8901debfc3dSmrg    We rearrange the queue such that all parent_depends_on tasks are
8911debfc3dSmrg    first, and last_parent_depends_on points to the last such task we
8921debfc3dSmrg    rearranged.  For example, given the following tasks in a queue
8931debfc3dSmrg    where PD[123] are the parent_depends_on tasks:
8941debfc3dSmrg 
8951debfc3dSmrg 	task->children
8961debfc3dSmrg 	|
8971debfc3dSmrg 	V
8981debfc3dSmrg 	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
8991debfc3dSmrg 
9001debfc3dSmrg 	We rearrange such that:
9011debfc3dSmrg 
9021debfc3dSmrg 	task->children
9031debfc3dSmrg 	|	       +--- last_parent_depends_on
9041debfc3dSmrg 	|	       |
9051debfc3dSmrg 	V	       V
9061debfc3dSmrg 	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
9071debfc3dSmrg 
9081debfc3dSmrg static void inline
priority_list_upgrade_task(struct priority_list * list,struct priority_node * node)9091debfc3dSmrg priority_list_upgrade_task (struct priority_list *list,
9101debfc3dSmrg 			    struct priority_node *node)
9111debfc3dSmrg {
9121debfc3dSmrg   struct priority_node *last_parent_depends_on
9131debfc3dSmrg     = list->last_parent_depends_on;
9141debfc3dSmrg   if (last_parent_depends_on)
9151debfc3dSmrg     {
9161debfc3dSmrg       node->prev->next = node->next;
9171debfc3dSmrg       node->next->prev = node->prev;
9181debfc3dSmrg       node->prev = last_parent_depends_on;
9191debfc3dSmrg       node->next = last_parent_depends_on->next;
9201debfc3dSmrg       node->prev->next = node;
9211debfc3dSmrg       node->next->prev = node;
9221debfc3dSmrg     }
9231debfc3dSmrg   else if (node != list->tasks)
9241debfc3dSmrg     {
9251debfc3dSmrg       node->prev->next = node->next;
9261debfc3dSmrg       node->next->prev = node->prev;
9271debfc3dSmrg       node->prev = list->tasks->prev;
9281debfc3dSmrg       node->next = list->tasks;
9291debfc3dSmrg       list->tasks = node;
9301debfc3dSmrg       node->prev->next = node;
9311debfc3dSmrg       node->next->prev = node;
9321debfc3dSmrg     }
9331debfc3dSmrg   list->last_parent_depends_on = node;
9341debfc3dSmrg }
9351debfc3dSmrg 
9361debfc3dSmrg /* Given a parent_depends_on TASK in its parent's children_queue, move
9371debfc3dSmrg    it to the front of its priority so it is run as soon as possible.
9381debfc3dSmrg 
9391debfc3dSmrg    PARENT is passed as an optimization.
9401debfc3dSmrg 
9411debfc3dSmrg    (This function could be defined in priority_queue.c, but we want it
9421debfc3dSmrg    inlined, and putting it in priority_queue.h is not an option, given
9431debfc3dSmrg    that gomp_task has not been properly defined at that point).  */
9441debfc3dSmrg 
9451debfc3dSmrg static void inline
priority_queue_upgrade_task(struct gomp_task * task,struct gomp_task * parent)9461debfc3dSmrg priority_queue_upgrade_task (struct gomp_task *task,
9471debfc3dSmrg 			     struct gomp_task *parent)
9481debfc3dSmrg {
9491debfc3dSmrg   struct priority_queue *head = &parent->children_queue;
9501debfc3dSmrg   struct priority_node *node = &task->pnode[PQ_CHILDREN];
9511debfc3dSmrg #if _LIBGOMP_CHECKING_
9521debfc3dSmrg   if (!task->parent_depends_on)
9531debfc3dSmrg     gomp_fatal ("priority_queue_upgrade_task: task must be a "
9541debfc3dSmrg 		"parent_depends_on task");
9551debfc3dSmrg   if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
9561debfc3dSmrg     gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
9571debfc3dSmrg #endif
9581debfc3dSmrg   if (priority_queue_multi_p (head))
9591debfc3dSmrg     {
9601debfc3dSmrg       struct priority_list *list
9611debfc3dSmrg 	= priority_queue_lookup_priority (head, task->priority);
9621debfc3dSmrg       priority_list_upgrade_task (list, node);
9631debfc3dSmrg     }
9641debfc3dSmrg   else
9651debfc3dSmrg     priority_list_upgrade_task (&head->l, node);
9661debfc3dSmrg }
9671debfc3dSmrg 
9681debfc3dSmrg /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
9691debfc3dSmrg    the way in LIST so that other tasks can be considered for
9701debfc3dSmrg    execution.  LIST contains tasks of type TYPE.
9711debfc3dSmrg 
9721debfc3dSmrg    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
9731debfc3dSmrg    if applicable.  */
9741debfc3dSmrg 
9751debfc3dSmrg static void inline
priority_list_downgrade_task(enum priority_queue_type type,struct priority_list * list,struct gomp_task * child_task)9761debfc3dSmrg priority_list_downgrade_task (enum priority_queue_type type,
9771debfc3dSmrg 			      struct priority_list *list,
9781debfc3dSmrg 			      struct gomp_task *child_task)
9791debfc3dSmrg {
9801debfc3dSmrg   struct priority_node *node = task_to_priority_node (type, child_task);
9811debfc3dSmrg   if (list->tasks == node)
9821debfc3dSmrg     list->tasks = node->next;
9831debfc3dSmrg   else if (node->next != list->tasks)
9841debfc3dSmrg     {
9851debfc3dSmrg       /* The task in NODE is about to become TIED and TIED tasks
9861debfc3dSmrg 	 cannot come before WAITING tasks.  If we're about to
9871debfc3dSmrg 	 leave the queue in such an indeterminate state, rewire
9881debfc3dSmrg 	 things appropriately.  However, a TIED task at the end is
9891debfc3dSmrg 	 perfectly fine.  */
9901debfc3dSmrg       struct gomp_task *next_task = priority_node_to_task (type, node->next);
9911debfc3dSmrg       if (next_task->kind == GOMP_TASK_WAITING)
9921debfc3dSmrg 	{
9931debfc3dSmrg 	  /* Remove from list.  */
9941debfc3dSmrg 	  node->prev->next = node->next;
9951debfc3dSmrg 	  node->next->prev = node->prev;
9961debfc3dSmrg 	  /* Rewire at the end.  */
9971debfc3dSmrg 	  node->next = list->tasks;
9981debfc3dSmrg 	  node->prev = list->tasks->prev;
9991debfc3dSmrg 	  list->tasks->prev->next = node;
10001debfc3dSmrg 	  list->tasks->prev = node;
10011debfc3dSmrg 	}
10021debfc3dSmrg     }
10031debfc3dSmrg 
10041debfc3dSmrg   /* If the current task is the last_parent_depends_on for its
10051debfc3dSmrg      priority, adjust last_parent_depends_on appropriately.  */
10061debfc3dSmrg   if (__builtin_expect (child_task->parent_depends_on, 0)
10071debfc3dSmrg       && list->last_parent_depends_on == node)
10081debfc3dSmrg     {
10091debfc3dSmrg       struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
10101debfc3dSmrg       if (node->prev != node
10111debfc3dSmrg 	  && prev_child->kind == GOMP_TASK_WAITING
10121debfc3dSmrg 	  && prev_child->parent_depends_on)
10131debfc3dSmrg 	list->last_parent_depends_on = node->prev;
10141debfc3dSmrg       else
10151debfc3dSmrg 	{
10161debfc3dSmrg 	  /* There are no more parent_depends_on entries waiting
10171debfc3dSmrg 	     to run, clear the list.  */
10181debfc3dSmrg 	  list->last_parent_depends_on = NULL;
10191debfc3dSmrg 	}
10201debfc3dSmrg     }
10211debfc3dSmrg }
10221debfc3dSmrg 
10231debfc3dSmrg /* Given a TASK in HEAD that is about to be executed, move it out of
10241debfc3dSmrg    the way so that other tasks can be considered for execution.  HEAD
10251debfc3dSmrg    contains tasks of type TYPE.
10261debfc3dSmrg 
10271debfc3dSmrg    Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
10281debfc3dSmrg    if applicable.
10291debfc3dSmrg 
10301debfc3dSmrg    (This function could be defined in priority_queue.c, but we want it
10311debfc3dSmrg    inlined, and putting it in priority_queue.h is not an option, given
10321debfc3dSmrg    that gomp_task has not been properly defined at that point).  */
10331debfc3dSmrg 
10341debfc3dSmrg static void inline
priority_queue_downgrade_task(enum priority_queue_type type,struct priority_queue * head,struct gomp_task * task)10351debfc3dSmrg priority_queue_downgrade_task (enum priority_queue_type type,
10361debfc3dSmrg 			       struct priority_queue *head,
10371debfc3dSmrg 			       struct gomp_task *task)
10381debfc3dSmrg {
10391debfc3dSmrg #if _LIBGOMP_CHECKING_
10401debfc3dSmrg   if (!priority_queue_task_in_queue_p (type, head, task))
10411debfc3dSmrg     gomp_fatal ("Attempt to downgrade missing task %p", task);
10421debfc3dSmrg #endif
10431debfc3dSmrg   if (priority_queue_multi_p (head))
10441debfc3dSmrg     {
10451debfc3dSmrg       struct priority_list *list
10461debfc3dSmrg 	= priority_queue_lookup_priority (head, task->priority);
10471debfc3dSmrg       priority_list_downgrade_task (type, list, task);
10481debfc3dSmrg     }
10491debfc3dSmrg   else
10501debfc3dSmrg     priority_list_downgrade_task (type, &head->l, task);
10511debfc3dSmrg }
10521debfc3dSmrg 
10531debfc3dSmrg /* Setup CHILD_TASK to execute.  This is done by setting the task to
10541debfc3dSmrg    TIED, and updating all relevant queues so that CHILD_TASK is no
10551debfc3dSmrg    longer chosen for scheduling.  Also, remove CHILD_TASK from the
10561debfc3dSmrg    overall team task queue entirely.
10571debfc3dSmrg 
10581debfc3dSmrg    Return TRUE if task or its containing taskgroup has been
10591debfc3dSmrg    cancelled.  */
10601debfc3dSmrg 
10611debfc3dSmrg static inline bool
gomp_task_run_pre(struct gomp_task * child_task,struct gomp_task * parent,struct gomp_team * team)10621debfc3dSmrg gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
10631debfc3dSmrg 		   struct gomp_team *team)
10641debfc3dSmrg {
10651debfc3dSmrg #if _LIBGOMP_CHECKING_
10661debfc3dSmrg   if (child_task->parent)
10671debfc3dSmrg     priority_queue_verify (PQ_CHILDREN,
10681debfc3dSmrg 			   &child_task->parent->children_queue, true);
10691debfc3dSmrg   if (child_task->taskgroup)
10701debfc3dSmrg     priority_queue_verify (PQ_TASKGROUP,
10711debfc3dSmrg 			   &child_task->taskgroup->taskgroup_queue, false);
10721debfc3dSmrg   priority_queue_verify (PQ_TEAM, &team->task_queue, false);
10731debfc3dSmrg #endif
10741debfc3dSmrg 
10751debfc3dSmrg   /* Task is about to go tied, move it out of the way.  */
10761debfc3dSmrg   if (parent)
10771debfc3dSmrg     priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
10781debfc3dSmrg 				   child_task);
10791debfc3dSmrg 
10801debfc3dSmrg   /* Task is about to go tied, move it out of the way.  */
10811debfc3dSmrg   struct gomp_taskgroup *taskgroup = child_task->taskgroup;
10821debfc3dSmrg   if (taskgroup)
10831debfc3dSmrg     priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
10841debfc3dSmrg 				   child_task);
10851debfc3dSmrg 
10861debfc3dSmrg   priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
10871debfc3dSmrg 			 MEMMODEL_RELAXED);
10881debfc3dSmrg   child_task->pnode[PQ_TEAM].next = NULL;
10891debfc3dSmrg   child_task->pnode[PQ_TEAM].prev = NULL;
10901debfc3dSmrg   child_task->kind = GOMP_TASK_TIED;
10911debfc3dSmrg 
10921debfc3dSmrg   if (--team->task_queued_count == 0)
10931debfc3dSmrg     gomp_team_barrier_clear_task_pending (&team->barrier);
1094c0a68be4Smrg   if (__builtin_expect (gomp_cancel_var, 0)
10951debfc3dSmrg       && !child_task->copy_ctors_done)
1096c0a68be4Smrg     {
1097c0a68be4Smrg       if (gomp_team_barrier_cancelled (&team->barrier))
10981debfc3dSmrg 	return true;
1099c0a68be4Smrg       if (taskgroup)
1100c0a68be4Smrg 	{
1101c0a68be4Smrg 	  if (taskgroup->cancelled)
1102c0a68be4Smrg 	    return true;
1103c0a68be4Smrg 	  if (taskgroup->workshare
1104c0a68be4Smrg 	      && taskgroup->prev
1105c0a68be4Smrg 	      && taskgroup->prev->cancelled)
1106c0a68be4Smrg 	    return true;
1107c0a68be4Smrg 	}
1108c0a68be4Smrg     }
11091debfc3dSmrg   return false;
11101debfc3dSmrg }
11111debfc3dSmrg 
11121debfc3dSmrg static void
gomp_task_run_post_handle_depend_hash(struct gomp_task * child_task)11131debfc3dSmrg gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
11141debfc3dSmrg {
11151debfc3dSmrg   struct gomp_task *parent = child_task->parent;
11161debfc3dSmrg   size_t i;
11171debfc3dSmrg 
11181debfc3dSmrg   for (i = 0; i < child_task->depend_count; i++)
11191debfc3dSmrg     if (!child_task->depend[i].redundant)
11201debfc3dSmrg       {
11211debfc3dSmrg 	if (child_task->depend[i].next)
11221debfc3dSmrg 	  child_task->depend[i].next->prev = child_task->depend[i].prev;
11231debfc3dSmrg 	if (child_task->depend[i].prev)
11241debfc3dSmrg 	  child_task->depend[i].prev->next = child_task->depend[i].next;
11251debfc3dSmrg 	else
11261debfc3dSmrg 	  {
11271debfc3dSmrg 	    hash_entry_type *slot
11281debfc3dSmrg 	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
11291debfc3dSmrg 				NO_INSERT);
11301debfc3dSmrg 	    if (*slot != &child_task->depend[i])
11311debfc3dSmrg 	      abort ();
11321debfc3dSmrg 	    if (child_task->depend[i].next)
11331debfc3dSmrg 	      *slot = child_task->depend[i].next;
11341debfc3dSmrg 	    else
11351debfc3dSmrg 	      htab_clear_slot (parent->depend_hash, slot);
11361debfc3dSmrg 	  }
11371debfc3dSmrg       }
11381debfc3dSmrg }
11391debfc3dSmrg 
11401debfc3dSmrg /* After a CHILD_TASK has been run, adjust the dependency queue for
11411debfc3dSmrg    each task that depends on CHILD_TASK, to record the fact that there
11421debfc3dSmrg    is one less dependency to worry about.  If a task that depended on
11431debfc3dSmrg    CHILD_TASK now has no dependencies, place it in the various queues
11441debfc3dSmrg    so it gets scheduled to run.
11451debfc3dSmrg 
11461debfc3dSmrg    TEAM is the team to which CHILD_TASK belongs to.  */
11471debfc3dSmrg 
static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
	 TASK's remaining dependencies.  Once TASK has no other
	 dependencies, put it into the various queues so it will get
	 scheduled for execution.  */
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/true,
				 task->parent_depends_on);
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	    }
	}
      else
	task->parent = NULL;
      if (taskgroup)
	{
	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup->in_taskgroup_wait)
	    {
	      /* One more task has had its dependencies met.
		 Inform any waiters.  */
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, task->priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}
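
/* Editorial sketch, not part of libgomp: a user-level fragment (the
   names compute and use are placeholders) that exercises the function
   above.  When the producer task finishes, the consumer sits in its
   dependers vector; its num_dependees drops to zero here and it is
   queued on the parent, taskgroup and team queues:

     int x;
     #pragma omp task depend(out: x)	// producer
     x = compute ();
     #pragma omp task depend(in: x)	// consumer, requeued above
     use (x);
*/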

static inline size_t
gomp_task_run_post_handle_depend (struct gomp_task *child_task,
				  struct gomp_team *team)
{
  if (child_task->depend_count == 0)
    return 0;

  /* If the parent is gone already, the hash table has been freed and
     nothing will use it anymore; there is no need to remove anything
     from it.  */
  if (child_task->parent != NULL)
    gomp_task_run_post_handle_depend_hash (child_task);

  if (child_task->dependers == NULL)
    return 0;

  return gomp_task_run_post_handle_dependers (child_task, team);
}

/* Remove CHILD_TASK from its parent.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
			     child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}

/* Remove CHILD_TASK from its taskgroup.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  bool empty = priority_queue_remove (PQ_TASKGROUP,
				      &taskgroup->taskgroup_queue,
				      child_task, MEMMODEL_RELAXED);
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so we need a release
	 barrier here to ensure that memory written by child_task->fn
	 above is flushed before the 0 is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}
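
/* Editorial sketch, not part of libgomp: the release store above and
   the acquire load in GOMP_taskgroup_end form the standard
   release/acquire publication pairing, shown here in isolation:

     // writer (this function), after child_task->fn has run:
     __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);

     // reader (GOMP_taskgroup_end); if it observes the 0, all of the
     // task's memory writes are visible without taking task_lock:
     if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
       goto finish;
*/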

void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  team->task_running_count--;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}
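
/* Editorial sketch, not part of libgomp: this is the path that lets
   every team member, not just the producing thread, drain the team
   queue at a barrier.  A typical trigger (work () is a placeholder):

     #pragma omp parallel
     {
       #pragma omp single nowait
       for (int i = 0; i < 100; i++)
	 {
	   #pragma omp task
	   work (i);
	 }
     }	// implicit barrier: all threads run queued tasks here
*/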

/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  */

void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
	{
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  if (child_q)
	    {
	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				     child_task, MEMMODEL_RELAXED);
	      child_task->pnode[PQ_CHILDREN].next = NULL;
	      child_task->pnode[PQ_CHILDREN].prev = NULL;
	    }

	  gomp_clear_parent (&child_task->children_queue);

	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
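
/* Editorial sketch, not part of libgomp: GOMP_taskwait is what GCC
   emits for the taskwait directive (child () is a placeholder):

     #pragma omp task
     child ();
     #pragma omp taskwait	// lowered to a GOMP_taskwait () call

   Note the scheduling discipline above: the waiting thread runs its
   own ready children instead of blocking, and only sleeps on
   taskwait_sem once every remaining child is running elsewhere.  */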

/* Called when encountering a taskwait directive with depend clause(s).
   Wait as if it were a mergeable included task construct with an empty
   body.  */

void
GOMP_taskwait_depend (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If the parallel or taskgroup has been cancelled, return early.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  if (thr->task && thr->task->depend_hash)
    gomp_task_maybe_wait_for_dependencies (depend);
}
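
/* Editorial sketch, not part of libgomp: user-level usage (produce ()
   is a placeholder) of the OpenMP 5.0 form handled above:

     #pragma omp task depend(out: x)
     x = produce ();
     #pragma omp taskwait depend(in: x)	// waits only for tasks
					// touching x, not all children
*/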
1605c0a68be4Smrg 
16061debfc3dSmrg /* An undeferred task is about to run.  Wait for all tasks that this
16071debfc3dSmrg    undeferred task depends on.
16081debfc3dSmrg 
16091debfc3dSmrg    This is done by first putting all known ready dependencies
16101debfc3dSmrg    (dependencies that have their own dependencies met) at the top of
16111debfc3dSmrg    the scheduling queues.  Then we iterate through these imminently
16121debfc3dSmrg    ready tasks (and possibly other high priority tasks), and run them.
16131debfc3dSmrg    If we run out of ready dependencies to execute, we either wait for
1614c0a68be4Smrg    the remaining dependencies to finish, or wait for them to get
16151debfc3dSmrg    scheduled so we can run them.
16161debfc3dSmrg 
16171debfc3dSmrg    DEPEND is as in GOMP_task.  */
16181debfc3dSmrg 
16191debfc3dSmrg void
gomp_task_maybe_wait_for_dependencies(void ** depend)16201debfc3dSmrg gomp_task_maybe_wait_for_dependencies (void **depend)
16211debfc3dSmrg {
16221debfc3dSmrg   struct gomp_thread *thr = gomp_thread ();
16231debfc3dSmrg   struct gomp_task *task = thr->task;
16241debfc3dSmrg   struct gomp_team *team = thr->ts.team;
16251debfc3dSmrg   struct gomp_task_depend_entry elem, *ent = NULL;
16261debfc3dSmrg   struct gomp_taskwait taskwait;
1627c0a68be4Smrg   size_t orig_ndepend = (uintptr_t) depend[0];
16281debfc3dSmrg   size_t nout = (uintptr_t) depend[1];
1629c0a68be4Smrg   size_t ndepend = orig_ndepend;
1630c0a68be4Smrg   size_t normal = ndepend;
1631c0a68be4Smrg   size_t n = 2;
16321debfc3dSmrg   size_t i;
16331debfc3dSmrg   size_t num_awaited = 0;
16341debfc3dSmrg   struct gomp_task *child_task = NULL;
16351debfc3dSmrg   struct gomp_task *to_free = NULL;
16361debfc3dSmrg   int do_wake = 0;
16371debfc3dSmrg 
1638c0a68be4Smrg   if (ndepend == 0)
1639c0a68be4Smrg     {
1640c0a68be4Smrg       ndepend = nout;
1641c0a68be4Smrg       nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1642c0a68be4Smrg       normal = nout + (uintptr_t) depend[4];
1643c0a68be4Smrg       n = 5;
1644c0a68be4Smrg     }
16451debfc3dSmrg   gomp_mutex_lock (&team->task_lock);
16461debfc3dSmrg   for (i = 0; i < ndepend; i++)
16471debfc3dSmrg     {
1648c0a68be4Smrg       elem.addr = depend[i + n];
1649c0a68be4Smrg       elem.is_in = i >= nout;
1650c0a68be4Smrg       if (__builtin_expect (i >= normal, 0))
1651c0a68be4Smrg 	{
1652c0a68be4Smrg 	  void **d = (void **) elem.addr;
1653c0a68be4Smrg 	  switch ((uintptr_t) d[1])
1654c0a68be4Smrg 	    {
1655c0a68be4Smrg 	    case GOMP_DEPEND_IN:
1656c0a68be4Smrg 	      break;
1657c0a68be4Smrg 	    case GOMP_DEPEND_OUT:
1658c0a68be4Smrg 	    case GOMP_DEPEND_INOUT:
1659c0a68be4Smrg 	    case GOMP_DEPEND_MUTEXINOUTSET:
1660c0a68be4Smrg 	      elem.is_in = 0;
1661c0a68be4Smrg 	      break;
1662c0a68be4Smrg 	    default:
1663c0a68be4Smrg 	      gomp_fatal ("unknown omp_depend_t dependence type %d",
1664c0a68be4Smrg 			  (int) (uintptr_t) d[1]);
1665c0a68be4Smrg 	    }
1666c0a68be4Smrg 	  elem.addr = d[0];
1667c0a68be4Smrg 	}
16681debfc3dSmrg       ent = htab_find (task->depend_hash, &elem);
16691debfc3dSmrg       for (; ent; ent = ent->next)
1670c0a68be4Smrg 	if (elem.is_in && ent->is_in)
16711debfc3dSmrg 	  continue;
16721debfc3dSmrg 	else
16731debfc3dSmrg 	  {
16741debfc3dSmrg 	    struct gomp_task *tsk = ent->task;
16751debfc3dSmrg 	    if (!tsk->parent_depends_on)
16761debfc3dSmrg 	      {
16771debfc3dSmrg 		tsk->parent_depends_on = true;
16781debfc3dSmrg 		++num_awaited;
16798feb0f0bSmrg 		/* If dependency TSK itself has no dependencies and is
16801debfc3dSmrg 		   ready to run, move it up front so that we run it as
16811debfc3dSmrg 		   soon as possible.  */
16821debfc3dSmrg 		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
16831debfc3dSmrg 		  priority_queue_upgrade_task (tsk, task);
16841debfc3dSmrg 	      }
16851debfc3dSmrg 	  }
16861debfc3dSmrg     }
16871debfc3dSmrg   if (num_awaited == 0)
16881debfc3dSmrg     {
16891debfc3dSmrg       gomp_mutex_unlock (&team->task_lock);
16901debfc3dSmrg       return;
16911debfc3dSmrg     }
16921debfc3dSmrg 
16931debfc3dSmrg   memset (&taskwait, 0, sizeof (taskwait));
16941debfc3dSmrg   taskwait.n_depend = num_awaited;
16951debfc3dSmrg   gomp_sem_init (&taskwait.taskwait_sem, 0);
16961debfc3dSmrg   task->taskwait = &taskwait;
16971debfc3dSmrg 
16981debfc3dSmrg   while (1)
16991debfc3dSmrg     {
17001debfc3dSmrg       bool cancelled = false;
17011debfc3dSmrg       if (taskwait.n_depend == 0)
17021debfc3dSmrg 	{
17031debfc3dSmrg 	  task->taskwait = NULL;
17041debfc3dSmrg 	  gomp_mutex_unlock (&team->task_lock);
17051debfc3dSmrg 	  if (to_free)
17061debfc3dSmrg 	    {
17071debfc3dSmrg 	      gomp_finish_task (to_free);
17081debfc3dSmrg 	      free (to_free);
17091debfc3dSmrg 	    }
17101debfc3dSmrg 	  gomp_sem_destroy (&taskwait.taskwait_sem);
17111debfc3dSmrg 	  return;
17121debfc3dSmrg 	}
17131debfc3dSmrg 
17141debfc3dSmrg       /* Theoretically when we have multiple priorities, we should
17151debfc3dSmrg 	 chose between the highest priority item in
17161debfc3dSmrg 	 task->children_queue and team->task_queue here, so we should
17171debfc3dSmrg 	 use priority_queue_next_task().  However, since we are
17181debfc3dSmrg 	 running an undeferred task, perhaps that makes all tasks it
17191debfc3dSmrg 	 depends on undeferred, thus a priority of INF?  This would
17201debfc3dSmrg 	 make it unnecessary to take anything into account here,
17211debfc3dSmrg 	 but the dependencies.
17221debfc3dSmrg 
17231debfc3dSmrg 	 On the other hand, if we want to use priority_queue_next_task(),
17241debfc3dSmrg 	 care should be taken to only use priority_queue_remove()
17251debfc3dSmrg 	 below if the task was actually removed from the children
17261debfc3dSmrg 	 queue.  */
17271debfc3dSmrg       bool ignored;
17281debfc3dSmrg       struct gomp_task *next_task
17291debfc3dSmrg 	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
17301debfc3dSmrg 				    PQ_IGNORED, NULL, &ignored);
17311debfc3dSmrg 
17321debfc3dSmrg       if (next_task->kind == GOMP_TASK_WAITING)
17331debfc3dSmrg 	{
17341debfc3dSmrg 	  child_task = next_task;
17351debfc3dSmrg 	  cancelled
17361debfc3dSmrg 	    = gomp_task_run_pre (child_task, task, team);
17371debfc3dSmrg 	  if (__builtin_expect (cancelled, 0))
17381debfc3dSmrg 	    {
17391debfc3dSmrg 	      if (to_free)
17401debfc3dSmrg 		{
17411debfc3dSmrg 		  gomp_finish_task (to_free);
17421debfc3dSmrg 		  free (to_free);
17431debfc3dSmrg 		  to_free = NULL;
17441debfc3dSmrg 		}
17451debfc3dSmrg 	      goto finish_cancelled;
17461debfc3dSmrg 	    }
17471debfc3dSmrg 	}
17481debfc3dSmrg       else
17491debfc3dSmrg 	/* All tasks we are waiting for are either running in other
17501debfc3dSmrg 	   threads, or they are tasks that have not had their
17511debfc3dSmrg 	   dependencies met (so they're not even in the queue).  Wait
17521debfc3dSmrg 	   for them.  */
17531debfc3dSmrg 	taskwait.in_depend_wait = true;
17541debfc3dSmrg       gomp_mutex_unlock (&team->task_lock);
17551debfc3dSmrg       if (do_wake)
17561debfc3dSmrg 	{
17571debfc3dSmrg 	  gomp_team_barrier_wake (&team->barrier, do_wake);
17581debfc3dSmrg 	  do_wake = 0;
17591debfc3dSmrg 	}
17601debfc3dSmrg       if (to_free)
17611debfc3dSmrg 	{
17621debfc3dSmrg 	  gomp_finish_task (to_free);
17631debfc3dSmrg 	  free (to_free);
17641debfc3dSmrg 	  to_free = NULL;
17651debfc3dSmrg 	}
17661debfc3dSmrg       if (child_task)
17671debfc3dSmrg 	{
17681debfc3dSmrg 	  thr->task = child_task;
17691debfc3dSmrg 	  if (__builtin_expect (child_task->fn == NULL, 0))
17701debfc3dSmrg 	    {
17711debfc3dSmrg 	      if (gomp_target_task_fn (child_task->fn_data))
17721debfc3dSmrg 		{
17731debfc3dSmrg 		  thr->task = task;
17741debfc3dSmrg 		  gomp_mutex_lock (&team->task_lock);
17751debfc3dSmrg 		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
17761debfc3dSmrg 		  struct gomp_target_task *ttask
17771debfc3dSmrg 		    = (struct gomp_target_task *) child_task->fn_data;
17781debfc3dSmrg 		  /* If GOMP_PLUGIN_target_task_completion has run already
17791debfc3dSmrg 		     in between gomp_target_task_fn and the mutex lock,
17801debfc3dSmrg 		     perform the requeuing here.  */
17811debfc3dSmrg 		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
17821debfc3dSmrg 		    gomp_target_task_completion (team, child_task);
17831debfc3dSmrg 		  else
17841debfc3dSmrg 		    ttask->state = GOMP_TARGET_TASK_RUNNING;
17851debfc3dSmrg 		  child_task = NULL;
17861debfc3dSmrg 		  continue;
17871debfc3dSmrg 		}
17881debfc3dSmrg 	    }
17891debfc3dSmrg 	  else
17901debfc3dSmrg 	    child_task->fn (child_task->fn_data);
17911debfc3dSmrg 	  thr->task = task;
17921debfc3dSmrg 	}
17931debfc3dSmrg       else
17941debfc3dSmrg 	gomp_sem_wait (&taskwait.taskwait_sem);
17951debfc3dSmrg       gomp_mutex_lock (&team->task_lock);
17961debfc3dSmrg       if (child_task)
17971debfc3dSmrg 	{
17981debfc3dSmrg 	 finish_cancelled:;
17991debfc3dSmrg 	  size_t new_tasks
18001debfc3dSmrg 	    = gomp_task_run_post_handle_depend (child_task, team);
18011debfc3dSmrg 	  if (child_task->parent_depends_on)
18021debfc3dSmrg 	    --taskwait.n_depend;
18031debfc3dSmrg 
18041debfc3dSmrg 	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
18051debfc3dSmrg 				 child_task, MEMMODEL_RELAXED);
18061debfc3dSmrg 	  child_task->pnode[PQ_CHILDREN].next = NULL;
18071debfc3dSmrg 	  child_task->pnode[PQ_CHILDREN].prev = NULL;
18081debfc3dSmrg 
18091debfc3dSmrg 	  gomp_clear_parent (&child_task->children_queue);
18101debfc3dSmrg 	  gomp_task_run_post_remove_taskgroup (child_task);
18111debfc3dSmrg 	  to_free = child_task;
18121debfc3dSmrg 	  child_task = NULL;
18131debfc3dSmrg 	  team->task_count--;
18141debfc3dSmrg 	  if (new_tasks > 1)
18151debfc3dSmrg 	    {
18161debfc3dSmrg 	      do_wake = team->nthreads - team->task_running_count
18171debfc3dSmrg 			- !task->in_tied_task;
18181debfc3dSmrg 	      if (do_wake > new_tasks)
18191debfc3dSmrg 		do_wake = new_tasks;
18201debfc3dSmrg 	    }
18211debfc3dSmrg 	}
18221debfc3dSmrg     }
18231debfc3dSmrg }
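
/* Editorial sketch, not part of libgomp: one caller of the function
   above is GOMP_task for an undeferred task with depend clauses,
   which must wait for its predecessors before running inline
   (step1 and step2 are placeholders):

     #pragma omp task depend(out: a)
     a = step1 ();
     // undeferred: the encountering thread waits for step1 here
     #pragma omp task depend(in: a) if(0)
     step2 (a);
*/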

/* Called when encountering a taskyield directive.  */

void
GOMP_taskyield (void)
{
  /* Nothing at the moment.  */
}

static inline struct gomp_taskgroup *
gomp_taskgroup_init (struct gomp_taskgroup *prev)
{
  struct gomp_taskgroup *taskgroup
    = gomp_malloc (sizeof (struct gomp_taskgroup));
  taskgroup->prev = prev;
  priority_queue_init (&taskgroup->taskgroup_queue);
  taskgroup->reductions = prev ? prev->reductions : NULL;
  taskgroup->in_taskgroup_wait = false;
  taskgroup->cancelled = false;
  taskgroup->workshare = false;
  taskgroup->num_children = 0;
  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
  return taskgroup;
}

void
GOMP_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;

  /* If team is NULL, all tasks are executed as
     GOMP_TASK_UNDEFERRED tasks and thus all child tasks of
     taskgroup and their descendant tasks will be finished
     by the time GOMP_taskgroup_end is called.  */
  if (team == NULL)
    return;
  task->taskgroup = gomp_taskgroup_init (task->taskgroup);
}

void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
	 thr->ts.team == NULL, but inside of the taskgroup there
	 is #pragma omp target nowait that creates an implicit
	 team with a single thread.  In this case, we want to wait
	 for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  bool unused;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
				  MEMMODEL_RELAXED))
	{
	  if (taskgroup->num_children)
	    {
	      if (priority_queue_empty_p (&task->children_queue,
					  MEMMODEL_RELAXED))
		goto do_wait;
	      child_task
		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
					    PQ_TEAM, &team->task_queue,
					    &unused);
	    }
	  else
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task
	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	 do_wait:
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
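
/* Editorial sketch, not part of libgomp: the start/end pair above is
   emitted for (leaf () is a placeholder)

     #pragma omp taskgroup
     {
       #pragma omp task
       leaf ();	// plus any tasks leaf () itself spawns
     }	// GOMP_taskgroup_end: waits for the tasks AND their descendants

   unlike taskwait, which waits only for direct children.  */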

static inline __attribute__((always_inline)) void
gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
			 unsigned nthreads)
{
  size_t total_cnt = 0;
  uintptr_t *d = data;
  struct htab *old_htab = NULL, *new_htab;
  do
    {
      if (__builtin_expect (orig != NULL, 0))
	{
	  /* For worksharing task reductions, the memory has been
	     allocated already by some other thread that encountered
	     the construct earlier.  */
	  d[2] = orig[2];
	  d[6] = orig[6];
	  orig = (uintptr_t *) orig[4];
	}
      else
	{
	  size_t sz = d[1] * nthreads;
	  /* Should use omp_alloc if d[3] is not -1.  */
	  void *ptr = gomp_aligned_alloc (d[2], sz);
	  memset (ptr, '\0', sz);
	  d[2] = (uintptr_t) ptr;
	  d[6] = d[2] + sz;
	}
      d[5] = 0;
      total_cnt += d[0];
      if (d[4] == 0)
	{
	  d[4] = (uintptr_t) old;
	  break;
	}
      else
	d = (uintptr_t *) d[4];
    }
  while (1);
  if (old && old[5])
    {
      old_htab = (struct htab *) old[5];
      total_cnt += htab_elements (old_htab);
    }
  new_htab = htab_create (total_cnt);
  if (old_htab)
    {
      /* Copy the old hash table, like in htab_expand.  */
      hash_entry_type *p, *olimit;
      new_htab->n_elements = htab_elements (old_htab);
      olimit = old_htab->entries + old_htab->size;
      p = old_htab->entries;
      do
	{
	  hash_entry_type x = *p;
	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
	  p++;
	}
      while (p < olimit);
    }
  d = data;
  do
    {
      size_t j;
      for (j = 0; j < d[0]; ++j)
	{
	  uintptr_t *p = d + 7 + j * 3;
	  p[2] = (uintptr_t) d;
	  /* Ugly hack: hash_entry_type is defined for the task
	     dependencies, which hash on their first element, a pointer.
	     Here we need to hash on the first sizeof (uintptr_t) bytes,
	     which also contain a pointer.  Hide the cast from the
	     compiler.  */
	  hash_entry_type n;
	  __asm ("" : "=g" (n) : "0" (p));
	  *htab_find_slot (&new_htab, n, INSERT) = n;
	}
      if (d[4] == (uintptr_t) old)
	break;
      else
	d = (uintptr_t *) d[4];
    }
  while (1);
  d[5] = (uintptr_t) new_htab;
}

static void
gomp_create_artificial_team (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  struct gomp_team *team = gomp_new_team (1);
  struct gomp_task *task = thr->task;
  struct gomp_task **implicit_task = &task;
  icv = task ? &task->icv : &gomp_global_icv;
  team->prev_ts = thr->ts;
  thr->ts.team = team;
  thr->ts.team_id = 0;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  gomp_init_task (thr->task, NULL, icv);
  while (*implicit_task
	 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
    implicit_task = &(*implicit_task)->parent;
  if (*implicit_task)
    {
      thr->task = *implicit_task;
      gomp_end_task ();
      free (*implicit_task);
      thr->task = &team->implicit_task[0];
    }
#ifdef LIBGOMP_USE_PTHREADS
  else
    pthread_setspecific (gomp_thread_destructor, thr);
#endif
  if (implicit_task != &task)
    {
      *implicit_task = thr->task;
      thr->task = task;
    }
}
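
/* Editorial sketch, not part of libgomp: the artificial team above
   covers task-reduction code running outside any parallel region,
   e.g. an orphaned

     #pragma omp taskgroup task_reduction(+: s)
     { ... }

   encountered while thr->ts.team is still NULL; see the team == NULL
   path in GOMP_taskgroup_reduction_register below.  */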

/* The format of data is:
   data[0]	cnt
   data[1]	size
   data[2]	alignment (on output array pointer)
   data[3]	allocator (-1 if malloc allocator)
   data[4]	next pointer
   data[5]	used internally (htab pointer)
   data[6]	used internally (end of array)
   cnt times
   ent[0]	address
   ent[1]	offset
   ent[2]	used internally (pointer to data[0])
   The entries are sorted by increasing offset, so that a binary
   search can be performed.  Normally, data[8] is 0; the exception is
   worksharing construct task reductions in cancellable parallel,
   where at offset 0 there should be space for a pointer and an
   integer which are used internally.  */

void
GOMP_taskgroup_reduction_register (uintptr_t *data)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task;
  unsigned nthreads;
  if (__builtin_expect (team == NULL, 0))
    {
      /* The task reduction code needs a team and task, so for
	 orphaned taskgroups just create the implicit team.  */
      gomp_create_artificial_team ();
      ialias_call (GOMP_taskgroup_start) ();
      team = thr->ts.team;
    }
  nthreads = team->nthreads;
  task = thr->task;
  gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
  task->taskgroup->reductions = data;
}
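
/* Editorial sketch, not part of libgomp: for a single
   task_reduction(+: s) with sizeof (s) == 8, alignment 8 and
   nthreads == 4, the call above leaves the descriptor roughly as
   follows (PTR, OLD and HTAB stand for the actual runtime values):

     data[0] = 1;		// cnt: one reduction variable
     data[1] = 8;		// per-thread copy size
     data[2] = PTR;		// 4 * 8 = 32 zeroed bytes, all threads
     data[3] = (uintptr_t) -1;	// malloc allocator (input, unchanged)
     data[4] = OLD;		// chained outer reductions, or NULL
     data[5] = HTAB;		// address -> entry table built above
     data[6] = PTR + 32;	// end of the per-thread array
*/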

void
GOMP_taskgroup_reduction_unregister (uintptr_t *data)
{
  uintptr_t *d = data;
  htab_free ((struct htab *) data[5]);
  do
    {
      gomp_aligned_free ((void *) d[2]);
      d = (uintptr_t *) d[4];
    }
  while (d && !d[5]);
}
ialias (GOMP_taskgroup_reduction_unregister)

/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
   original list item or address of previously remapped original list
   item to address of the private copy, store that to ptrs[i].
   For i < cntorig, additionally set ptrs[cnt+i] to the address of
   the original list item.  */

void
GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  unsigned id = thr->ts.team_id;
  uintptr_t *data = task->taskgroup->reductions;
  uintptr_t *d;
  struct htab *reduction_htab = (struct htab *) data[5];
  size_t i;
  for (i = 0; i < cnt; ++i)
    {
      hash_entry_type ent, n;
      __asm ("" : "=g" (ent) : "0" (ptrs + i));
      n = htab_find (reduction_htab, ent);
      if (n)
	{
	  uintptr_t *p;
	  __asm ("" : "=g" (p) : "0" (n));
	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
	     p[1] is the offset within the allocated chunk for each
	     thread, p[2] is the array registered with
	     GOMP_taskgroup_reduction_register, d[2] is the base of the
	     allocated memory and d[1] is the size of the allocated chunk
	     for one thread.  */
	  d = (uintptr_t *) p[2];
	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
	  if (__builtin_expect (i < cntorig, 0))
	    ptrs[cnt + i] = (void *) p[0];
	  continue;
	}
      d = data;
      while (d != NULL)
	{
	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
	    break;
	  d = (uintptr_t *) d[4];
	}
      if (d == NULL)
	gomp_fatal ("couldn't find matching task_reduction or reduction with "
		    "task modifier for %p", ptrs[i]);
      uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
      ptrs[i] = (void *) (d[2] + id * d[1] + off);
      if (__builtin_expect (i < cntorig, 0))
	{
	  size_t lo = 0, hi = d[0] - 1;
	  while (lo <= hi)
	    {
	      size_t m = (lo + hi) / 2;
	      if (d[7 + 3 * m + 1] < off)
		lo = m + 1;
	      else if (d[7 + 3 * m + 1] == off)
		{
		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
		  break;
		}
	      else
		hi = m - 1;
	    }
	  if (lo > hi)
	    gomp_fatal ("couldn't find matching task_reduction or reduction "
			"with task modifier for %p", ptrs[i]);
	}
    }
}
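
/* Editorial sketch, not part of libgomp: the fallback remap arithmetic
   above in concrete (illustrative) numbers.  With d[2] = 0x1000,
   d[1] = 16 and team_id = 2, a pointer 0x1008 into some other
   thread's chunk remaps as

     off     = (0x1008 - 0x1000) % 16 = 8
     ptrs[i] = 0x1000 + 2 * 16 + 8    = 0x1028

   i.e. the same offset within thread 2's private chunk.  */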

struct gomp_taskgroup *
gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
{
  struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
  gomp_reduction_register (data, NULL, NULL, nthreads);
  taskgroup->reductions = data;
  return taskgroup;
}

void
gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  unsigned nthreads = team->nthreads;
  gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
  task->taskgroup->reductions = data;
}

void
gomp_workshare_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task;

  if (team == NULL)
    {
      gomp_create_artificial_team ();
      team = thr->ts.team;
    }
  task = thr->task;
  task->taskgroup = gomp_taskgroup_init (task->taskgroup);
  task->taskgroup->workshare = true;
}

void
GOMP_workshare_task_reduction_unregister (bool cancelled)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  uintptr_t *data = task->taskgroup->reductions;
  ialias_call (GOMP_taskgroup_end) ();
  if (thr->ts.team_id == 0)
    ialias_call (GOMP_taskgroup_reduction_unregister) (data);
  else
    htab_free ((struct htab *) data[5]);

  if (!cancelled)
    gomp_team_barrier_wait (&team->barrier);
}

int
omp_in_final (void)
{
  struct gomp_thread *thr = gomp_thread ();
  return thr->task && thr->task->final_task;
}

ialias (omp_in_final)
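
/* Editorial sketch, not part of libgomp: typical user-level use of the
   entry point above, in a cutoff-style recursion (solve_serial,
   solve_tasked and CUTOFF are placeholders):

     #pragma omp task final(depth > CUTOFF)
     {
       if (omp_in_final ())
	 solve_serial (n);	// included task: stop spawning tasks
       else
	 solve_tasked (n, depth + 1);
     }
*/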