/* Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of tasks in response to task
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#include "gomp-constants.h"

typedef struct gomp_task_depend_entry *hash_entry_type;

static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}

#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}
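
/* A minimal usage sketch (values hypothetical) of the address-keyed
   table instantiated above; the real lookups appear in
   gomp_task_handle_depend and gomp_task_run_post_handle_depend_hash
   below:

     htab_t h = htab_create (12);
     hash_entry_type *slot = htab_find_slot (&h, &task->depend[0], INSERT);
     if (*slot == NULL)
       *slot = &task->depend[0];

   Entries compare equal iff their ADDR fields match, so every depend
   clause naming the same address ends up in the same hash chain.  */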

/* Create a new task data structure.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
                struct gomp_task_icv *prev_icv)
{
  /* It would seem that using memset here would be a win, but it turns
     out that partially filling gomp_task allows us to keep the
     overhead of task creation low.  In the nqueens-1.c test, for a
     sufficiently large N, we drop the overhead from 5-6% to 1%.

     Note, the nqueens-1.c test in serial mode is a good test to
     benchmark the overhead of creating tasks as there are millions of
     tiny tasks created that all run undeferred.  */
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  priority_queue_init (&task->children_queue);
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}

/* Clean up a task, after completing it.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}

/* Clear the parent field of every task in LIST.  */

static inline void
gomp_clear_parent_in_list (struct priority_list *list)
{
  struct priority_node *p = list->tasks;
  if (p)
    do
      {
        priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
        p = p->next;
      }
    while (p != list->tasks);
}

/* Splay tree version of gomp_clear_parent_in_list.

   Clear the parent field of every task in NODE within SP, and free
   the node when done.  */

static void
gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
{
  if (!node)
    return;
  prio_splay_tree_node left = node->left, right = node->right;
  gomp_clear_parent_in_list (&node->key.l);
#if _LIBGOMP_CHECKING_
  memset (node, 0xaf, sizeof (*node));
#endif
  /* No need to remove the node from the tree.  We're nuking
     everything, so just free the nodes and our caller can clear the
     entire splay tree.  */
  free (node);
  gomp_clear_parent_in_tree (sp, left);
  gomp_clear_parent_in_tree (sp, right);
}

/* Clear the parent field of every task in Q and remove every task
   from Q.  */

static inline void
gomp_clear_parent (struct priority_queue *q)
{
  if (priority_queue_multi_p (q))
    {
      gomp_clear_parent_in_tree (&q->t, q->t.root);
      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
         No need to remove anything.  We can just nuke everything.  */
      q->t.root = NULL;
    }
  else
    gomp_clear_parent_in_list (&q->l);
}

/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
                         void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t i;
  hash_entry_type ent;

  if (ndepend)
    {
      /* depend[0] is total # */
      size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
      /* ndepend - nout is # of in: */
      for (i = 0; i < ndepend; i++)
        {
          task->depend[i].addr = depend[2 + i];
          task->depend[i].is_in = i >= nout;
        }
    }
  else
    {
      ndepend = (uintptr_t) depend[1]; /* total # */
      size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
      size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
      /* For now we treat mutexinoutset like out, which is compliant, but
         inefficient.  */
      size_t nin = (uintptr_t) depend[4]; /* # of in: */
      /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
      size_t normal = nout + nmutexinoutset + nin;
      size_t n = 0;
      for (i = normal; i < ndepend; i++)
        {
          void **d = (void **) (uintptr_t) depend[5 + i];
          switch ((uintptr_t) d[1])
            {
            case GOMP_DEPEND_OUT:
            case GOMP_DEPEND_INOUT:
            case GOMP_DEPEND_MUTEXINOUTSET:
              break;
            case GOMP_DEPEND_IN:
              continue;
            default:
              gomp_fatal ("unknown omp_depend_t dependence type %d",
                          (int) (uintptr_t) d[1]);
            }
          task->depend[n].addr = d[0];
          task->depend[n++].is_in = 0;
        }
      for (i = 0; i < normal; i++)
        {
          task->depend[n].addr = depend[5 + i];
          task->depend[n++].is_in = i >= nout + nmutexinoutset;
        }
      for (i = normal; i < ndepend; i++)
        {
          void **d = (void **) (uintptr_t) depend[5 + i];
          if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
            continue;
          task->depend[n].addr = d[0];
          task->depend[n++].is_in = 1;
        }
    }
  task->depend_count = ndepend;
  task->num_dependees = 0;
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
                                              &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
        {
          /* If multiple depend clauses of the same task name the same
             address, all but the first one are redundant.  As inout/out come
             first, if any of them is inout/out, it will win, which is the
             right semantics.  */
          if ((*slot)->task == task)
            {
              task->depend[i].redundant = true;
              continue;
            }
          for (ent = *slot; ent; ent = ent->next)
            {
              if (ent->redundant_out)
                break;

              last = ent;

              /* depend(in:...) doesn't depend on earlier depend(in:...).  */
              if (task->depend[i].is_in && ent->is_in)
                continue;

              if (!ent->is_in)
                out = ent;

              struct gomp_task *tsk = ent->task;
              if (tsk->dependers == NULL)
                {
                  tsk->dependers
                    = gomp_malloc (sizeof (struct gomp_dependers_vec)
                                   + 6 * sizeof (struct gomp_task *));
                  tsk->dependers->n_elem = 1;
                  tsk->dependers->allocated = 6;
                  tsk->dependers->elem[0] = task;
                  task->num_dependees++;
                  continue;
                }
              /* We already have some other dependency on tsk from an
                 earlier depend clause.  */
              else if (tsk->dependers->n_elem
                       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
                           == task))
                continue;
              else if (tsk->dependers->n_elem == tsk->dependers->allocated)
                {
                  tsk->dependers->allocated
                    = tsk->dependers->allocated * 2 + 2;
                  tsk->dependers
                    = gomp_realloc (tsk->dependers,
                                    sizeof (struct gomp_dependers_vec)
                                    + (tsk->dependers->allocated
                                       * sizeof (struct gomp_task *)));
                }
              tsk->dependers->elem[tsk->dependers->n_elem++] = task;
              task->num_dependees++;
            }
          task->depend[i].next = *slot;
          (*slot)->prev = &task->depend[i];
        }
      *slot = &task->depend[i];
      /* There is no need to store more than one depend({,in}out:) task per
         address in the hash table chain for the purpose of creation of
         deferred tasks, because each out depends on all earlier outs, thus it
         is enough to record just the last depend({,in}out:).  For depend(in:),
         we need to keep all of the previous ones not terminated yet, because
         a later depend({,in}out:) might need to depend on all of them.  So, if
         the new task's clause is depend({,in}out:), we know there is at most
         one other depend({,in}out:) clause in the list (out).  For
         non-deferred tasks we want to see all outs, so they are moved to the
         end of the chain; after the first redundant_out entry, all following
         entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
        {
          if (out != last)
            {
              out->next->prev = out->prev;
              out->prev->next = out->next;
              out->next = last->next;
              out->prev = last;
              last->next = out;
              if (out->next)
                out->next->prev = out;
            }
          out->redundant_out = true;
        }
    }
}

/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   DEPEND is an array containing:
     if depend[0] is non-zero, then:
       depend[0]: number of depend elements.
       depend[1]: number of depend elements of type "out/inout".
       depend[2..N+1]: address of [1..N]th depend element.
     otherwise, when depend[0] is zero, then:
       depend[1]: number of depend elements.
       depend[2]: number of depend elements of type "out/inout".
       depend[3]: number of depend elements of type "mutexinoutset".
       depend[4]: number of depend elements of type "in".
       depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
       depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
         omp_depend_t objects.  */
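
/* For example (a hand-written sketch, not actual compiler output), a
   construct like

     #pragma omp task depend(out: x) depend(in: y, z)

   would arrive here in the first encoding as

     void *depend[5] = { (void *) 3,      3 depend elements in total
                         (void *) 1,      1 of them is out/inout
                         &x, &y, &z };    out/inout addresses come first

   while the second encoding (depend[0] == 0) carries separate counts
   for mutexinoutset dependencies and omp_depend_t objects.  */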

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
           long arg_size, long arg_align, bool if_clause, unsigned flags,
           void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return;
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
         with depend clauses, check whether there is a dependency on any
         of them; if there is, we need to wait for them.  There is no
         need to handle depend clauses for non-deferred tasks other than
         this, because the parent task is suspended until the child task
         finishes and thus it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
          && thr->task && thr->task->depend_hash)
        gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
                        || (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
        {
          task.in_tied_task = thr->task->in_tied_task;
          task.taskgroup = thr->task->taskgroup;
        }
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
        {
          char buf[arg_size + arg_align - 1];
          char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
                                & ~(uintptr_t) (arg_align - 1));
          cpyfn (arg, data);
          fn (arg);
        }
      else
        fn (data);
      /* Access to "children" is normally done inside a task_lock
         mutex region, but the only way this particular task.children
         can be set is if this thread's task work function (fn)
         creates children.  So since the setter is *this* thread, we
         need no barriers here when testing for non-NULL.  We can have
         task.children set by the current thread then changed by a
         child thread, but seeing a stale non-NULL value is not a
         problem.  Once past the task_lock acquisition, this thread
         will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
        {
          gomp_mutex_lock (&team->task_lock);
          gomp_clear_parent (&task.children_queue);
          gomp_mutex_unlock (&team->task_lock);
        }
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
        depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
                       * sizeof (struct gomp_task_depend_entry));
      task = gomp_malloc (sizeof (*task) + depend_size
                          + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
                      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
        {
          cpyfn (arg, data);
          task->copy_ctors_done = true;
        }
      else
        memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
         tasks.  */
      if (__builtin_expect (gomp_cancel_var, 0)
          && !task->copy_ctors_done)
        {
          if (gomp_team_barrier_cancelled (&team->barrier))
            {
            do_cancel:
              gomp_mutex_unlock (&team->task_lock);
              gomp_finish_task (task);
              free (task);
              return;
            }
          if (taskgroup)
            {
              if (taskgroup->cancelled)
                goto do_cancel;
              if (taskgroup->workshare
                  && taskgroup->prev
                  && taskgroup->prev->cancelled)
                goto do_cancel;
            }
        }
      if (taskgroup)
        taskgroup->num_children++;
      if (depend_size)
        {
          gomp_task_handle_depend (task, parent, depend);
          if (task->num_dependees)
            {
              /* Tasks that depend on other tasks are not put into the
                 various waiting queues, so we are done for now.  Said
                 tasks are instead put into the queues via
                 gomp_task_run_post_handle_dependers() after their
                 dependencies have been satisfied.  After which, they
                 can be picked up by the various scheduling
                 points.  */
              gomp_mutex_unlock (&team->task_lock);
              return;
            }
        }

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
                             task, priority,
                             PRIORITY_INSERT_BEGIN,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);
      if (taskgroup)
        priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                               task, priority,
                               PRIORITY_INSERT_BEGIN,
                               /*adjust_parent_depends_on=*/false,
                               task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
                             task, priority,
                             PRIORITY_INSERT_END,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      do_wake = team->task_running_count + !parent->in_tied_task
                < team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        gomp_team_barrier_wake (&team->barrier, 1);
    }
}
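
/* As a rough illustration (a hand-written sketch, not actual compiler
   output), a directive such as

     #pragma omp task firstprivate (x)
       foo (x);

   is outlined by the compiler into a thunk plus a call along the
   lines of

     struct task_data { int x; };
     void outlined_fn (void *p) { foo (((struct task_data *) p)->x); }
     ...
     struct task_data data = { x };
     GOMP_task (outlined_fn, &data, NULL, sizeof (data),
                __alignof__ (struct task_data), true, 0, NULL, 0);

   where task_data and outlined_fn are hypothetical names.  With no
   GOMP_TASK_FLAG_DEPEND bit set in FLAGS, DEPEND may be NULL as
   above, since GOMP_task only reads it under that flag.  */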

ialias (GOMP_taskgroup_start)
ialias (GOMP_taskgroup_end)
ialias (GOMP_taskgroup_reduction_register)

#define TYPE long
#define UTYPE unsigned long
#define TYPE_is_long 1
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef TYPE_is_long

#define TYPE unsigned long long
#define UTYPE TYPE
#define GOMP_taskloop GOMP_taskloop_ull
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef GOMP_taskloop

static void inline
priority_queue_move_task_first (enum priority_queue_type type,
                                struct priority_queue *head,
                                struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to move first missing task %p", task);
#endif
  struct priority_list *list;
  if (priority_queue_multi_p (head))
    {
      list = priority_queue_lookup_priority (head, task->priority);
#if _LIBGOMP_CHECKING_
      if (!list)
        gomp_fatal ("Unable to find priority %d", task->priority);
#endif
    }
  else
    list = &head->l;
  priority_list_remove (list, task_to_priority_node (type, task), 0);
  priority_list_insert (type, list, task, task->priority,
                        PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
                        task->parent_depends_on);
}

/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
                                    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
                         PRIORITY_INSERT_BEGIN, false,
                         task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
        {
          /* One more task has had its dependencies met.
             Inform any waiters.  */
          parent->taskwait->in_taskwait = false;
          gomp_sem_post (&parent->taskwait->taskwait_sem);
        }
      else if (parent->taskwait->in_depend_wait)
        {
          /* One more task has had its dependencies met.
             Inform any waiters.  */
          parent->taskwait->in_depend_wait = false;
          gomp_sem_post (&parent->taskwait->taskwait_sem);
        }
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
         Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from an unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}

/* Signal that a target task TTASK has completed the asynchronously
   running phase and should be requeued as a task to handle the
   variable unmapping.  */

void
GOMP_PLUGIN_target_task_completion (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_task *task = ttask->task;
  struct gomp_team *team = ttask->team;

  gomp_mutex_lock (&team->task_lock);
  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
    {
      ttask->state = GOMP_TARGET_TASK_FINISHED;
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  ttask->state = GOMP_TARGET_TASK_FINISHED;
  gomp_target_task_completion (team, task);
  gomp_mutex_unlock (&team->task_lock);
}
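
/* A sketch of the TTASK->state handshake between this callback and
   gomp_create_target_task below (a simplified reading of the code):

     creating thread                       plugin thread
     ---------------                       -------------
     state = GOMP_TARGET_TASK_READY_TO_RUN
     gomp_target_task_fn (...)             ...async work finishes...
     lock (team->task_lock)                lock (team->task_lock)
     if state == FINISHED                  if state == READY_TO_RUN
       requeue via                           state = FINISHED; return
       gomp_target_task_completion         else (state == RUNNING)
     else                                    state = FINISHED and requeue
       state = RUNNING                     unlock
     unlock

   Whichever side observes the other's store second performs the
   requeuing, always under team->task_lock.  */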

static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);

/* Called for nowait target tasks.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
                         void (*fn) (void *), size_t mapnum, void **hostaddrs,
                         size_t *sizes, unsigned short *kinds,
                         unsigned int flags, void **depend, void **args,
                         enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return true;
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return true;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return true;
        }
    }

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;

  if (depend != NULL)
    {
      depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
         are firstprivate on the target task.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
        if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
          {
            size_t align = (size_t) 1 << (kinds[i] >> 8);
            if (tgt_align < align)
              tgt_align = align;
            tgt_size = (tgt_size + align - 1) & ~(align - 1);
            tgt_size += sizes[i];
          }
      if (tgt_align)
        tgt_size += tgt_align - 1;
      else
        tgt_size = 0;
    }

  task = gomp_malloc (sizeof (*task) + depend_size
                      + sizeof (*ttask)
                      + mapnum * (sizeof (void *) + sizeof (size_t)
                                  + sizeof (unsigned short))
                      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  ttask->args = args;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
        tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
        if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
          {
            size_t align = (size_t) 1 << (kinds[i] >> 8);
            tgt_size = (tgt_size + align - 1) & ~(align - 1);
            memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
            ttask->hostaddrs[i] = tgt + tgt_size;
            tgt_size = tgt_size + sizes[i];
          }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  task->fn = NULL;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0))
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        {
        do_cancel:
          gomp_mutex_unlock (&team->task_lock);
          gomp_finish_task (task);
          free (task);
          return true;
        }
      if (taskgroup)
        {
          if (taskgroup->cancelled)
            goto do_cancel;
          if (taskgroup->workshare
              && taskgroup->prev
              && taskgroup->prev->cancelled)
            goto do_cancel;
        }
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
        {
          if (taskgroup)
            taskgroup->num_children++;
          gomp_mutex_unlock (&team->task_lock);
          return true;
        }
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run the gomp_target_task_fn right away, essentially schedule the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
                             PRIORITY_INSERT_END,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);
      if (taskgroup)
        priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                               task, 0, PRIORITY_INSERT_END,
                               /*adjust_parent_depends_on=*/false,
                               task->parent_depends_on);
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
         in between gomp_target_task_fn and the mutex lock,
         perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
        gomp_target_task_completion (team, task);
      else
        ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
                         PRIORITY_INSERT_BEGIN,
                         /*adjust_parent_depends_on=*/false,
                         task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
                           PRIORITY_INSERT_BEGIN,
                           /*adjust_parent_depends_on=*/false,
                           task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
                         PRIORITY_INSERT_END,
                         /*adjust_parent_depends_on=*/false,
                         task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
            < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}

/* Given a parent_depends_on task in LIST, move it to the front of its
   priority so it is run as soon as possible.

   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.

   We rearrange the queue such that all parent_depends_on tasks are
   first, and last_parent_depends_on points to the last such task we
   rearranged.  For example, given the following tasks in a queue
   where PD[123] are the parent_depends_on tasks:

     task->children
     |
     V
     C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

   We rearrange such that:

     task->children
     |             +--- last_parent_depends_on
     |             |
     V             V
     PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */

static void inline
priority_list_upgrade_task (struct priority_list *list,
                            struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  list->last_parent_depends_on = node;
}

/* Given a parent_depends_on TASK in its parent's children_queue, move
   it to the front of its priority so it is run as soon as possible.

   PARENT is passed as an optimization.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_upgrade_task (struct gomp_task *task,
                             struct gomp_task *parent)
{
  struct priority_queue *head = &parent->children_queue;
  struct priority_node *node = &task->pnode[PQ_CHILDREN];
#if _LIBGOMP_CHECKING_
  if (!task->parent_depends_on)
    gomp_fatal ("priority_queue_upgrade_task: task must be a "
                "parent_depends_on task");
  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
        = priority_queue_lookup_priority (head, task->priority);
      priority_list_upgrade_task (list, node);
    }
  else
    priority_list_upgrade_task (&head->l, node);
}

/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
   the way in LIST so that other tasks can be considered for
   execution.  LIST contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.  */

static void inline
priority_list_downgrade_task (enum priority_queue_type type,
                              struct priority_list *list,
                              struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  if (list->tasks == node)
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
         cannot come before WAITING tasks.  If we're about to
         leave the queue in such an indeterminate state, rewire
         things appropriately.  However, a TIED task at the end is
         perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
        {
          /* Remove from list.  */
          node->prev->next = node->next;
          node->next->prev = node->prev;
          /* Rewire at the end.  */
          node->next = list->tasks;
          node->prev = list->tasks->prev;
          list->tasks->prev->next = node;
          list->tasks->prev = node;
        }
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      if (node->prev != node
          && prev_child->kind == GOMP_TASK_WAITING
          && prev_child->parent_depends_on)
        list->last_parent_depends_on = node->prev;
      else
        {
          /* There are no more parent_depends_on entries waiting
             to run, clear the list.  */
          list->last_parent_depends_on = NULL;
        }
    }
}

/* Given a TASK in HEAD that is about to be executed, move it out of
   the way so that other tasks can be considered for execution.  HEAD
   contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_downgrade_task (enum priority_queue_type type,
                               struct priority_queue *head,
                               struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to downgrade missing task %p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
        = priority_queue_lookup_priority (head, task->priority);
      priority_list_downgrade_task (type, list, task);
    }
  else
    priority_list_downgrade_task (type, &head->l, task);
}

/* Setup CHILD_TASK to execute.  This is done by setting the task to
   TIED, and updating all relevant queues so that CHILD_TASK is no
   longer chosen for scheduling.  Also, remove CHILD_TASK from the
   overall team task queue entirely.

   Return TRUE if task or its containing taskgroup has been
   cancelled.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
                   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
                           &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
                           &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
                                   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                   child_task);

  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
                         MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  if (__builtin_expect (gomp_cancel_var, 0)
      && !child_task->copy_ctors_done)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return true;
      if (taskgroup)
        {
          if (taskgroup->cancelled)
            return true;
          if (taskgroup->workshare
              && taskgroup->prev
              && taskgroup->prev->cancelled)
            return true;
        }
    }
  return false;
}

static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
        if (child_task->depend[i].next)
          child_task->depend[i].next->prev = child_task->depend[i].prev;
        if (child_task->depend[i].prev)
          child_task->depend[i].prev->next = child_task->depend[i].next;
        else
          {
            hash_entry_type *slot
              = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
                                NO_INSERT);
            if (*slot != &child_task->depend[i])
              abort ();
            if (child_task->depend[i].next)
              *slot = child_task->depend[i].next;
            else
              htab_clear_slot (parent->depend_hash, slot);
          }
      }
}

/* After a CHILD_TASK has been run, adjust the dependency queue for
   each task that depends on CHILD_TASK, to record the fact that there
   is one less dependency to worry about.  If a task that depended on
   CHILD_TASK now has no dependencies, place it in the various queues
   so it gets scheduled to run.

   TEAM is the team to which CHILD_TASK belongs.  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
                                     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
         TASK's remaining dependencies.  Once TASK has no other
         dependencies, put it into the various queues so it will get
         scheduled for execution.  */
      if (--task->num_dependees != 0)
        continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
        {
          priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
                                 task, task->priority,
                                 PRIORITY_INSERT_BEGIN,
                                 /*adjust_parent_depends_on=*/true,
                                 task->parent_depends_on);
          if (parent->taskwait)
            {
              if (parent->taskwait->in_taskwait)
                {
                  /* One more task has had its dependencies met.
                     Inform any waiters.  */
                  parent->taskwait->in_taskwait = false;
                  gomp_sem_post (&parent->taskwait->taskwait_sem);
                }
              else if (parent->taskwait->in_depend_wait)
                {
                  /* One more task has had its dependencies met.
                     Inform any waiters.  */
                  parent->taskwait->in_depend_wait = false;
                  gomp_sem_post (&parent->taskwait->taskwait_sem);
                }
            }
        }
      else
        task->parent = NULL;
      if (taskgroup)
        {
          priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                 task, task->priority,
                                 PRIORITY_INSERT_BEGIN,
                                 /*adjust_parent_depends_on=*/false,
                                 task->parent_depends_on);
          if (taskgroup->in_taskgroup_wait)
            {
              /* One more task has had its dependencies met.
                 Inform any waiters.  */
              taskgroup->in_taskgroup_wait = false;
              gomp_sem_post (&taskgroup->taskgroup_sem);
            }
        }
      priority_queue_insert (PQ_TEAM, &team->task_queue,
                             task, task->priority,
                             PRIORITY_INSERT_END,
                             /*adjust_parent_depends_on=*/false,
                             task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}

static inline size_t
gomp_task_run_post_handle_depend (struct gomp_task *child_task,
                                  struct gomp_team *team)
{
  if (child_task->depend_count == 0)
    return 0;

  /* If parent is gone already, the hash table is freed and nothing
     will use the hash table anymore, no need to remove anything from it.  */
  if (child_task->parent != NULL)
    gomp_task_run_post_handle_depend_hash (child_task);

  if (child_task->dependers == NULL)
    return 0;

  return gomp_task_run_post_handle_dependers (child_task, team);
}

/* Remove CHILD_TASK from its parent.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
                             child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}

/* Remove CHILD_TASK from its taskgroup.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  bool empty = priority_queue_remove (PQ_TASKGROUP,
                                      &taskgroup->taskgroup_queue,
                                      child_task, MEMMODEL_RELAXED);
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
         outside of the task lock mutex region, so we need a release
         barrier here to ensure memory written by child_task->fn above
         is flushed before the zero is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}
13031debfc3dSmrg
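/* The release store above pairs with the acquire load of num_children in
   GOMP_taskgroup_end.  A minimal sketch of the pattern (illustrative only,
   written with plain C11 atomics rather than the libgomp wrappers):

     // task-running thread, after child_task->fn returns:
     atomic_store_explicit (&num_children, 0, memory_order_release);

     // thread waiting in GOMP_taskgroup_end:
     if (atomic_load_explicit (&num_children, memory_order_acquire) == 0)
       return;   // all writes by the child task's fn are now visible

   The same idiom recurs below for task->children_queue in GOMP_taskwait.  */
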
void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  team->task_running_count--;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}

/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  */

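/* For instance (a hedged user-level sketch, not part of libgomp;
   f () and g () are hypothetical user functions):

     #pragma omp task
       f ();
     #pragma omp task
       g ();
     #pragma omp taskwait   // the compiler emits a call to GOMP_taskwait

   GOMP_taskwait returns only after both child tasks have completed,
   executing queued children itself where possible rather than blocking.  */
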
void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
	{
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  if (child_q)
	    {
	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				     child_task, MEMMODEL_RELAXED);
	      child_task->pnode[PQ_CHILDREN].next = NULL;
	      child_task->pnode[PQ_CHILDREN].prev = NULL;
	    }

	  gomp_clear_parent (&child_task->children_queue);

	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}

/* Called when encountering a taskwait directive with depend clause(s).
   Wait as if it were a mergeable included task construct with an empty
   body.  */

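/* For instance (a hedged user-level sketch, not part of libgomp;
   compute () is a hypothetical user function):

     int x;
     #pragma omp task depend(out: x)
       x = compute ();
     #pragma omp taskwait depend(in: x)   // waits only for the task above

   Unlike a plain taskwait, only tasks whose depend clauses conflict with
   the listed dependences are waited for.  */
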
void
GOMP_taskwait_depend (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, return early.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  if (thr->task && thr->task->depend_hash)
    gomp_task_maybe_wait_for_dependencies (depend);
}

/* An undeferred task is about to run.  Wait for all tasks that this
   undeferred task depends on.

   This is done by first putting all known ready dependencies
   (dependencies that have their own dependencies met) at the top of
   the scheduling queues.  Then we iterate through these imminently
   ready tasks (and possibly other high priority tasks), and run them.
   If we run out of ready dependencies to execute, we either wait for
   the remaining dependencies to finish, or wait for them to get
   scheduled so we can run them.

   DEPEND is as in GOMP_task.  */

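/* For instance (a hedged user-level sketch, not part of libgomp;
   compute () and use () are hypothetical user functions):

     int a;
     #pragma omp task depend(out: a)
       a = compute ();
     #pragma omp task if(0) depend(in: a)   // undeferred: runs immediately,
       use (a);                             // but must wait for the task above

   The if(0) task is undeferred, so before its body runs the encountering
   thread ends up here, waiting for (or itself running) the out-dependence.  */
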
void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  size_t orig_ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t ndepend = orig_ndepend;
  size_t normal = ndepend;
  size_t n = 2;
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (ndepend == 0)
    {
      ndepend = nout;
      nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
      normal = nout + (uintptr_t) depend[4];
      n = 5;
    }
  gomp_mutex_lock (&team->task_lock);
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + n];
      elem.is_in = i >= nout;
      if (__builtin_expect (i >= normal, 0))
	{
	  void **d = (void **) elem.addr;
	  switch ((uintptr_t) d[1])
	    {
	    case GOMP_DEPEND_IN:
	      break;
	    case GOMP_DEPEND_OUT:
	    case GOMP_DEPEND_INOUT:
	    case GOMP_DEPEND_MUTEXINOUTSET:
	      elem.is_in = 0;
	      break;
	    default:
	      gomp_fatal ("unknown omp_depend_t dependence type %d",
			  (int) (uintptr_t) d[1]);
	    }
	  elem.addr = d[0];
	}
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	if (elem.is_in && ent->is_in)
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		/* If dependency TSK itself has no dependencies and is
		   ready to run, move it up front so that we run it as
		   soon as possible.  */
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  priority_queue_upgrade_task (tsk, task);
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}

      /* Theoretically when we have multiple priorities, we should
	 choose between the highest priority item in
	 task->children_queue and team->task_queue here, so we should
	 use priority_queue_next_task ().  However, since we are
	 running an undeferred task, perhaps that makes all tasks it
	 depends on undeferred, thus a priority of INF?  This would
	 make it unnecessary to take anything into account here,
	 but the dependencies.

	 On the other hand, if we want to use priority_queue_next_task (),
	 care should be taken to only use priority_queue_remove ()
	 below if the task was actually removed from the children
	 queue.  */
      bool ignored;
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;

	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				 child_task, MEMMODEL_RELAXED);
	  child_task->pnode[PQ_CHILDREN].next = NULL;
	  child_task->pnode[PQ_CHILDREN].prev = NULL;

	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}

/* Called when encountering a taskyield directive.  */

void
GOMP_taskyield (void)
{
  /* Nothing at the moment.  */
}

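/* For instance (a hedged user-level sketch, not part of libgomp;
   done and check_progress () are hypothetical):

     while (!done)
       {
	 #pragma omp taskyield   // a hint; this implementation ignores it
	 check_progress ();
       }

   taskyield is only a scheduling hint, so doing nothing here is a
   conforming implementation.  */
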
static inline struct gomp_taskgroup *
gomp_taskgroup_init (struct gomp_taskgroup *prev)
{
  struct gomp_taskgroup *taskgroup
    = gomp_malloc (sizeof (struct gomp_taskgroup));
  taskgroup->prev = prev;
  priority_queue_init (&taskgroup->taskgroup_queue);
  taskgroup->reductions = prev ? prev->reductions : NULL;
  taskgroup->in_taskgroup_wait = false;
  taskgroup->cancelled = false;
  taskgroup->workshare = false;
  taskgroup->num_children = 0;
  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
  return taskgroup;
}

void
GOMP_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;

  /* If team is NULL, all tasks are executed as
     GOMP_TASK_UNDEFERRED tasks and thus all child tasks of the
     taskgroup and their descendant tasks will be finished
     by the time GOMP_taskgroup_end is called.  */
  if (team == NULL)
    return;
  task->taskgroup = gomp_taskgroup_init (task->taskgroup);
}

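/* For instance (a hedged user-level sketch, not part of libgomp;
   f () and g () are hypothetical user functions):

     #pragma omp taskgroup      // compiled to GOMP_taskgroup_start ()
     {
       #pragma omp task
	 f ();
       #pragma omp task
	 g ();
     }                          // compiled to GOMP_taskgroup_end ()

   Unlike taskwait, taskgroup also waits for all descendant tasks created
   inside the region, not just direct children.  */
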
void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
	 thr->ts.team == NULL, but inside the taskgroup there is
	 a #pragma omp target nowait that creates an implicit
	 team with a single thread.  In this case, we want to wait
	 for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  bool unused;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
				  MEMMODEL_RELAXED))
	{
	  if (taskgroup->num_children)
	    {
	      if (priority_queue_empty_p (&task->children_queue,
					  MEMMODEL_RELAXED))
		goto do_wait;
	      child_task
		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
					    PQ_TEAM, &team->task_queue,
					    &unused);
	    }
	  else
	    {
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task
	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	 do_wait:
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}

static inline __attribute__((always_inline)) void
gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
			 unsigned nthreads)
{
  size_t total_cnt = 0;
  uintptr_t *d = data;
  struct htab *old_htab = NULL, *new_htab;
  do
    {
      if (__builtin_expect (orig != NULL, 0))
	{
	  /* For worksharing task reductions, memory has been allocated
	     already by some other thread that encountered the construct
	     earlier.  */
	  d[2] = orig[2];
	  d[6] = orig[6];
	  orig = (uintptr_t *) orig[4];
	}
      else
	{
	  size_t sz = d[1] * nthreads;
	  /* Should use omp_alloc if d[3] is not -1.  */
	  void *ptr = gomp_aligned_alloc (d[2], sz);
	  memset (ptr, '\0', sz);
	  d[2] = (uintptr_t) ptr;
	  d[6] = d[2] + sz;
	}
      d[5] = 0;
      total_cnt += d[0];
      if (d[4] == 0)
	{
	  d[4] = (uintptr_t) old;
	  break;
	}
      else
	d = (uintptr_t *) d[4];
    }
  while (1);
  if (old && old[5])
    {
      old_htab = (struct htab *) old[5];
      total_cnt += htab_elements (old_htab);
    }
  new_htab = htab_create (total_cnt);
  if (old_htab)
    {
      /* Copy old hash table, like in htab_expand.  */
      hash_entry_type *p, *olimit;
      new_htab->n_elements = htab_elements (old_htab);
      olimit = old_htab->entries + old_htab->size;
      p = old_htab->entries;
      do
	{
	  hash_entry_type x = *p;
	  if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
	    *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
	  p++;
	}
      while (p < olimit);
    }
  d = data;
  do
    {
      size_t j;
      for (j = 0; j < d[0]; ++j)
	{
	  uintptr_t *p = d + 7 + j * 3;
	  p[2] = (uintptr_t) d;
	  /* Ugly hack: hash_entry_type is defined for the task
	     dependencies, which hash on the first element, a pointer.
	     We need to hash also on the first sizeof (uintptr_t) bytes,
	     which contain a pointer.  Hide the cast from the compiler.  */
	  hash_entry_type n;
	  __asm ("" : "=g" (n) : "0" (p));
	  *htab_find_slot (&new_htab, n, INSERT) = n;
	}
      if (d[4] == (uintptr_t) old)
	break;
      else
	d = (uintptr_t *) d[4];
    }
  while (1);
  d[5] = (uintptr_t) new_htab;
}

static void
gomp_create_artificial_team (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  struct gomp_team *team = gomp_new_team (1);
  struct gomp_task *task = thr->task;
  struct gomp_task **implicit_task = &task;
  icv = task ? &task->icv : &gomp_global_icv;
  team->prev_ts = thr->ts;
  thr->ts.team = team;
  thr->ts.team_id = 0;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  gomp_init_task (thr->task, NULL, icv);
  while (*implicit_task
	 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
    implicit_task = &(*implicit_task)->parent;
  if (*implicit_task)
    {
      thr->task = *implicit_task;
      gomp_end_task ();
      free (*implicit_task);
      thr->task = &team->implicit_task[0];
    }
#ifdef LIBGOMP_USE_PTHREADS
  else
    pthread_setspecific (gomp_thread_destructor, thr);
#endif
  if (implicit_task != &task)
    {
      *implicit_task = thr->task;
      thr->task = task;
    }
}

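/* For instance (a hedged user-level sketch, not part of libgomp): a
   reduction-carrying taskgroup encountered outside of any parallel
   region, e.g.

     int sum = 0;
     #pragma omp taskgroup task_reduction(+: sum)   // no enclosing team
     { ... }

   reaches GOMP_taskgroup_reduction_register with team == NULL, and the
   artificial single-thread team created above supplies the team/task
   state the reduction code relies on.  */
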
/* The format of data is:
   data[0]	cnt
   data[1]	size
   data[2]	alignment (on output array pointer)
   data[3]	allocator (-1 if malloc allocator)
   data[4]	next pointer
   data[5]	used internally (htab pointer)
   data[6]	used internally (end of array)
   cnt times
   ent[0]	address
   ent[1]	offset
   ent[2]	used internally (pointer to data[0])
   The entries are sorted by increasing offset, so that a binary
   search can be performed.  Normally, data[8] is 0; the exception is
   worksharing construct task reductions in cancellable parallel,
   where at offset 0 there should be space for a pointer and an integer
   which are used internally.  */

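/* A minimal sketch of walking this structure (illustrative only; the
   indices mirror the layout described above, and count_reduction_entries
   is a hypothetical helper, not a libgomp function):

     static size_t
     count_reduction_entries (uintptr_t *data)
     {
       size_t cnt = 0;
       for (uintptr_t *d = data; d; d = (uintptr_t *) d[4])
	 cnt += d[0];   // d[0] is the entry count of this descriptor
       return cnt;
     }

   gomp_reduction_register above performs essentially this walk while also
   allocating the per-thread arrays and building the hash table.  */
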
void
GOMP_taskgroup_reduction_register (uintptr_t *data)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task;
  unsigned nthreads;
  if (__builtin_expect (team == NULL, 0))
    {
      /* The task reduction code needs a team and task, so for
	 orphaned taskgroups just create the implicit team.  */
      gomp_create_artificial_team ();
      ialias_call (GOMP_taskgroup_start) ();
      team = thr->ts.team;
    }
  nthreads = team->nthreads;
  task = thr->task;
  gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
  task->taskgroup->reductions = data;
}

void
GOMP_taskgroup_reduction_unregister (uintptr_t *data)
{
  uintptr_t *d = data;
  htab_free ((struct htab *) data[5]);
  do
    {
      gomp_aligned_free ((void *) d[2]);
      d = (uintptr_t *) d[4];
    }
  while (d && !d[5]);
}
ialias (GOMP_taskgroup_reduction_unregister)

/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
   original list item or address of previously remapped original list
   item to address of the private copy, store that to ptrs[i].
   For i < cntorig, additionally set ptrs[cnt+i] to the address of
   the original list item.  */

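/* For instance (a hedged user-level sketch, not part of libgomp;
   work () is a hypothetical user function):

     int sum = 0;
     #pragma omp taskgroup task_reduction(+: sum)
     {
       #pragma omp task in_reduction(+: sum)
	 sum += work ();
     }

   Inside the task, references to sum are remapped through
   GOMP_task_reduction_remap to this thread's private copy, found either
   via the hash table or by the address-range search below.  */
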
void
GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  unsigned id = thr->ts.team_id;
  uintptr_t *data = task->taskgroup->reductions;
  uintptr_t *d;
  struct htab *reduction_htab = (struct htab *) data[5];
  size_t i;
  for (i = 0; i < cnt; ++i)
    {
      hash_entry_type ent, n;
      __asm ("" : "=g" (ent) : "0" (ptrs + i));
      n = htab_find (reduction_htab, ent);
      if (n)
	{
	  uintptr_t *p;
	  __asm ("" : "=g" (p) : "0" (n));
	  /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
	     p[1] is the offset within the allocated chunk for each
	     thread, p[2] is the array registered with
	     GOMP_taskgroup_reduction_register, d[2] is the base of the
	     allocated memory and d[1] is the size of the allocated chunk
	     for one thread.  */
	  d = (uintptr_t *) p[2];
	  ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
	  if (__builtin_expect (i < cntorig, 0))
	    ptrs[cnt + i] = (void *) p[0];
	  continue;
	}
      d = data;
      while (d != NULL)
	{
	  if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
	    break;
	  d = (uintptr_t *) d[4];
	}
      if (d == NULL)
	gomp_fatal ("couldn't find matching task_reduction or reduction with "
		    "task modifier for %p", ptrs[i]);
      uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
      ptrs[i] = (void *) (d[2] + id * d[1] + off);
      if (__builtin_expect (i < cntorig, 0))
	{
	  size_t lo = 0, hi = d[0] - 1;
	  while (lo <= hi)
	    {
	      size_t m = (lo + hi) / 2;
	      if (d[7 + 3 * m + 1] < off)
		lo = m + 1;
	      else if (d[7 + 3 * m + 1] == off)
		{
		  ptrs[cnt + i] = (void *) d[7 + 3 * m];
		  break;
		}
	      else
		hi = m - 1;
	    }
	  if (lo > hi)
	    gomp_fatal ("couldn't find matching task_reduction or reduction "
			"with task modifier for %p", ptrs[i]);
	}
    }
}

struct gomp_taskgroup *
gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
{
  struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
  gomp_reduction_register (data, NULL, NULL, nthreads);
  taskgroup->reductions = data;
  return taskgroup;
}

void
gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  unsigned nthreads = team->nthreads;
  gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
  task->taskgroup->reductions = data;
}

void
gomp_workshare_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task;

  if (team == NULL)
    {
      gomp_create_artificial_team ();
      team = thr->ts.team;
    }
  task = thr->task;
  task->taskgroup = gomp_taskgroup_init (task->taskgroup);
  task->taskgroup->workshare = true;
}

void
GOMP_workshare_task_reduction_unregister (bool cancelled)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  uintptr_t *data = task->taskgroup->reductions;
  ialias_call (GOMP_taskgroup_end) ();
  if (thr->ts.team_id == 0)
    ialias_call (GOMP_taskgroup_reduction_unregister) (data);
  else
    htab_free ((struct htab *) data[5]);

  if (!cancelled)
    gomp_team_barrier_wait (&team->barrier);
}

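/* Return non-zero when called from within a final task region.  For
   instance (a hedged user-level sketch, not part of libgomp; n and
   sequential_leaf () are hypothetical):

     #pragma omp task final(n < 16)
     {
       if (omp_in_final ())
	 sequential_leaf ();   // descendants run undeferred from here on
     }
*/
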
int
omp_in_final (void)
{
  struct gomp_thread *thr = gomp_thread ();
  return thr->task && thr->task->final_task;
}

ialias (omp_in_final)