/* Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of tasks in response to task
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#include "gomp-constants.h"

typedef struct gomp_task_depend_entry *hash_entry_type;
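/* "hashtab.h" below is a template-style header: it is included after
   hash_entry_type, htab_alloc and htab_free are defined, and it calls the
   htab_hash/htab_eq helpers provided underneath.  The resulting table maps
   the address in a depend clause to a chain of gomp_task_depend_entry
   records for not-yet-completed sibling tasks referencing that address.  */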

static inline void *
htab_alloc (size_t size)
{
  return gomp_malloc (size);
}

static inline void
htab_free (void *ptr)
{
  free (ptr);
}

#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x->addr == y->addr;
}

/* Create a new task data structure.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
		struct gomp_task_icv *prev_icv)
{
  /* It would seem that using memset here would be a win, but it turns
     out that partially filling gomp_task allows us to keep the
     overhead of task creation low.  In the nqueens-1.c test, for a
     sufficiently large N, we drop the overhead from 5-6% to 1%.

     Note, the nqueens-1.c test in serial mode is a good test to
     benchmark the overhead of creating tasks as there are millions of
     tiny tasks created that all run undeferred.  */
  task->parent = parent_task;
  task->icv = *prev_icv;
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  priority_queue_init (&task->children_queue);
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}

/* Clean up a task, after completing it.  */

void
gomp_end_task (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;

  gomp_finish_task (task);
  thr->task = task->parent;
}

/* Clear the parent field of every task in LIST.  */

static inline void
gomp_clear_parent_in_list (struct priority_list *list)
{
  struct priority_node *p = list->tasks;
  if (p)
    do
      {
	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
	p = p->next;
      }
    while (p != list->tasks);
}

/* Splay tree version of gomp_clear_parent_in_list.

   Clear the parent field of every task in NODE within SP, and free
   the node when done.  */

static void
gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
{
  if (!node)
    return;
  prio_splay_tree_node left = node->left, right = node->right;
  gomp_clear_parent_in_list (&node->key.l);
#if _LIBGOMP_CHECKING_
  memset (node, 0xaf, sizeof (*node));
#endif
  /* No need to remove the node from the tree.  We're nuking
     everything, so just free the nodes and our caller can clear the
     entire splay tree.  */
  free (node);
  gomp_clear_parent_in_tree (sp, left);
  gomp_clear_parent_in_tree (sp, right);
}

/* Clear the parent field of every task in Q and remove every task
   from Q.  */

static inline void
gomp_clear_parent (struct priority_queue *q)
{
  if (priority_queue_multi_p (q))
    {
      gomp_clear_parent_in_tree (&q->t, q->t.root);
      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
	 No need to remove anything.  We can just nuke everything.  */
      q->t.root = NULL;
    }
  else
    gomp_clear_parent_in_list (&q->l);
}

/* Helper function for GOMP_task and gomp_create_target_task.

   For a TASK with in/out dependencies, fill in the various dependency
   queues.  PARENT is the parent of said task.  DEPEND is as in
   GOMP_task.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
			 void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  hash_entry_type ent;

  task->depend_count = ndepend;
  task->num_dependees = 0;
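  /* Size the parent's hash table for roughly twice the number of depend
     clauses on this task, with a floor of 12 slots, so typical tasks do
     not immediately force a resize.  */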
  if (parent->depend_hash == NULL)
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].addr = depend[2 + i];
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].is_in = i >= nout;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
					      &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
	{
	  /* If the same task has multiple depend clauses on the same
	     address, all but the first one are redundant.  As inout/out
	     come first, if any of them is inout/out, it will win, which
	     is the right semantics.  */
	  if ((*slot)->task == task)
	    {
	      task->depend[i].redundant = true;
	      continue;
	    }
	  for (ent = *slot; ent; ent = ent->next)
	    {
	      if (ent->redundant_out)
		break;

	      last = ent;

	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
	      if (i >= nout && ent->is_in)
		continue;

	      if (!ent->is_in)
		out = ent;

	      struct gomp_task *tsk = ent->task;
	      if (tsk->dependers == NULL)
		{
		  tsk->dependers
		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
				   + 6 * sizeof (struct gomp_task *));
		  tsk->dependers->n_elem = 1;
		  tsk->dependers->allocated = 6;
		  tsk->dependers->elem[0] = task;
		  task->num_dependees++;
		  continue;
		}
	      /* We already have some other dependency on tsk from an
		 earlier depend clause.  */
	      else if (tsk->dependers->n_elem
		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
			   == task))
		continue;
	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
		{
		  tsk->dependers->allocated
		    = tsk->dependers->allocated * 2 + 2;
		  tsk->dependers
		    = gomp_realloc (tsk->dependers,
				    sizeof (struct gomp_dependers_vec)
				    + (tsk->dependers->allocated
				       * sizeof (struct gomp_task *)));
		}
	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
	      task->num_dependees++;
	    }
	  task->depend[i].next = *slot;
	  (*slot)->prev = &task->depend[i];
	}
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
	 address in the hash table chain for the purpose of creation of
	 deferred tasks, because each out depends on all earlier outs, thus it
	 is enough to record just the last depend({,in}out:).  For depend(in:),
	 we need to keep all of the previous ones not terminated yet, because
	 a later depend({,in}out:) might need to depend on all of them.  So, if
	 the new task's clause is depend({,in}out:), we know there is at most
	 one other depend({,in}out:) clause in the list (out).  For
	 non-deferred tasks we want to see all outs, so they are moved to the
	 end of the chain; after the first redundant_out entry, all following
	 entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
	{
	  if (out != last)
	    {
	      out->next->prev = out->prev;
	      out->prev->next = out->next;
	      out->next = last->next;
	      out->prev = last;
	      last->next = out;
	      if (out->next)
		out->next->prev = out;
	    }
	  out->redundant_out = true;
	}
    }
}

/* Called when encountering an explicit task directive.  If IF_CLAUSE is
   false, then we must not delay in executing the task.  If UNTIED is true,
   then the task may be executed by any member of the team.

   DEPEND is an array containing:
     depend[0]: number of depend elements.
     depend[1]: number of depend elements of type "out".
     depend[2..N+1]: address of [1..N]th depend element.  */
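
/* For example, for a directive such as

     #pragma omp task depend(out: x) depend(in: y, z)

   the compiler arranges, per the layout above:

     depend[0] = (void *) (uintptr_t) 3;   three depend elements
     depend[1] = (void *) (uintptr_t) 1;   one of them is out/inout
     depend[2] = &x;                       out/inout addresses come first
     depend[3] = &y;
     depend[4] = &z;  */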

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check whether this task depends on any of
	 them.  If it does, we need to wait for them to complete before
	 running it undeferred.  There is no need to handle depend
	 clauses for non-deferred tasks other than this, because the
	 parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
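	  /* Over-allocate the VLA by arg_align - 1 bytes and round the
	     pointer up, since the stack buffer itself carries no
	     particular alignment guarantee.  */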
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (&task.children_queue);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	depend_size = ((uintptr_t) depend[0]
		       * sizeof (struct gomp_task_depend_entry));
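      /* Lay everything out in a single allocation:
	 [struct gomp_task][depend entries][padding][argument copy],
	 with ARG rounded up to ARG_ALIGN just as in the undeferred
	 path above.  */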
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
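      /* GOMP_TASK_FLAG_FINAL is bit 1 in gomp-constants.h, so the shift
	 turns the flag into a plain 0 or 1.  */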
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
			     || (taskgroup && taskgroup->cancelled))
			    && !task->copy_ctors_done, 0))
	{
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return;
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      /* Tasks that depend on other tasks are not put into the
		 various waiting queues, so we are done for now.  Said
		 tasks are instead put into the queues via
		 gomp_task_run_post_handle_dependers () after their
		 dependencies have been satisfied, after which they can
		 be picked up by the various scheduling points.  */
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
			     task, priority,
			     PRIORITY_INSERT_BEGIN,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, priority,
			       PRIORITY_INSERT_BEGIN,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
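      /* Only bother waking another team member if somebody may actually
	 be idle; task_running_count plus the encountering thread (when
	 it is not already inside a tied task) approximates how many
	 threads are busy.  */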
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}

ialias (GOMP_taskgroup_start)
ialias (GOMP_taskgroup_end)

#define TYPE long
#define UTYPE unsigned long
#define TYPE_is_long 1
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef TYPE_is_long

#define TYPE unsigned long long
#define UTYPE TYPE
#define GOMP_taskloop GOMP_taskloop_ull
#include "taskloop.c"
#undef TYPE
#undef UTYPE
#undef GOMP_taskloop

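/* Move TASK, which must already be queued in HEAD, to the front of its
   priority list.  HEAD contains tasks of type TYPE.  */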
static void inline
priority_queue_move_task_first (enum priority_queue_type type,
				struct priority_queue *head,
				struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to move first missing task %p", task);
#endif
  struct priority_list *list;
  if (priority_queue_multi_p (head))
    {
      list = priority_queue_lookup_priority (head, task->priority);
#if _LIBGOMP_CHECKING_
      if (!list)
	gomp_fatal ("Unable to find priority %d", task->priority);
#endif
    }
  else
    list = &head->l;
  priority_list_remove (list, task_to_priority_node (type, task), 0);
  priority_list_insert (type, list, task, task->priority,
			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
			task->parent_depends_on);
}

/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
				    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
			 PRIORITY_INSERT_BEGIN, false,
			 task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
      else if (parent->taskwait->in_depend_wait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_depend_wait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
	 Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from an unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}

/* Signal that a target task TTASK has completed the asynchronously
   running phase and should be requeued as a task to handle the
   variable unmapping.  */

void
GOMP_PLUGIN_target_task_completion (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_task *task = ttask->task;
  struct gomp_team *team = ttask->team;

  gomp_mutex_lock (&team->task_lock);
  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
    {
      ttask->state = GOMP_TARGET_TASK_FINISHED;
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  ttask->state = GOMP_TARGET_TASK_FINISHED;
  gomp_target_task_completion (team, task);
  gomp_mutex_unlock (&team->task_lock);
}

static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);

/* Called for nowait target tasks.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend, void **args,
			 enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return true;

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;

  if (depend != NULL)
    {
      depend_cnt = (uintptr_t) depend[0];
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
	 are firstprivate on the target task.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
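	    /* The low byte of each kind is the map kind proper; the high
	       byte encodes log2 of the required alignment for the
	       firstprivate copy.  */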
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    if (tgt_align < align)
	      tgt_align = align;
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    tgt_size += sizes[i];
	  }
      if (tgt_align)
	tgt_size += tgt_align - 1;
      else
	tgt_size = 0;
    }

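  /* Single allocation holding the task, its depend entries, the
     gomp_target_task, the copied hostaddrs/sizes/kinds arrays, and
     space for the aligned firstprivate copies:
     [gomp_task][depend entries][gomp_target_task][hostaddrs][sizes]
     [kinds][padding][firstprivate data].  */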
  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short))
		      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  ttask->args = args;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	    ttask->hostaddrs[i] = tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	  }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  task->fn = NULL;
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
			|| (taskgroup && taskgroup->cancelled), 0))
    {
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return true;
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  if (taskgroup)
	    taskgroup->num_children++;
	  gomp_mutex_unlock (&team->task_lock);
	  return true;
	}
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run the gomp_target_task_fn right away, essentially scheduling the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, 0, PRIORITY_INSERT_END,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
	 in between gomp_target_task_fn and the mutex lock,
	 perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	gomp_target_task_completion (team, task);
      else
	ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			 PRIORITY_INSERT_BEGIN,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
			   PRIORITY_INSERT_BEGIN,
			   /*adjust_parent_depends_on=*/false,
			   task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
			 PRIORITY_INSERT_END,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}

/* Given a parent_depends_on task in LIST, move it to the front of its
   priority so it is run as soon as possible.

   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.

   We rearrange the queue such that all parent_depends_on tasks are
   first, and last_parent_depends_on points to the last such task we
   rearranged.  For example, given the following tasks in a queue
   where PD[123] are the parent_depends_on tasks:

	task->children
	|
	V
	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4

   We rearrange such that:

	task->children
	|              +--- last_parent_depends_on
	|              |
	V              V
	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */

static void inline
priority_list_upgrade_task (struct priority_list *list,
			    struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  list->last_parent_depends_on = node;
}

/* Given a parent_depends_on TASK in its parent's children_queue, move
   it to the front of its priority so it is run as soon as possible.

   PARENT is passed as an optimization.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_upgrade_task (struct gomp_task *task,
			     struct gomp_task *parent)
{
  struct priority_queue *head = &parent->children_queue;
  struct priority_node *node = &task->pnode[PQ_CHILDREN];
#if _LIBGOMP_CHECKING_
  if (!task->parent_depends_on)
    gomp_fatal ("priority_queue_upgrade_task: task must be a "
		"parent_depends_on task");
  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_upgrade_task (list, node);
    }
  else
    priority_list_upgrade_task (&head->l, node);
}

/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
   the way in LIST so that other tasks can be considered for
   execution.  LIST contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.  */

static void inline
priority_list_downgrade_task (enum priority_queue_type type,
			      struct priority_list *list,
			      struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  if (list->tasks == node)
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
	 cannot come before WAITING tasks.  If we're about to
	 leave the queue in such an indeterminate state, rewire
	 things appropriately.  However, a TIED task at the end is
	 perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  /* Remove from list.  */
	  node->prev->next = node->next;
	  node->next->prev = node->prev;
	  /* Rewire at the end.  */
	  node->next = list->tasks;
	  node->prev = list->tasks->prev;
	  list->tasks->prev->next = node;
	  list->tasks->prev = node;
	}
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      if (node->prev != node
	  && prev_child->kind == GOMP_TASK_WAITING
	  && prev_child->parent_depends_on)
	list->last_parent_depends_on = node->prev;
      else
	{
	  /* There are no more parent_depends_on entries waiting
	     to run, clear the list.  */
	  list->last_parent_depends_on = NULL;
	}
    }
}

/* Given a TASK in HEAD that is about to be executed, move it out of
   the way so that other tasks can be considered for execution.  HEAD
   contains tasks of type TYPE.

   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
   if applicable.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_downgrade_task (enum priority_queue_type type,
			       struct priority_queue *head,
			       struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to downgrade missing task %p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_downgrade_task (type, list, task);
    }
  else
    priority_list_downgrade_task (type, &head->l, task);
}

/* Set up CHILD_TASK to execute.  This is done by setting the task to
   TIED, and updating all relevant queues so that CHILD_TASK is no
   longer chosen for scheduling.  Also, remove CHILD_TASK from the
   overall team task queue entirely.

   Return TRUE if the task or its containing taskgroup has been
   cancelled.  */

static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
			   &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
			   &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
				   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   child_task);

  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
			 MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  if ((gomp_team_barrier_cancelled (&team->barrier)
       || (taskgroup && taskgroup->cancelled))
      && !child_task->copy_ctors_done)
    return true;
  return false;
}

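/* Unchain CHILD_TASK's depend entries from its parent's dependency hash
   table.  Entries that head a chain are replaced in (or cleared from)
   the corresponding hash slot.  */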
static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}

/* After a CHILD_TASK has been run, adjust the dependency queue for
   each task that depends on CHILD_TASK, to record the fact that there
   is one less dependency to worry about.  If a task that depended on
   CHILD_TASK now has no dependencies, place it in the various queues
   so it gets scheduled to run.

   TEAM is the team to which CHILD_TASK belongs.  */

static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
	 TASK's remaining dependencies.  Once TASK has no other
	 dependencies, put it into the various queues so it will get
	 scheduled for execution.  */
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/true,
				 task->parent_depends_on);
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	    }
	}
      if (taskgroup)
	{
	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup->in_taskgroup_wait)
	    {
	      /* One more task has had its dependencies met.
		 Inform any waiters.  */
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, task->priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}

static inline size_t
gomp_task_run_post_handle_depend (struct gomp_task *child_task,
				  struct gomp_team *team)
{
  if (child_task->depend_count == 0)
    return 0;

  /* If the parent is gone already, the hash table has been freed and
     nothing will use it anymore, so there is no need to remove anything
     from it.  */
  if (child_task->parent != NULL)
    gomp_task_run_post_handle_depend_hash (child_task);

  if (child_task->dependers == NULL)
    return 0;

  return gomp_task_run_post_handle_dependers (child_task, team);
}

/* Remove CHILD_TASK from its parent.  */

static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
			     child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}

/* Remove CHILD_TASK from its taskgroup.  */

static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  bool empty = priority_queue_remove (PQ_TASKGROUP,
				      &taskgroup->taskgroup_queue,
				      child_task, MEMMODEL_RELAXED);
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so we need a release
	 barrier here to ensure memory written by child_task->fn above
	 is flushed before the zero is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}

void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
1249*38fd1498Szrj if (__builtin_expect (child_task->fn == NULL, 0))
1250*38fd1498Szrj {
1251*38fd1498Szrj if (gomp_target_task_fn (child_task->fn_data))
1252*38fd1498Szrj {
1253*38fd1498Szrj thr->task = task;
1254*38fd1498Szrj gomp_mutex_lock (&team->task_lock);
1255*38fd1498Szrj child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1256*38fd1498Szrj team->task_running_count--;
1257*38fd1498Szrj struct gomp_target_task *ttask
1258*38fd1498Szrj = (struct gomp_target_task *) child_task->fn_data;
1259*38fd1498Szrj /* If GOMP_PLUGIN_target_task_completion has run already
1260*38fd1498Szrj in between gomp_target_task_fn and the mutex lock,
1261*38fd1498Szrj perform the requeuing here. */
1262*38fd1498Szrj if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1263*38fd1498Szrj gomp_target_task_completion (team, child_task);
1264*38fd1498Szrj else
1265*38fd1498Szrj ttask->state = GOMP_TARGET_TASK_RUNNING;
1266*38fd1498Szrj child_task = NULL;
1267*38fd1498Szrj continue;
1268*38fd1498Szrj }
1269*38fd1498Szrj }
1270*38fd1498Szrj else
1271*38fd1498Szrj child_task->fn (child_task->fn_data);
1272*38fd1498Szrj thr->task = task;
1273*38fd1498Szrj }
1274*38fd1498Szrj else
1275*38fd1498Szrj return;
1276*38fd1498Szrj gomp_mutex_lock (&team->task_lock);
1277*38fd1498Szrj if (child_task)
1278*38fd1498Szrj {
1279*38fd1498Szrj finish_cancelled:;
1280*38fd1498Szrj size_t new_tasks
1281*38fd1498Szrj = gomp_task_run_post_handle_depend (child_task, team);
1282*38fd1498Szrj gomp_task_run_post_remove_parent (child_task);
1283*38fd1498Szrj gomp_clear_parent (&child_task->children_queue);
1284*38fd1498Szrj gomp_task_run_post_remove_taskgroup (child_task);
1285*38fd1498Szrj to_free = child_task;
1286*38fd1498Szrj child_task = NULL;
1287*38fd1498Szrj if (!cancelled)
1288*38fd1498Szrj team->task_running_count--;
1289*38fd1498Szrj if (new_tasks > 1)
1290*38fd1498Szrj {
1291*38fd1498Szrj do_wake = team->nthreads - team->task_running_count;
1292*38fd1498Szrj if (do_wake > new_tasks)
1293*38fd1498Szrj do_wake = new_tasks;
1294*38fd1498Szrj }
1295*38fd1498Szrj if (--team->task_count == 0
1296*38fd1498Szrj && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1297*38fd1498Szrj {
1298*38fd1498Szrj gomp_team_barrier_done (&team->barrier, state);
1299*38fd1498Szrj gomp_mutex_unlock (&team->task_lock);
1300*38fd1498Szrj gomp_team_barrier_wake (&team->barrier, 0);
1301*38fd1498Szrj gomp_mutex_lock (&team->task_lock);
1302*38fd1498Szrj }
1303*38fd1498Szrj }
1304*38fd1498Szrj }
1305*38fd1498Szrj }
1306*38fd1498Szrj
/* Called when encountering a taskwait directive.

   Wait for all children of the current task.  */

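/* For illustration (GCC's usual lowering, assumed here rather than
   defined in this file): user code such as

     #pragma omp task
     foo ();
     #pragma omp taskwait

   is compiled to a GOMP_task call followed by a call to
   GOMP_taskwait ().  */
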
void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
        {
          bool destroy_taskwait = task->taskwait != NULL;
          task->taskwait = NULL;
          gomp_mutex_unlock (&team->task_lock);
          if (to_free)
            {
              gomp_finish_task (to_free);
              free (to_free);
            }
          if (destroy_taskwait)
            gomp_sem_destroy (&taskwait.taskwait_sem);
          return;
        }
      struct gomp_task *next_task
        = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
                                    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
        {
          child_task = next_task;
          cancelled
            = gomp_task_run_pre (child_task, task, team);
          if (__builtin_expect (cancelled, 0))
            {
              if (to_free)
                {
                  gomp_finish_task (to_free);
                  free (to_free);
                  to_free = NULL;
                }
              goto finish_cancelled;
            }
        }
      else
        {
          /* All tasks we are waiting for are either running in other
             threads, or they are tasks that have not had their
             dependencies met (so they're not even in the queue).  Wait
             for them.  */
          if (task->taskwait == NULL)
            {
              taskwait.in_depend_wait = false;
              gomp_sem_init (&taskwait.taskwait_sem, 0);
              task->taskwait = &taskwait;
            }
          taskwait.in_taskwait = true;
        }
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        {
          gomp_team_barrier_wake (&team->barrier, do_wake);
          do_wake = 0;
        }
      if (to_free)
        {
          gomp_finish_task (to_free);
          free (to_free);
          to_free = NULL;
        }
      if (child_task)
        {
          thr->task = child_task;
          if (__builtin_expect (child_task->fn == NULL, 0))
            {
              if (gomp_target_task_fn (child_task->fn_data))
                {
                  thr->task = task;
                  gomp_mutex_lock (&team->task_lock);
                  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
                  struct gomp_target_task *ttask
                    = (struct gomp_target_task *) child_task->fn_data;
                  /* If GOMP_PLUGIN_target_task_completion has run already
                     in between gomp_target_task_fn and the mutex lock,
                     perform the requeuing here.  */
                  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
                    gomp_target_task_completion (team, child_task);
                  else
                    ttask->state = GOMP_TARGET_TASK_RUNNING;
                  child_task = NULL;
                  continue;
                }
            }
          else
            child_task->fn (child_task->fn_data);
          thr->task = task;
        }
      else
        /* Sleep until a completing child posts the semaphore in
           gomp_task_run_post_remove_parent.  */
        gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
        {
         finish_cancelled:;
          size_t new_tasks
            = gomp_task_run_post_handle_depend (child_task, team);

          if (child_q)
            {
              priority_queue_remove (PQ_CHILDREN, &task->children_queue,
                                     child_task, MEMMODEL_RELAXED);
              child_task->pnode[PQ_CHILDREN].next = NULL;
              child_task->pnode[PQ_CHILDREN].prev = NULL;
            }

          gomp_clear_parent (&child_task->children_queue);

          gomp_task_run_post_remove_taskgroup (child_task);

          to_free = child_task;
          child_task = NULL;
          team->task_count--;
          if (new_tasks > 1)
            {
              do_wake = team->nthreads - team->task_running_count
                        - !task->in_tied_task;
              if (do_wake > new_tasks)
                do_wake = new_tasks;
            }
        }
    }
}

/* An undeferred task is about to run.  Wait for all tasks that this
   undeferred task depends on.

   This is done by first putting all known ready dependencies
   (dependencies that have their own dependencies met) at the top of
   the scheduling queues.  Then we iterate through these imminently
   ready tasks (and possibly other high priority tasks), and run them.
   If we run out of ready dependencies to execute, we either wait for
   the remaining dependencies to finish, or wait for them to get
   scheduled so we can run them.

   DEPEND is as in GOMP_task.  */

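/* Concretely (as the loop below reads it): DEPEND[0] is the number of
   depend elements N, DEPEND[1] the number of out/inout elements M, and
   DEPEND[2] through DEPEND[N + 1] the depended-on addresses, the first
   M of them being the out/inout ones.  */
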
void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  size_t ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + 2];
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
        if (i >= nout && ent->is_in)
          continue;
        else
          {
            struct gomp_task *tsk = ent->task;
            if (!tsk->parent_depends_on)
              {
                tsk->parent_depends_on = true;
                ++num_awaited;
                /* If dependency TSK itself has no dependencies and is
                   ready to run, move it up front so that we run it as
                   soon as possible.  */
                if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
                  priority_queue_upgrade_task (tsk, task);
              }
          }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
        {
          task->taskwait = NULL;
          gomp_mutex_unlock (&team->task_lock);
          if (to_free)
            {
              gomp_finish_task (to_free);
              free (to_free);
            }
          gomp_sem_destroy (&taskwait.taskwait_sem);
          return;
        }

      /* Theoretically when we have multiple priorities, we should
         choose between the highest priority item in
         task->children_queue and team->task_queue here, so we should
         use priority_queue_next_task ().  However, since we are
         running an undeferred task, perhaps that makes all tasks it
         depends on undeferred, thus a priority of INF?  This would
         make it unnecessary to take anything into account here
         except the dependencies.

         On the other hand, if we want to use priority_queue_next_task (),
         care should be taken to only use priority_queue_remove ()
         below if the task was actually removed from the children
         queue.  */
      bool ignored;
      struct gomp_task *next_task
        = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
                                    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
        {
          child_task = next_task;
          cancelled
            = gomp_task_run_pre (child_task, task, team);
          if (__builtin_expect (cancelled, 0))
            {
              if (to_free)
                {
                  gomp_finish_task (to_free);
                  free (to_free);
                  to_free = NULL;
                }
              goto finish_cancelled;
            }
        }
      else
        /* All tasks we are waiting for are either running in other
           threads, or they are tasks that have not had their
           dependencies met (so they're not even in the queue).  Wait
           for them.  */
        taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        {
          gomp_team_barrier_wake (&team->barrier, do_wake);
          do_wake = 0;
        }
      if (to_free)
        {
          gomp_finish_task (to_free);
          free (to_free);
          to_free = NULL;
        }
      if (child_task)
        {
          thr->task = child_task;
          if (__builtin_expect (child_task->fn == NULL, 0))
            {
              if (gomp_target_task_fn (child_task->fn_data))
                {
                  thr->task = task;
                  gomp_mutex_lock (&team->task_lock);
                  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
                  struct gomp_target_task *ttask
                    = (struct gomp_target_task *) child_task->fn_data;
                  /* If GOMP_PLUGIN_target_task_completion has run already
                     in between gomp_target_task_fn and the mutex lock,
                     perform the requeuing here.  */
                  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
                    gomp_target_task_completion (team, child_task);
                  else
                    ttask->state = GOMP_TARGET_TASK_RUNNING;
                  child_task = NULL;
                  continue;
                }
            }
          else
            child_task->fn (child_task->fn_data);
          thr->task = task;
        }
      else
        gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
        {
         finish_cancelled:;
          size_t new_tasks
            = gomp_task_run_post_handle_depend (child_task, team);
          if (child_task->parent_depends_on)
            --taskwait.n_depend;

          priority_queue_remove (PQ_CHILDREN, &task->children_queue,
                                 child_task, MEMMODEL_RELAXED);
          child_task->pnode[PQ_CHILDREN].next = NULL;
          child_task->pnode[PQ_CHILDREN].prev = NULL;

          gomp_clear_parent (&child_task->children_queue);
          gomp_task_run_post_remove_taskgroup (child_task);
          to_free = child_task;
          child_task = NULL;
          team->task_count--;
          if (new_tasks > 1)
            {
              do_wake = team->nthreads - team->task_running_count
                        - !task->in_tied_task;
              if (do_wake > new_tasks)
                do_wake = new_tasks;
            }
        }
    }
}
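
/* A usage note (GOMP_task's behavior, assumed here rather than shown
   in this excerpt): when a task with depend clauses must run
   undeferred, e.g.

     #pragma omp task if (0) depend(inout: x)
     x++;

   the encountering thread first waits for the listed dependencies via
   gomp_task_maybe_wait_for_dependencies above.  */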

/* Called when encountering a taskyield directive.  */

void
GOMP_taskyield (void)
{
  /* Nothing at the moment.  */
}
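
/* For illustration: `#pragma omp taskyield' compiles to a call to
   GOMP_taskyield ().  A no-op is a conforming implementation, since
   taskyield only permits, and never requires, a task switch.  */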
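
/* Called when encountering the start of a taskgroup directive: with
   GCC's usual lowering, `#pragma omp taskgroup' brackets its body with
   calls to GOMP_taskgroup_start () and GOMP_taskgroup_end ().  */
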
void
GOMP_taskgroup_start (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;

  /* If team is NULL, all tasks are executed as
     GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
     taskgroup and their descendant tasks will be finished
     by the time GOMP_taskgroup_end is called.  */
  if (team == NULL)
    return;
  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
  taskgroup->prev = task->taskgroup;
  priority_queue_init (&taskgroup->taskgroup_queue);
  taskgroup->in_taskgroup_wait = false;
  taskgroup->cancelled = false;
  taskgroup->num_children = 0;
  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
  task->taskgroup = taskgroup;
}

void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
         thr->ts.team == NULL, but inside of the taskgroup there
         is #pragma omp target nowait that creates an implicit
         team with a single thread.  In this case, we want to wait
         for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  bool unused;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
                                  MEMMODEL_RELAXED))
        {
          if (taskgroup->num_children)
            {
              if (priority_queue_empty_p (&task->children_queue,
                                          MEMMODEL_RELAXED))
                goto do_wait;
              child_task
                = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
                                            PQ_TEAM, &team->task_queue,
                                            &unused);
            }
          else
            {
              gomp_mutex_unlock (&team->task_lock);
              if (to_free)
                {
                  gomp_finish_task (to_free);
                  free (to_free);
                }
              goto finish;
            }
        }
      else
        child_task
          = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
        {
          cancelled
            = gomp_task_run_pre (child_task, child_task->parent, team);
          if (__builtin_expect (cancelled, 0))
            {
              if (to_free)
                {
                  gomp_finish_task (to_free);
                  free (to_free);
                  to_free = NULL;
                }
              goto finish_cancelled;
            }
        }
      else
        {
          child_task = NULL;
         do_wait:
          /* All tasks we are waiting for are either running in other
             threads, or they are tasks that have not had their
             dependencies met (so they're not even in the queue).  Wait
             for them.  */
          taskgroup->in_taskgroup_wait = true;
        }
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        {
          gomp_team_barrier_wake (&team->barrier, do_wake);
          do_wake = 0;
        }
      if (to_free)
        {
          gomp_finish_task (to_free);
          free (to_free);
          to_free = NULL;
        }
      if (child_task)
        {
          thr->task = child_task;
          if (__builtin_expect (child_task->fn == NULL, 0))
            {
              if (gomp_target_task_fn (child_task->fn_data))
                {
                  thr->task = task;
                  gomp_mutex_lock (&team->task_lock);
                  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
                  struct gomp_target_task *ttask
                    = (struct gomp_target_task *) child_task->fn_data;
                  /* If GOMP_PLUGIN_target_task_completion has run already
                     in between gomp_target_task_fn and the mutex lock,
                     perform the requeuing here.  */
                  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
                    gomp_target_task_completion (team, child_task);
                  else
                    ttask->state = GOMP_TARGET_TASK_RUNNING;
                  child_task = NULL;
                  continue;
                }
            }
          else
            child_task->fn (child_task->fn_data);
          thr->task = task;
        }
      else
        gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
        {
         finish_cancelled:;
          size_t new_tasks
            = gomp_task_run_post_handle_depend (child_task, team);
          gomp_task_run_post_remove_parent (child_task);
          gomp_clear_parent (&child_task->children_queue);
          gomp_task_run_post_remove_taskgroup (child_task);
          to_free = child_task;
          child_task = NULL;
          team->task_count--;
          if (new_tasks > 1)
            {
              do_wake = team->nthreads - team->task_running_count
                        - !task->in_tied_task;
              if (do_wake > new_tasks)
                do_wake = new_tasks;
            }
        }
    }

 finish:
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
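
/* For illustration (GCC's usual lowering, assumed):

     #pragma omp taskgroup
     {
       #pragma omp task
       foo ();
     }

   becomes GOMP_taskgroup_start (); GOMP_task (...); GOMP_taskgroup_end ();
   so the encountering thread does not return from GOMP_taskgroup_end
   until foo and all of its descendant tasks have completed.  */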
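
/* Return true when called from within a final task region, per the
   OpenMP spec for omp_in_final; the final_task flag is set when the
   task is part of a final task region (roughly, when it or an ancestor
   was created with a final clause that evaluated to true).  */
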
int
omp_in_final (void)
{
  struct gomp_thread *thr = gomp_thread ();
  return thr->task && thr->task->final_task;
}

ialias (omp_in_final)