/* Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of tasks in response to task
   creation and termination.  */
28
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include "gomp-constants.h"
33
34 typedef struct gomp_task_depend_entry *hash_entry_type;
35
/* Allocation hook for the hashtab.h template: route all hash-table
   memory through the library's abort-on-failure allocator.  */

static inline void *
htab_alloc (size_t size)
{
  void *mem = gomp_malloc (size);
  return mem;
}
41
/* Deallocation hook for the hashtab.h template: release memory that
   was obtained via htab_alloc.  free handles NULL, so no guard.  */

static inline void
htab_free (void *ptr)
{
  free (ptr);
}
47
48 #include "hashtab.h"
49
/* Hash callback for hashtab.h: a dependence entry hashes by the address
   it depends on, so every entry for a given address lands in the same
   hash chain.  */

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer (element->addr);
}
55
56 static inline bool
htab_eq(hash_entry_type x,hash_entry_type y)57 htab_eq (hash_entry_type x, hash_entry_type y)
58 {
59 return x->addr == y->addr;
60 }
61
62 /* Create a new task data structure. */
63
/* Initialize TASK as an implicit task with parent PARENT_TASK, copying
   the internal control variables from PREV_ICV.  Only the fields that
   every task needs are written here; callers adjust kind/priority/etc.
   afterwards as required.  */

void
gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
		struct gomp_task_icv *prev_icv)
{
  /* It would seem that using memset here would be a win, but it turns
     out that partially filling gomp_task allows us to keep the
     overhead of task creation low.  In the nqueens-1.c test, for a
     sufficiently large N, we drop the overhead from 5-6% to 1%.

     Note, the nqueens-1.c test in serial mode is a good test to
     benchmark the overhead of creating tasks as there are millions of
     tiny tasks created that all run undeferred.  */
  task->parent = parent_task;
  task->icv = *prev_icv;		/* Copy by value: child gets a snapshot.  */
  task->kind = GOMP_TASK_IMPLICIT;
  task->taskwait = NULL;
  task->in_tied_task = false;
  task->final_task = false;
  task->copy_ctors_done = false;
  task->parent_depends_on = false;
  priority_queue_init (&task->children_queue);
  task->taskgroup = NULL;
  task->dependers = NULL;
  task->depend_hash = NULL;
  task->depend_count = 0;
}
90
91 /* Clean up a task, after completing it. */
92
93 void
gomp_end_task(void)94 gomp_end_task (void)
95 {
96 struct gomp_thread *thr = gomp_thread ();
97 struct gomp_task *task = thr->task;
98
99 gomp_finish_task (task);
100 thr->task = task->parent;
101 }
102
103 /* Clear the parent field of every task in LIST. */
104
/* Clear the parent field of every task in LIST.  LIST->tasks is a
   circular doubly-linked list, so the walk is a do/while that stops
   when it comes back around to the head (an empty list is NULL).  */

static inline void
gomp_clear_parent_in_list (struct priority_list *list)
{
  struct priority_node *p = list->tasks;
  if (p)
    do
      {
	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
	p = p->next;
      }
    while (p != list->tasks);
}
117
118 /* Splay tree version of gomp_clear_parent_in_list.
119
120 Clear the parent field of every task in NODE within SP, and free
121 the node when done. */
122
/* Splay tree version of gomp_clear_parent_in_list.

   Clear the parent field of every task in NODE within SP, and free
   the node when done.  The children pointers are saved before the
   node is freed, then recursed into afterwards.  */

static void
gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
{
  if (!node)
    return;
  /* Save the subtrees first: NODE is freed below.  */
  prio_splay_tree_node left = node->left, right = node->right;
  gomp_clear_parent_in_list (&node->key.l);
#if _LIBGOMP_CHECKING_
  /* Poison the freed node so stale uses are caught in checking builds.  */
  memset (node, 0xaf, sizeof (*node));
#endif
  /* No need to remove the node from the tree.  We're nuking
     everything, so just free the nodes and our caller can clear the
     entire splay tree.  */
  free (node);
  gomp_clear_parent_in_tree (sp, left);
  gomp_clear_parent_in_tree (sp, right);
}
140
141 /* Clear the parent field of every task in Q and remove every task
142 from Q. */
143
144 static inline void
gomp_clear_parent(struct priority_queue * q)145 gomp_clear_parent (struct priority_queue *q)
146 {
147 if (priority_queue_multi_p (q))
148 {
149 gomp_clear_parent_in_tree (&q->t, q->t.root);
150 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
151 No need to remove anything. We can just nuke everything. */
152 q->t.root = NULL;
153 }
154 else
155 gomp_clear_parent_in_list (&q->l);
156 }
157
158 /* Helper function for GOMP_task and gomp_create_target_task.
159
160 For a TASK with in/out dependencies, fill in the various dependency
161 queues. PARENT is the parent of said task. DEPEND is as in
162 GOMP_task. */
163
/* Populate TASK->depend[] from the GOMP_task-style DEPEND array and
   register each entry in PARENT's dependence hash table, linking TASK
   to every earlier sibling it must wait for (via that sibling's
   dependers vector) and counting those in TASK->num_dependees.
   Caller must hold the team's task_lock.  */

static void
gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
			 void **depend)
{
  size_t ndepend = (uintptr_t) depend[0];
  size_t i;
  hash_entry_type ent;

  if (ndepend)
    {
      /* Old-style encoding: depend[0] is total #.  */
      size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
      /* ndepend - nout is # of in: */
      for (i = 0; i < ndepend; i++)
	{
	  task->depend[i].addr = depend[2 + i];
	  /* outs come first in the array, so index >= nout means "in".  */
	  task->depend[i].is_in = i >= nout;
	}
    }
  else
    {
      /* New-style encoding (depend[0] == 0), with per-kind counts and
	 possibly trailing omp_depend_t (depobj) entries.  */
      ndepend = (uintptr_t) depend[1]; /* total # */
      size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
      size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
      /* For now we treat mutexinoutset like out, which is compliant, but
	 inefficient.  */
      size_t nin = (uintptr_t) depend[4]; /* # of in: */
      /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
      size_t normal = nout + nmutexinoutset + nin;
      size_t n = 0;
      /* First pass over the depobjs: collect the out-like ones so that
	 all out/inout entries precede all in entries in depend[].  */
      for (i = normal; i < ndepend; i++)
	{
	  void **d = (void **) (uintptr_t) depend[5 + i];
	  switch ((uintptr_t) d[1])
	    {
	    case GOMP_DEPEND_OUT:
	    case GOMP_DEPEND_INOUT:
	    case GOMP_DEPEND_MUTEXINOUTSET:
	      break;
	    case GOMP_DEPEND_IN:
	      continue;
	    default:
	      gomp_fatal ("unknown omp_depend_t dependence type %d",
			  (int) (uintptr_t) d[1]);
	    }
	  task->depend[n].addr = d[0];
	  task->depend[n++].is_in = 0;
	}
      /* Then the normal (non-depobj) entries, in their given order.  */
      for (i = 0; i < normal; i++)
	{
	  task->depend[n].addr = depend[5 + i];
	  task->depend[n++].is_in = i >= nout + nmutexinoutset;
	}
      /* Finally the in-type depobjs.  */
      for (i = normal; i < ndepend; i++)
	{
	  void **d = (void **) (uintptr_t) depend[5 + i];
	  if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
	    continue;
	  task->depend[n].addr = d[0];
	  task->depend[n++].is_in = 1;
	}
    }
  task->depend_count = ndepend;
  task->num_dependees = 0;
  if (parent->depend_hash == NULL)
    /* Size the table to the clause count, with a small minimum.  */
    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
  for (i = 0; i < ndepend; i++)
    {
      task->depend[i].next = NULL;
      task->depend[i].prev = NULL;
      task->depend[i].task = task;
      task->depend[i].redundant = false;
      task->depend[i].redundant_out = false;

      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
					      &task->depend[i], INSERT);
      hash_entry_type out = NULL, last = NULL;
      if (*slot)
	{
	  /* If multiple depends on the same task are the same, all but the
	     first one are redundant.  As inout/out come first, if any of them
	     is inout/out, it will win, which is the right semantics.  */
	  if ((*slot)->task == task)
	    {
	      task->depend[i].redundant = true;
	      continue;
	    }
	  /* Walk the chain of earlier entries for this address and record
	     every task the new entry conflicts with.  */
	  for (ent = *slot; ent; ent = ent->next)
	    {
	      if (ent->redundant_out)
		break;

	      last = ent;

	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
	      if (task->depend[i].is_in && ent->is_in)
		continue;

	      if (!ent->is_in)
		out = ent;

	      struct gomp_task *tsk = ent->task;
	      if (tsk->dependers == NULL)
		{
		  /* First depender: allocate the vector with room for 6.  */
		  tsk->dependers
		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
				   + 6 * sizeof (struct gomp_task *));
		  tsk->dependers->n_elem = 1;
		  tsk->dependers->allocated = 6;
		  tsk->dependers->elem[0] = task;
		  task->num_dependees++;
		  continue;
		}
	      /* We already have some other dependency on tsk from earlier
		 depend clause.  */
	      else if (tsk->dependers->n_elem
		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
			   == task))
		continue;
	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
		{
		  /* Grow the dependers vector geometrically.  */
		  tsk->dependers->allocated
		    = tsk->dependers->allocated * 2 + 2;
		  tsk->dependers
		    = gomp_realloc (tsk->dependers,
				    sizeof (struct gomp_dependers_vec)
				    + (tsk->dependers->allocated
				       * sizeof (struct gomp_task *)));
		}
	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
	      task->num_dependees++;
	    }
	  task->depend[i].next = *slot;
	  (*slot)->prev = &task->depend[i];
	}
      /* The new entry becomes the head of the chain for this address.  */
      *slot = &task->depend[i];

      /* There is no need to store more than one depend({,in}out:) task per
	 address in the hash table chain for the purpose of creation of
	 deferred tasks, because each out depends on all earlier outs, thus it
	 is enough to record just the last depend({,in}out:).  For depend(in:),
	 we need to keep all of the previous ones not terminated yet, because
	 a later depend({,in}out:) might need to depend on all of them.  So, if
	 the new task's clause is depend({,in}out:), we know there is at most
	 one other depend({,in}out:) clause in the list (out).  For
	 non-deferred tasks we want to see all outs, so they are moved to the
	 end of the chain, after first redundant_out entry all following
	 entries should be redundant_out.  */
      if (!task->depend[i].is_in && out)
	{
	  if (out != last)
	    {
	      /* Unlink OUT and relink it right after LAST (end of the
		 non-redundant portion of the chain).  */
	      out->next->prev = out->prev;
	      out->prev->next = out->next;
	      out->next = last->next;
	      out->prev = last;
	      last->next = out;
	      if (out->next)
		out->next->prev = out;
	    }
	  out->redundant_out = true;
	}
    }
}
328
329 /* Called when encountering an explicit task directive. If IF_CLAUSE is
330 false, then we must not delay in executing the task. If UNTIED is true,
331 then the task may be executed by any member of the team.
332
333 DEPEND is an array containing:
334 if depend[0] is non-zero, then:
335 depend[0]: number of depend elements.
336 depend[1]: number of depend elements of type "out/inout".
337 depend[2..N+1]: address of [1..N]th depend element.
338 otherwise, when depend[0] is zero, then:
339 depend[1]: number of depend elements.
340 depend[2]: number of depend elements of type "out/inout".
341 depend[3]: number of depend elements of type "mutexinoutset".
342 depend[4]: number of depend elements of type "in".
343 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
344 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
345 omp_depend_t objects. */
346
/* Entry point for an explicit "#pragma omp task" directive.  FN is the
   task body, DATA its argument block (ARG_SIZE bytes, ARG_ALIGN
   alignment), CPYFN an optional copy constructor, IF_CLAUSE the value
   of the if() clause, FLAGS the GOMP_TASK_FLAG_* bits, DEPEND the
   dependence array documented above, PRIORITY the priority-clause
   value.  The task either runs immediately (undeferred path, on-stack
   task struct) or is heap-allocated and queued for later execution.  */

void
GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	   long arg_size, long arg_align, bool if_clause, unsigned flags,
	   void **depend, int priority)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    if_clause = false;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  /* Clamp the priority: ignored without the PRIORITY flag, capped at
     the max-task-priority ICV otherwise.  */
  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
    priority = 0;
  else if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  /* Run undeferred when if(false), outside a team, inside a final
     task, or when the team's task queue is already oversubscribed.  */
  if (!if_clause || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count > 64 * team->nthreads)
    {
      struct gomp_task task;

      /* If there are depend clauses and earlier deferred sibling tasks
	 with depend clauses, check if there isn't a dependency.  If there
	 is, we need to wait for them.  There is no need to handle
	 depend clauses for non-deferred tasks other than this, because
	 the parent task is suspended until the child task finishes and thus
	 it can't start further child tasks.  */
      if ((flags & GOMP_TASK_FLAG_DEPEND)
	  && thr->task && thr->task->depend_hash)
	gomp_task_maybe_wait_for_dependencies (depend);

      gomp_init_task (&task, thr->task, gomp_icv (false));
      task.kind = GOMP_TASK_UNDEFERRED;
      task.final_task = (thr->task && thr->task->final_task)
			|| (flags & GOMP_TASK_FLAG_FINAL);
      task.priority = priority;
      if (thr->task)
	{
	  task.in_tied_task = thr->task->in_tied_task;
	  task.taskgroup = thr->task->taskgroup;
	}
      thr->task = &task;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  /* Copy-construct the argument block into suitably aligned
	     stack storage before invoking the task body.  */
	  char buf[arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  cpyfn (arg, data);
	  fn (arg);
	}
      else
	fn (data);
      /* Access to "children" is normally done inside a task_lock
	 mutex region, but the only way this particular task.children
	 can be set is if this thread's task work function (fn)
	 creates children.  So since the setter is *this* thread, we
	 need no barriers here when testing for non-NULL.  We can have
	 task.children set by the current thread then changed by a
	 child thread, but seeing a stale non-NULL value is not a
	 problem.  Once past the task_lock acquisition, this thread
	 will see the real value of task.children.  */
      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
	{
	  gomp_mutex_lock (&team->task_lock);
	  gomp_clear_parent (&task.children_queue);
	  gomp_mutex_unlock (&team->task_lock);
	}
      gomp_end_task ();
    }
  else
    {
      struct gomp_task *task;
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      bool do_wake;
      size_t depend_size = 0;

      if (flags & GOMP_TASK_FLAG_DEPEND)
	/* depend[0] non-zero is the old encoding's count, otherwise
	   depend[1] holds the total count.  */
	depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
		       * sizeof (struct gomp_task_depend_entry));
      /* One allocation holds the task struct, the trailing depend[]
	 array and the (aligned) argument block, in that order.  */
      task = gomp_malloc (sizeof (*task) + depend_size
			  + arg_size + arg_align - 1);
      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
		      & ~(uintptr_t) (arg_align - 1));
      gomp_init_task (task, parent, gomp_icv (false));
      task->priority = priority;
      task->kind = GOMP_TASK_UNDEFERRED;
      task->in_tied_task = parent->in_tied_task;
      task->taskgroup = taskgroup;
      /* Make the new task current while the copy constructor runs, so
	 it sees the right ICVs, then restore the parent.  */
      thr->task = task;
      if (cpyfn)
	{
	  cpyfn (arg, data);
	  task->copy_ctors_done = true;
	}
      else
	memcpy (arg, data, arg_size);
      thr->task = parent;
      task->kind = GOMP_TASK_WAITING;
      task->fn = fn;
      task->fn_data = arg;
      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && !task->copy_ctors_done)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_finish_task (task);
	      free (task);
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children++;
      if (depend_size)
	{
	  gomp_task_handle_depend (task, parent, depend);
	  if (task->num_dependees)
	    {
	      /* Tasks that depend on other tasks are not put into the
		 various waiting queues, so we are done for now.  Said
		 tasks are instead put into the queues via
		 gomp_task_run_post_handle_dependers() after their
		 dependencies have been satisfied.  After which, they
		 can be picked up by the various scheduling
		 points.  */
	      gomp_mutex_unlock (&team->task_lock);
	      return;
	    }
	}

      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
			     task, priority,
			     PRIORITY_INSERT_BEGIN,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, priority,
			       PRIORITY_INSERT_BEGIN,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);

      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);

      ++team->task_count;
      ++team->task_queued_count;
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake one idle thread if not all of them are busy running tasks.  */
      do_wake = team->task_running_count + !parent->in_tied_task
		< team->nthreads;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, 1);
    }
}
545
546 ialias (GOMP_taskgroup_start)
ialias(GOMP_taskgroup_end)547 ialias (GOMP_taskgroup_end)
548 ialias (GOMP_taskgroup_reduction_register)
549
550 #define TYPE long
551 #define UTYPE unsigned long
552 #define TYPE_is_long 1
553 #include "taskloop.c"
554 #undef TYPE
555 #undef UTYPE
556 #undef TYPE_is_long
557
558 #define TYPE unsigned long long
559 #define UTYPE TYPE
560 #define GOMP_taskloop GOMP_taskloop_ull
561 #include "taskloop.c"
562 #undef TYPE
563 #undef UTYPE
564 #undef GOMP_taskloop
565
/* Move TASK to the front of its priority list within HEAD, a priority
   queue of tasks of type TYPE.  Used when a previously-queued task
   becomes runnable again and should be scheduled as soon as possible.  */

static void inline
priority_queue_move_task_first (enum priority_queue_type type,
				struct priority_queue *head,
				struct gomp_task *task)
{
#if _LIBGOMP_CHECKING_
  if (!priority_queue_task_in_queue_p (type, head, task))
    gomp_fatal ("Attempt to move first missing task %p", task);
#endif
  struct priority_list *list;
  if (priority_queue_multi_p (head))
    {
      /* Multi-priority queue: find the list for TASK's priority.  */
      list = priority_queue_lookup_priority (head, task->priority);
#if _LIBGOMP_CHECKING_
      if (!list)
	gomp_fatal ("Unable to find priority %d", task->priority);
#endif
    }
  else
    list = &head->l;
  /* Remove and re-insert at the beginning of the same list.  */
  priority_list_remove (list, task_to_priority_node (type, task), 0);
  priority_list_insert (type, list, task, task->priority,
			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
			task->parent_depends_on);
}
591
592 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
593 with team->task_lock held, or is executed in the thread that called
594 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
595 run before it acquires team->task_lock. */
596
/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
   with team->task_lock held, or is executed in the thread that called
   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
   run before it acquires team->task_lock.

   Requeue TASK (whose async device phase has finished) as a WAITING
   task so a scheduling point can run its unmap phase, and wake any
   thread blocked in taskwait/taskgroup/dependence waits on it.  */

static void
gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
{
  struct gomp_task *parent = task->parent;
  if (parent)
    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
				    task);

  struct gomp_taskgroup *taskgroup = task->taskgroup;
  if (taskgroup)
    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				    task);

  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
			 PRIORITY_INSERT_BEGIN, false,
			 task->parent_depends_on);
  task->kind = GOMP_TASK_WAITING;
  if (parent && parent->taskwait)
    {
      if (parent->taskwait->in_taskwait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_taskwait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
      else if (parent->taskwait->in_depend_wait)
	{
	  /* One more task has had its dependencies met.
	     Inform any waiters.  */
	  parent->taskwait->in_depend_wait = false;
	  gomp_sem_post (&parent->taskwait->taskwait_sem);
	}
    }
  if (taskgroup && taskgroup->in_taskgroup_wait)
    {
      /* One more task has had its dependencies met.
	 Inform any waiters.  */
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }

  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  /* I'm afraid this can't be done after releasing team->task_lock,
     as gomp_target_task_completion is run from unrelated thread and
     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
     the team could be gone already.  */
  if (team->nthreads > team->task_running_count)
    gomp_team_barrier_wake (&team->barrier, 1);
}
648
649 /* Signal that a target task TTASK has completed the asynchronously
650 running phase and should be requeued as a task to handle the
651 variable unmapping. */
652
/* Signal that a target task TTASK has completed the asynchronously
   running phase and should be requeued as a task to handle the
   variable unmapping.  Called by offload plugins from an arbitrary
   thread; DATA is the struct gomp_target_task.  */

void
GOMP_PLUGIN_target_task_completion (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_task *task = ttask->task;
  struct gomp_team *team = ttask->team;

  gomp_mutex_lock (&team->task_lock);
  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
    {
      /* The spawning thread hasn't reacquired task_lock yet; just mark
	 the state and let it perform the requeuing itself.  */
      ttask->state = GOMP_TARGET_TASK_FINISHED;
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  ttask->state = GOMP_TARGET_TASK_FINISHED;
  gomp_target_task_completion (team, task);
  gomp_mutex_unlock (&team->task_lock);
}
671
672 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
673
674 /* Called for nowait target tasks. */
675
/* Called for nowait target tasks.  Create and queue a deferred task
   wrapping a target region (or target data construct) for DEVICEP.
   FN/MAPNUM/HOSTADDRS/SIZES/KINDS/FLAGS/ARGS describe the offload;
   DEPEND is the dependence array (may be NULL); STATE is the initial
   gomp_target_task state.  Returns true if a task was created or the
   construct was cancelled, false for the GOMP_TARGET_TASK_DATA case
   where the caller must proceed synchronously.  */

bool
gomp_create_target_task (struct gomp_device_descr *devicep,
			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
			 size_t *sizes, unsigned short *kinds,
			 unsigned int flags, void **depend, void **args,
			 enum gomp_target_task_state state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return true;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return true;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return true;
	}
    }

  struct gomp_target_task *ttask;
  struct gomp_task *task;
  struct gomp_task *parent = thr->task;
  struct gomp_taskgroup *taskgroup = parent->taskgroup;
  bool do_wake;
  size_t depend_size = 0;
  uintptr_t depend_cnt = 0;
  size_t tgt_align = 0, tgt_size = 0;

  if (depend != NULL)
    {
      depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
    }
  if (fn)
    {
      /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are
	 firstprivate on the target task.  Compute the aligned size of
	 the private-copy area appended to the allocation below.  */
      size_t i;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    if (tgt_align < align)
	      tgt_align = align;
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    tgt_size += sizes[i];
	  }
      if (tgt_align)
	tgt_size += tgt_align - 1;
      else
	tgt_size = 0;
    }

  /* Single allocation: task struct, depend[] array, target-task struct,
     hostaddrs/sizes/kinds copies, then the firstprivate copy area.  */
  task = gomp_malloc (sizeof (*task) + depend_size
		      + sizeof (*ttask)
		      + mapnum * (sizeof (void *) + sizeof (size_t)
				  + sizeof (unsigned short))
		      + tgt_size);
  gomp_init_task (task, parent, gomp_icv (false));
  task->priority = 0;
  task->kind = GOMP_TASK_WAITING;
  task->in_tied_task = parent->in_tied_task;
  task->taskgroup = taskgroup;
  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
  ttask->devicep = devicep;
  ttask->fn = fn;
  ttask->mapnum = mapnum;
  ttask->args = args;
  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
  if (tgt_align)
    {
      /* Copy each firstprivate object into the aligned tail area and
	 redirect its hostaddrs entry at the copy.  */
      char *tgt = (char *) &ttask->kinds[mapnum];
      size_t i;
      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
      if (al)
	tgt += tgt_align - al;
      tgt_size = 0;
      for (i = 0; i < mapnum; i++)
	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
	  {
	    size_t align = (size_t) 1 << (kinds[i] >> 8);
	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	    ttask->hostaddrs[i] = tgt + tgt_size;
	    tgt_size = tgt_size + sizes[i];
	  }
    }
  ttask->flags = flags;
  ttask->state = state;
  ttask->task = task;
  ttask->team = team;
  task->fn = NULL;		/* Marks this as a target task.  */
  task->fn_data = ttask;
  task->final_task = 0;
  gomp_mutex_lock (&team->task_lock);
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0))
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	{
	do_cancel:
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_finish_task (task);
	  free (task);
	  return true;
	}
      if (taskgroup)
	{
	  if (taskgroup->cancelled)
	    goto do_cancel;
	  if (taskgroup->workshare
	      && taskgroup->prev
	      && taskgroup->prev->cancelled)
	    goto do_cancel;
	}
    }
  if (depend_size)
    {
      gomp_task_handle_depend (task, parent, depend);
      if (task->num_dependees)
	{
	  /* Unsatisfied dependencies: leave the task to be queued by
	     gomp_task_run_post_handle_dependers later.  */
	  if (taskgroup)
	    taskgroup->num_children++;
	  gomp_mutex_unlock (&team->task_lock);
	  return true;
	}
    }
  if (state == GOMP_TARGET_TASK_DATA)
    {
      /* Target data with no pending dependencies: nothing to defer;
	 tell the caller to handle it synchronously.  */
      gomp_task_run_post_handle_depend_hash (task);
      gomp_mutex_unlock (&team->task_lock);
      gomp_finish_task (task);
      free (task);
      return false;
    }
  if (taskgroup)
    taskgroup->num_children++;
  /* For async offloading, if we don't need to wait for dependencies,
     run the gomp_target_task_fn right away, essentially schedule the
     mapping part of the task in the current thread.  */
  if (devicep != NULL
      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      if (taskgroup)
	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
			       task, 0, PRIORITY_INSERT_END,
			       /*adjust_parent_depends_on=*/false,
			       task->parent_depends_on);
      task->pnode[PQ_TEAM].next = NULL;
      task->pnode[PQ_TEAM].prev = NULL;
      task->kind = GOMP_TASK_TIED;
      ++team->task_count;
      gomp_mutex_unlock (&team->task_lock);

      thr->task = task;
      gomp_target_task_fn (task->fn_data);
      thr->task = parent;

      gomp_mutex_lock (&team->task_lock);
      task->kind = GOMP_TASK_ASYNC_RUNNING;
      /* If GOMP_PLUGIN_target_task_completion has run already
	 in between gomp_target_task_fn and the mutex lock,
	 perform the requeuing here.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	gomp_target_task_completion (team, task);
      else
	ttask->state = GOMP_TARGET_TASK_RUNNING;
      gomp_mutex_unlock (&team->task_lock);
      return true;
    }
  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
			 PRIORITY_INSERT_BEGIN,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  if (taskgroup)
    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
			   PRIORITY_INSERT_BEGIN,
			   /*adjust_parent_depends_on=*/false,
			   task->parent_depends_on);
  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
			 PRIORITY_INSERT_END,
			 /*adjust_parent_depends_on=*/false,
			 task->parent_depends_on);
  ++team->task_count;
  ++team->task_queued_count;
  gomp_team_barrier_set_task_pending (&team->barrier);
  do_wake = team->task_running_count + !parent->in_tied_task
	    < team->nthreads;
  gomp_mutex_unlock (&team->task_lock);
  if (do_wake)
    gomp_team_barrier_wake (&team->barrier, 1);
  return true;
}
884
885 /* Given a parent_depends_on task in LIST, move it to the front of its
886 priority so it is run as soon as possible.
887
888 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
889
890 We rearrange the queue such that all parent_depends_on tasks are
891 first, and last_parent_depends_on points to the last such task we
892 rearranged. For example, given the following tasks in a queue
893 where PD[123] are the parent_depends_on tasks:
894
895 task->children
896 |
897 V
898 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
899
900 We rearrange such that:
901
902 task->children
903 | +--- last_parent_depends_on
904 | |
905 V V
906 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
907
/* Move NODE, a parent_depends_on task, into the leading run of
   parent_depends_on tasks in LIST (see the diagram above), and update
   LIST->last_parent_depends_on to point at it.  */

static void inline
priority_list_upgrade_task (struct priority_list *list,
			    struct priority_node *node)
{
  struct priority_node *last_parent_depends_on
    = list->last_parent_depends_on;
  if (last_parent_depends_on)
    {
      /* There is already a PD run: unlink NODE and splice it in right
	 after the last PD task.  */
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = last_parent_depends_on;
      node->next = last_parent_depends_on->next;
      node->prev->next = node;
      node->next->prev = node;
    }
  else if (node != list->tasks)
    {
      /* No PD run yet: unlink NODE and make it the new list head.  */
      node->prev->next = node->next;
      node->next->prev = node->prev;
      node->prev = list->tasks->prev;
      node->next = list->tasks;
      list->tasks = node;
      node->prev->next = node;
      node->next->prev = node;
    }
  /* NODE is now the last (possibly only) parent_depends_on entry.  */
  list->last_parent_depends_on = node;
}
935
936 /* Given a parent_depends_on TASK in its parent's children_queue, move
937 it to the front of its priority so it is run as soon as possible.
938
939 PARENT is passed as an optimization.
940
941 (This function could be defined in priority_queue.c, but we want it
942 inlined, and putting it in priority_queue.h is not an option, given
943 that gomp_task has not been properly defined at that point). */
944
/* Given a parent_depends_on TASK in its parent's children_queue, move
   it to the front of its priority so it is run as soon as possible.

   PARENT is passed as an optimization.

   (This function could be defined in priority_queue.c, but we want it
   inlined, and putting it in priority_queue.h is not an option, given
   that gomp_task has not been properly defined at that point).  */

static void inline
priority_queue_upgrade_task (struct gomp_task *task,
			     struct gomp_task *parent)
{
  struct priority_queue *head = &parent->children_queue;
  struct priority_node *node = &task->pnode[PQ_CHILDREN];
#if _LIBGOMP_CHECKING_
  if (!task->parent_depends_on)
    gomp_fatal ("priority_queue_upgrade_task: task must be a "
		"parent_depends_on task");
  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
#endif
  if (priority_queue_multi_p (head))
    {
      /* Multi-priority queue: upgrade within the task's own priority list.  */
      struct priority_list *list
	= priority_queue_lookup_priority (head, task->priority);
      priority_list_upgrade_task (list, node);
    }
  else
    priority_list_upgrade_task (&head->l, node);
}
967
968 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
969 the way in LIST so that other tasks can be considered for
970 execution. LIST contains tasks of type TYPE.
971
972 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
973 if applicable. */
974
static void inline
priority_list_downgrade_task (enum priority_queue_type type,
			      struct priority_list *list,
			      struct gomp_task *child_task)
{
  struct priority_node *node = task_to_priority_node (type, child_task);
  /* If CHILD_TASK is at the head of LIST, just advance the head past
     it; the node keeps its circular links, so it remains reachable for
     later removal.  */
  if (list->tasks == node)
    list->tasks = node->next;
  else if (node->next != list->tasks)
    {
      /* The task in NODE is about to become TIED and TIED tasks
	 cannot come before WAITING tasks.  If we're about to
	 leave the queue in such an indeterminate state, rewire
	 things appropriately.  However, a TIED task at the end is
	 perfectly fine.  */
      struct gomp_task *next_task = priority_node_to_task (type, node->next);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  /* Remove from list.  */
	  node->prev->next = node->next;
	  node->next->prev = node->prev;
	  /* Rewire at the end.  */
	  node->next = list->tasks;
	  node->prev = list->tasks->prev;
	  list->tasks->prev->next = node;
	  list->tasks->prev = node;
	}
    }

  /* If the current task is the last_parent_depends_on for its
     priority, adjust last_parent_depends_on appropriately.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && list->last_parent_depends_on == node)
    {
      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
      /* node->prev != node means NODE is not the sole element; only a
	 WAITING predecessor that is itself parent_depends_on can take
	 over as the new last_parent_depends_on.  */
      if (node->prev != node
	  && prev_child->kind == GOMP_TASK_WAITING
	  && prev_child->parent_depends_on)
	list->last_parent_depends_on = node->prev;
      else
	{
	  /* There are no more parent_depends_on entries waiting
	     to run, clear the list.  */
	  list->last_parent_depends_on = NULL;
	}
    }
}
1022
1023 /* Given a TASK in HEAD that is about to be executed, move it out of
1024 the way so that other tasks can be considered for execution. HEAD
1025 contains tasks of type TYPE.
1026
1027 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1028 if applicable.
1029
1030 (This function could be defined in priority_queue.c, but we want it
1031 inlined, and putting it in priority_queue.h is not an option, given
1032 that gomp_task has not been properly defined at that point). */
1033
1034 static void inline
priority_queue_downgrade_task(enum priority_queue_type type,struct priority_queue * head,struct gomp_task * task)1035 priority_queue_downgrade_task (enum priority_queue_type type,
1036 struct priority_queue *head,
1037 struct gomp_task *task)
1038 {
1039 #if _LIBGOMP_CHECKING_
1040 if (!priority_queue_task_in_queue_p (type, head, task))
1041 gomp_fatal ("Attempt to downgrade missing task %p", task);
1042 #endif
1043 if (priority_queue_multi_p (head))
1044 {
1045 struct priority_list *list
1046 = priority_queue_lookup_priority (head, task->priority);
1047 priority_list_downgrade_task (type, list, task);
1048 }
1049 else
1050 priority_list_downgrade_task (type, &head->l, task);
1051 }
1052
1053 /* Setup CHILD_TASK to execute. This is done by setting the task to
1054 TIED, and updating all relevant queues so that CHILD_TASK is no
1055 longer chosen for scheduling. Also, remove CHILD_TASK from the
1056 overall team task queue entirely.
1057
1058 Return TRUE if task or its containing taskgroup has been
1059 cancelled. */
1060
static inline bool
gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
		   struct gomp_team *team)
{
#if _LIBGOMP_CHECKING_
  if (child_task->parent)
    priority_queue_verify (PQ_CHILDREN,
			   &child_task->parent->children_queue, true);
  if (child_task->taskgroup)
    priority_queue_verify (PQ_TASKGROUP,
			   &child_task->taskgroup->taskgroup_queue, false);
  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
#endif

  /* Task is about to go tied, move it out of the way.  */
  if (parent)
    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
				   child_task);

  /* Task is about to go tied, move it out of the way.  */
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup)
    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   child_task);

  /* Remove CHILD_TASK from the team-wide queue entirely and mark it
     TIED; from here on no other thread can pick it up.  */
  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
			 MEMMODEL_RELAXED);
  child_task->pnode[PQ_TEAM].next = NULL;
  child_task->pnode[PQ_TEAM].prev = NULL;
  child_task->kind = GOMP_TASK_TIED;

  /* Last queued task consumed: clear the barrier's task-pending flag.  */
  if (--team->task_queued_count == 0)
    gomp_team_barrier_clear_task_pending (&team->barrier);
  /* Report cancellation only when cancellation is enabled and the task
     does not need its bodies run (copy_ctors_done).  */
  if (__builtin_expect (gomp_cancel_var, 0)
      && !child_task->copy_ctors_done)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return true;
      if (taskgroup)
	{
	  if (taskgroup->cancelled)
	    return true;
	  /* A workshare taskgroup delegates cancellation state to the
	     enclosing (prev) taskgroup.  */
	  if (taskgroup->workshare
	      && taskgroup->prev
	      && taskgroup->prev->cancelled)
	    return true;
	}
    }
  return false;
}
1111
/* Unlink every non-redundant dependency entry of CHILD_TASK from the
   per-address chains rooted in its parent's depend_hash, now that
   CHILD_TASK has finished running.  */

static void
gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  size_t i;

  for (i = 0; i < child_task->depend_count; i++)
    if (!child_task->depend[i].redundant)
      {
	if (child_task->depend[i].next)
	  child_task->depend[i].next->prev = child_task->depend[i].prev;
	if (child_task->depend[i].prev)
	  child_task->depend[i].prev->next = child_task->depend[i].next;
	else
	  {
	    /* No predecessor: this entry is the chain head, so the
	       hash table slot itself must be repointed (or cleared if
	       the chain becomes empty).  */
	    hash_entry_type *slot
	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
				NO_INSERT);
	    if (*slot != &child_task->depend[i])
	      abort ();
	    if (child_task->depend[i].next)
	      *slot = child_task->depend[i].next;
	    else
	      htab_clear_slot (parent->depend_hash, slot);
	  }
      }
}
1139
1140 /* After a CHILD_TASK has been run, adjust the dependency queue for
1141 each task that depends on CHILD_TASK, to record the fact that there
1142 is one less dependency to worry about. If a task that depended on
1143 CHILD_TASK now has no dependencies, place it in the various queues
1144 so it gets scheduled to run.
1145
1146 TEAM is the team to which CHILD_TASK belongs to. */
1147
static size_t
gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
				     struct gomp_team *team)
{
  struct gomp_task *parent = child_task->parent;
  size_t i, count = child_task->dependers->n_elem, ret = 0;
  for (i = 0; i < count; i++)
    {
      struct gomp_task *task = child_task->dependers->elem[i];

      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
	 TASK's remaining dependencies.  Once TASK has no other
	 dependencies, put it into the various queues so it will get
	 scheduled for execution.  */
      if (--task->num_dependees != 0)
	continue;

      struct gomp_taskgroup *taskgroup = task->taskgroup;
      if (parent)
	{
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/true,
				 task->parent_depends_on);
	  if (parent->taskwait)
	    {
	      if (parent->taskwait->in_taskwait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_taskwait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	      else if (parent->taskwait->in_depend_wait)
		{
		  /* One more task has had its dependencies met.
		     Inform any waiters.  */
		  parent->taskwait->in_depend_wait = false;
		  gomp_sem_post (&parent->taskwait->taskwait_sem);
		}
	    }
	}
      else
	/* The parent is gone; TASK becomes an orphan.  */
	task->parent = NULL;
      if (taskgroup)
	{
	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				 task, task->priority,
				 PRIORITY_INSERT_BEGIN,
				 /*adjust_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup->in_taskgroup_wait)
	    {
	      /* One more task has had its dependencies met.
		 Inform any waiters.  */
	      taskgroup->in_taskgroup_wait = false;
	      gomp_sem_post (&taskgroup->taskgroup_sem);
	    }
	}
      priority_queue_insert (PQ_TEAM, &team->task_queue,
			     task, task->priority,
			     PRIORITY_INSERT_END,
			     /*adjust_parent_depends_on=*/false,
			     task->parent_depends_on);
      ++team->task_count;
      ++team->task_queued_count;
      ++ret;
    }
  free (child_task->dependers);
  child_task->dependers = NULL;
  /* More than one task was made runnable: the current thread can only
     pick up one itself, so flag pending tasks on the barrier.  */
  if (ret > 1)
    gomp_team_barrier_set_task_pending (&team->barrier);
  return ret;
}
1223
1224 static inline size_t
gomp_task_run_post_handle_depend(struct gomp_task * child_task,struct gomp_team * team)1225 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1226 struct gomp_team *team)
1227 {
1228 if (child_task->depend_count == 0)
1229 return 0;
1230
1231 /* If parent is gone already, the hash table is freed and nothing
1232 will use the hash table anymore, no need to remove anything from it. */
1233 if (child_task->parent != NULL)
1234 gomp_task_run_post_handle_depend_hash (child_task);
1235
1236 if (child_task->dependers == NULL)
1237 return 0;
1238
1239 return gomp_task_run_post_handle_dependers (child_task, team);
1240 }
1241
1242 /* Remove CHILD_TASK from its parent. */
1243
static inline void
gomp_task_run_post_remove_parent (struct gomp_task *child_task)
{
  struct gomp_task *parent = child_task->parent;
  if (parent == NULL)
    return;

  /* If this was the last task the parent was depending on,
     synchronize with gomp_task_maybe_wait_for_dependencies so it can
     clean up and return.  */
  if (__builtin_expect (child_task->parent_depends_on, 0)
      && --parent->taskwait->n_depend == 0
      && parent->taskwait->in_depend_wait)
    {
      parent->taskwait->in_depend_wait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }

  /* Remove from the parent's children queue; the RELEASE ordering
     pairs with the ACQUIRE load in GOMP_taskwait.  If the queue became
     empty and the parent is blocked in taskwait, wake it.  */
  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
			     child_task, MEMMODEL_RELEASE)
      && parent->taskwait && parent->taskwait->in_taskwait)
    {
      parent->taskwait->in_taskwait = false;
      gomp_sem_post (&parent->taskwait->taskwait_sem);
    }
  child_task->pnode[PQ_CHILDREN].next = NULL;
  child_task->pnode[PQ_CHILDREN].prev = NULL;
}
1272
1273 /* Remove CHILD_TASK from its taskgroup. */
1274
static inline void
gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
{
  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
  if (taskgroup == NULL)
    return;
  bool empty = priority_queue_remove (PQ_TASKGROUP,
				      &taskgroup->taskgroup_queue,
				      child_task, MEMMODEL_RELAXED);
  child_task->pnode[PQ_TASKGROUP].next = NULL;
  child_task->pnode[PQ_TASKGROUP].prev = NULL;
  if (taskgroup->num_children > 1)
    --taskgroup->num_children;
  else
    {
      /* We access taskgroup->num_children in GOMP_taskgroup_end
	 outside of the task lock mutex region, so
	 need a release barrier here to ensure memory
	 written by child_task->fn above is flushed
	 before the NULL is written.  */
      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
    }
  /* Queue drained while someone is blocked in GOMP_taskgroup_end:
     wake them up.  */
  if (empty && taskgroup->in_taskgroup_wait)
    {
      taskgroup->in_taskgroup_wait = false;
      gomp_sem_post (&taskgroup->taskgroup_sem);
    }
}
1303
/* Scheduler loop run by a thread that reached a team barrier while
   tasks are outstanding: repeatedly pull WAITING tasks off the team
   queue and execute them, signalling barrier completion once the last
   task finishes.  */

void
gomp_barrier_handle_tasks (gomp_barrier_state_t state)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  gomp_mutex_lock (&team->task_lock);
  if (gomp_barrier_last_thread (state))
    {
      /* Last thread to arrive: if no tasks remain the barrier is done
	 immediately, otherwise mark it as waiting for tasks.  */
      if (team->task_count == 0)
	{
	  gomp_team_barrier_done (&team->barrier, state);
	  gomp_mutex_unlock (&team->task_lock);
	  gomp_team_barrier_wake (&team->barrier, 0);
	  return;
	}
      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
    }

  while (1)
    {
      bool cancelled = false;
      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
	{
	  bool ignored;
	  child_task
	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
					PQ_IGNORED, NULL,
					&ignored);
	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
					 team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	  team->task_running_count++;
	  child_task->in_tied_task = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      /* Free the previous iteration's task outside the lock.  */
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* A NULL fn marks a target task; gomp_target_task_fn
	     returning true means it runs asynchronously.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  team->task_running_count--;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing to run: this thread is done with barrier tasks.  */
	return;
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  if (!cancelled)
	    team->task_running_count--;
	  /* New tasks were released by dependency completion: wake up
	     to that many idle threads.  */
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	  if (--team->task_count == 0
	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
	    {
	      gomp_team_barrier_done (&team->barrier, state);
	      gomp_mutex_unlock (&team->task_lock);
	      gomp_team_barrier_wake (&team->barrier, 0);
	      gomp_mutex_lock (&team->task_lock);
	    }
	}
    }
}
1424
1425 /* Called when encountering a taskwait directive.
1426
1427 Wait for all children of the current task. */
1428
void
GOMP_taskwait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  struct gomp_taskwait taskwait;
  int do_wake = 0;

  /* The acquire barrier on load of task->children here synchronizes
     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
     not necessary that we synchronize with other non-NULL writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskwait.  */
  if (task == NULL
      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
    return;

  memset (&taskwait, 0, sizeof (taskwait));
  /* CHILD_Q is set by priority_queue_next_task when the chosen task
     came from the children queue rather than the team queue.  */
  bool child_q = false;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
	{
	  /* All children finished: tear down the taskwait state (if it
	     was ever published) and return.  */
	  bool destroy_taskwait = task->taskwait != NULL;
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  if (destroy_taskwait)
	    gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_TEAM, &team->task_queue, &child_q);
      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  if (task->taskwait == NULL)
	    {
	      taskwait.in_depend_wait = false;
	      gomp_sem_init (&taskwait.taskwait_sem, 0);
	      task->taskwait = &taskwait;
	    }
	  taskwait.in_taskwait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      /* Free the previous iteration's task outside the lock.  */
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* A NULL fn marks a target task; gomp_target_task_fn
	     returning true means it runs asynchronously.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: block until a child completes.  */
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);

	  if (child_q)
	    {
	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				     child_task, MEMMODEL_RELAXED);
	      child_task->pnode[PQ_CHILDREN].next = NULL;
	      child_task->pnode[PQ_CHILDREN].prev = NULL;
	    }

	  gomp_clear_parent (&child_task->children_queue);

	  gomp_task_run_post_remove_taskgroup (child_task);

	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  /* New tasks were released by dependency completion: wake up
	     idle threads to run them.  */
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
1576
1577 /* Called when encountering a taskwait directive with depend clause(s).
1578 Wait as if it was an mergeable included task construct with empty body. */
1579
1580 void
GOMP_taskwait_depend(void ** depend)1581 GOMP_taskwait_depend (void **depend)
1582 {
1583 struct gomp_thread *thr = gomp_thread ();
1584 struct gomp_team *team = thr->ts.team;
1585
1586 /* If parallel or taskgroup has been cancelled, return early. */
1587 if (__builtin_expect (gomp_cancel_var, 0) && team)
1588 {
1589 if (gomp_team_barrier_cancelled (&team->barrier))
1590 return;
1591 if (thr->task->taskgroup)
1592 {
1593 if (thr->task->taskgroup->cancelled)
1594 return;
1595 if (thr->task->taskgroup->workshare
1596 && thr->task->taskgroup->prev
1597 && thr->task->taskgroup->prev->cancelled)
1598 return;
1599 }
1600 }
1601
1602 if (thr->task && thr->task->depend_hash)
1603 gomp_task_maybe_wait_for_dependencies (depend);
1604 }
1605
1606 /* An undeferred task is about to run. Wait for all tasks that this
1607 undeferred task depends on.
1608
1609 This is done by first putting all known ready dependencies
1610 (dependencies that have their own dependencies met) at the top of
1611 the scheduling queues. Then we iterate through these imminently
1612 ready tasks (and possibly other high priority tasks), and run them.
1613 If we run out of ready dependencies to execute, we either wait for
1614 the remaining dependencies to finish, or wait for them to get
1615 scheduled so we can run them.
1616
1617 DEPEND is as in GOMP_task. */
1618
void
gomp_task_maybe_wait_for_dependencies (void **depend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = thr->task;
  struct gomp_team *team = thr->ts.team;
  struct gomp_task_depend_entry elem, *ent = NULL;
  struct gomp_taskwait taskwait;
  size_t orig_ndepend = (uintptr_t) depend[0];
  size_t nout = (uintptr_t) depend[1];
  size_t ndepend = orig_ndepend;
  size_t normal = ndepend;
  size_t n = 2;
  size_t i;
  size_t num_awaited = 0;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  /* depend[0] == 0 selects the extended encoding: depend[1] is the
     total count, depend[2]+depend[3] are out/inout, depend[4] is in,
     and addresses start at depend[5] (omp_depend_t entries follow).  */
  if (ndepend == 0)
    {
      ndepend = nout;
      nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
      normal = nout + (uintptr_t) depend[4];
      n = 5;
    }
  gomp_mutex_lock (&team->task_lock);
  for (i = 0; i < ndepend; i++)
    {
      elem.addr = depend[i + n];
      elem.is_in = i >= nout;
      /* Entries past NORMAL are omp_depend_t objects whose kind is
	 encoded in the pointed-to pair.  */
      if (__builtin_expect (i >= normal, 0))
	{
	  void **d = (void **) elem.addr;
	  switch ((uintptr_t) d[1])
	    {
	    case GOMP_DEPEND_IN:
	      break;
	    case GOMP_DEPEND_OUT:
	    case GOMP_DEPEND_INOUT:
	    case GOMP_DEPEND_MUTEXINOUTSET:
	      elem.is_in = 0;
	      break;
	    default:
	      gomp_fatal ("unknown omp_depend_t dependence type %d",
			  (int) (uintptr_t) d[1]);
	    }
	  elem.addr = d[0];
	}
      /* Walk the chain of tasks registered on this address; an in-in
	 pair is not a conflict, everything else is awaited.  */
      ent = htab_find (task->depend_hash, &elem);
      for (; ent; ent = ent->next)
	if (elem.is_in && ent->is_in)
	  continue;
	else
	  {
	    struct gomp_task *tsk = ent->task;
	    if (!tsk->parent_depends_on)
	      {
		tsk->parent_depends_on = true;
		++num_awaited;
		/* If dependency TSK itself has no dependencies and is
		   ready to run, move it up front so that we run it as
		   soon as possible.  */
		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
		  priority_queue_upgrade_task (tsk, task);
	      }
	  }
    }
  if (num_awaited == 0)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }

  /* Publish a taskwait so completing children can count down
     n_depend and wake us.  */
  memset (&taskwait, 0, sizeof (taskwait));
  taskwait.n_depend = num_awaited;
  gomp_sem_init (&taskwait.taskwait_sem, 0);
  task->taskwait = &taskwait;

  while (1)
    {
      bool cancelled = false;
      if (taskwait.n_depend == 0)
	{
	  task->taskwait = NULL;
	  gomp_mutex_unlock (&team->task_lock);
	  if (to_free)
	    {
	      gomp_finish_task (to_free);
	      free (to_free);
	    }
	  gomp_sem_destroy (&taskwait.taskwait_sem);
	  return;
	}

      /* Theoretically when we have multiple priorities, we should
	 chose between the highest priority item in
	 task->children_queue and team->task_queue here, so we should
	 use priority_queue_next_task().  However, since we are
	 running an undeferred task, perhaps that makes all tasks it
	 depends on undeferred, thus a priority of INF?  This would
	 make it unnecessary to take anything into account here,
	 but the dependencies.

	 On the other hand, if we want to use priority_queue_next_task(),
	 care should be taken to only use priority_queue_remove()
	 below if the task was actually removed from the children
	 queue.  */
      bool ignored;
      struct gomp_task *next_task
	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
				    PQ_IGNORED, NULL, &ignored);

      if (next_task->kind == GOMP_TASK_WAITING)
	{
	  child_task = next_task;
	  cancelled
	    = gomp_task_run_pre (child_task, task, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	/* All tasks we are waiting for are either running in other
	   threads, or they are tasks that have not had their
	   dependencies met (so they're not even in the queue).  Wait
	   for them.  */
	taskwait.in_depend_wait = true;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      /* Free the previous iteration's task outside the lock.  */
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* A NULL fn marks a target task; gomp_target_task_fn
	     returning true means it runs asynchronously.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: block until a dependency completes.  */
	gomp_sem_wait (&taskwait.taskwait_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  if (child_task->parent_depends_on)
	    --taskwait.n_depend;

	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
				 child_task, MEMMODEL_RELAXED);
	  child_task->pnode[PQ_CHILDREN].next = NULL;
	  child_task->pnode[PQ_CHILDREN].prev = NULL;

	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  /* New tasks were released by dependency completion: wake up
	     idle threads to run them.  */
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }
}
1824
1825 /* Called when encountering a taskyield directive. */
1826
void
GOMP_taskyield (void)
{
  /* The runtime currently never switches tasks at a taskyield;
     this entry point is intentionally a no-op.  */
}
1832
1833 static inline struct gomp_taskgroup *
gomp_taskgroup_init(struct gomp_taskgroup * prev)1834 gomp_taskgroup_init (struct gomp_taskgroup *prev)
1835 {
1836 struct gomp_taskgroup *taskgroup
1837 = gomp_malloc (sizeof (struct gomp_taskgroup));
1838 taskgroup->prev = prev;
1839 priority_queue_init (&taskgroup->taskgroup_queue);
1840 taskgroup->reductions = prev ? prev->reductions : NULL;
1841 taskgroup->in_taskgroup_wait = false;
1842 taskgroup->cancelled = false;
1843 taskgroup->workshare = false;
1844 taskgroup->num_children = 0;
1845 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1846 return taskgroup;
1847 }
1848
1849 void
GOMP_taskgroup_start(void)1850 GOMP_taskgroup_start (void)
1851 {
1852 struct gomp_thread *thr = gomp_thread ();
1853 struct gomp_team *team = thr->ts.team;
1854 struct gomp_task *task = thr->task;
1855
1856 /* If team is NULL, all tasks are executed as
1857 GOMP_TASK_UNDEFERRED tasks and thus all children tasks of
1858 taskgroup and their descendant tasks will be finished
1859 by the time GOMP_taskgroup_end is called. */
1860 if (team == NULL)
1861 return;
1862 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
1863 }
1864
void
GOMP_taskgroup_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_task *task = thr->task;
  struct gomp_taskgroup *taskgroup;
  struct gomp_task *child_task = NULL;
  struct gomp_task *to_free = NULL;
  int do_wake = 0;

  if (team == NULL)
    return;
  taskgroup = task->taskgroup;
  if (__builtin_expect (taskgroup == NULL, 0)
      && thr->ts.level == 0)
    {
      /* This can happen if GOMP_taskgroup_start is called when
	 thr->ts.team == NULL, but inside of the taskgroup there
	 is #pragma omp target nowait that creates an implicit
	 team with a single thread.  In this case, we want to wait
	 for all outstanding tasks in this team.  */
      gomp_team_barrier_wait (&team->barrier);
      return;
    }

  /* The acquire barrier on load of taskgroup->num_children here
     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
     It is not necessary that we synchronize with other non-0 writes at
     this point, but we must ensure that all writes to memory by a
     child thread task work function are seen before we exit from
     GOMP_taskgroup_end.  */
  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
    goto finish;

  bool unused;
  gomp_mutex_lock (&team->task_lock);
  while (1)
    {
      bool cancelled = false;
      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
				  MEMMODEL_RELAXED))
	{
	  if (taskgroup->num_children)
	    {
	      /* Children exist but none is queued for this taskgroup;
		 try this task's own children, else block.  */
	      if (priority_queue_empty_p (&task->children_queue,
					  MEMMODEL_RELAXED))
		goto do_wait;
	      child_task
		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
					    PQ_TEAM, &team->task_queue,
					    &unused);
	    }
	  else
	    {
	      /* No children left at all: the taskgroup is complete.  */
	      gomp_mutex_unlock (&team->task_lock);
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		}
	      goto finish;
	    }
	}
      else
	child_task
	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				      PQ_TEAM, &team->task_queue, &unused);
      if (child_task->kind == GOMP_TASK_WAITING)
	{
	  cancelled
	    = gomp_task_run_pre (child_task, child_task->parent, team);
	  if (__builtin_expect (cancelled, 0))
	    {
	      if (to_free)
		{
		  gomp_finish_task (to_free);
		  free (to_free);
		  to_free = NULL;
		}
	      goto finish_cancelled;
	    }
	}
      else
	{
	  child_task = NULL;
	 do_wait:
	  /* All tasks we are waiting for are either running in other
	     threads, or they are tasks that have not had their
	     dependencies met (so they're not even in the queue).  Wait
	     for them.  */
	  taskgroup->in_taskgroup_wait = true;
	}
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	{
	  gomp_team_barrier_wake (&team->barrier, do_wake);
	  do_wake = 0;
	}
      /* Free the previous iteration's task outside the lock.  */
      if (to_free)
	{
	  gomp_finish_task (to_free);
	  free (to_free);
	  to_free = NULL;
	}
      if (child_task)
	{
	  thr->task = child_task;
	  /* A NULL fn marks a target task; gomp_target_task_fn
	     returning true means it runs asynchronously.  */
	  if (__builtin_expect (child_task->fn == NULL, 0))
	    {
	      if (gomp_target_task_fn (child_task->fn_data))
		{
		  thr->task = task;
		  gomp_mutex_lock (&team->task_lock);
		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
		  struct gomp_target_task *ttask
		    = (struct gomp_target_task *) child_task->fn_data;
		  /* If GOMP_PLUGIN_target_task_completion has run already
		     in between gomp_target_task_fn and the mutex lock,
		     perform the requeuing here.  */
		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
		    gomp_target_task_completion (team, child_task);
		  else
		    ttask->state = GOMP_TARGET_TASK_RUNNING;
		  child_task = NULL;
		  continue;
		}
	    }
	  else
	    child_task->fn (child_task->fn_data);
	  thr->task = task;
	}
      else
	/* Nothing runnable: block until a taskgroup child completes.  */
	gomp_sem_wait (&taskgroup->taskgroup_sem);
      gomp_mutex_lock (&team->task_lock);
      if (child_task)
	{
	 finish_cancelled:;
	  size_t new_tasks
	    = gomp_task_run_post_handle_depend (child_task, team);
	  gomp_task_run_post_remove_parent (child_task);
	  gomp_clear_parent (&child_task->children_queue);
	  gomp_task_run_post_remove_taskgroup (child_task);
	  to_free = child_task;
	  child_task = NULL;
	  team->task_count--;
	  /* New tasks were released by dependency completion: wake up
	     idle threads to run them.  */
	  if (new_tasks > 1)
	    {
	      do_wake = team->nthreads - team->task_running_count
			- !task->in_tied_task;
	      if (do_wake > new_tasks)
		do_wake = new_tasks;
	    }
	}
    }

 finish:
  /* Pop the taskgroup off the stack and release its resources.  */
  task->taskgroup = taskgroup->prev;
  gomp_sem_destroy (&taskgroup->taskgroup_sem);
  free (taskgroup);
}
2026
2027 static inline __attribute__((always_inline)) void
gomp_reduction_register(uintptr_t * data,uintptr_t * old,uintptr_t * orig,unsigned nthreads)2028 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2029 unsigned nthreads)
2030 {
2031 size_t total_cnt = 0;
2032 uintptr_t *d = data;
2033 struct htab *old_htab = NULL, *new_htab;
2034 do
2035 {
2036 if (__builtin_expect (orig != NULL, 0))
2037 {
2038 /* For worksharing task reductions, memory has been allocated
2039 already by some other thread that encountered the construct
2040 earlier. */
2041 d[2] = orig[2];
2042 d[6] = orig[6];
2043 orig = (uintptr_t *) orig[4];
2044 }
2045 else
2046 {
2047 size_t sz = d[1] * nthreads;
2048 /* Should use omp_alloc if d[3] is not -1. */
2049 void *ptr = gomp_aligned_alloc (d[2], sz);
2050 memset (ptr, '\0', sz);
2051 d[2] = (uintptr_t) ptr;
2052 d[6] = d[2] + sz;
2053 }
2054 d[5] = 0;
2055 total_cnt += d[0];
2056 if (d[4] == 0)
2057 {
2058 d[4] = (uintptr_t) old;
2059 break;
2060 }
2061 else
2062 d = (uintptr_t *) d[4];
2063 }
2064 while (1);
2065 if (old && old[5])
2066 {
2067 old_htab = (struct htab *) old[5];
2068 total_cnt += htab_elements (old_htab);
2069 }
2070 new_htab = htab_create (total_cnt);
2071 if (old_htab)
2072 {
2073 /* Copy old hash table, like in htab_expand. */
2074 hash_entry_type *p, *olimit;
2075 new_htab->n_elements = htab_elements (old_htab);
2076 olimit = old_htab->entries + old_htab->size;
2077 p = old_htab->entries;
2078 do
2079 {
2080 hash_entry_type x = *p;
2081 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2082 *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2083 p++;
2084 }
2085 while (p < olimit);
2086 }
2087 d = data;
2088 do
2089 {
2090 size_t j;
2091 for (j = 0; j < d[0]; ++j)
2092 {
2093 uintptr_t *p = d + 7 + j * 3;
2094 p[2] = (uintptr_t) d;
2095 /* Ugly hack, hash_entry_type is defined for the task dependencies,
2096 which hash on the first element which is a pointer. We need
2097 to hash also on the first sizeof (uintptr_t) bytes which contain
2098 a pointer. Hide the cast from the compiler. */
2099 hash_entry_type n;
2100 __asm ("" : "=g" (n) : "0" (p));
2101 *htab_find_slot (&new_htab, n, INSERT) = n;
2102 }
2103 if (d[4] == (uintptr_t) old)
2104 break;
2105 else
2106 d = (uintptr_t *) d[4];
2107 }
2108 while (1);
2109 d[5] = (uintptr_t) new_htab;
2110 }
2111
/* Create an artificial team of a single thread for the calling thread,
   so that orphaned constructs (encountered outside of any parallel
   region) have a team and an implicit task to attach state to.  The
   previous team state is saved in team->prev_ts and the new team's
   implicit task becomes thr->task.  */

static void
gomp_create_artificial_team (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task_icv *icv;
  struct gomp_team *team = gomp_new_team (1);
  struct gomp_task *task = thr->task;
  struct gomp_task **implicit_task = &task;
  /* Inherit ICVs from the current task if there is one, otherwise use
     the global defaults.  */
  icv = task ? &task->icv : &gomp_global_icv;
  team->prev_ts = thr->ts;
  thr->ts.team = team;
  thr->ts.team_id = 0;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  gomp_init_task (thr->task, NULL, icv);
  /* Walk up the parent chain looking for an existing implicit task.  */
  while (*implicit_task
	 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
    implicit_task = &(*implicit_task)->parent;
  if (*implicit_task)
    {
      /* An implicit task existed; end and free it — the new team's
	 implicit task takes its place.  */
      thr->task = *implicit_task;
      gomp_end_task ();
      free (*implicit_task);
      thr->task = &team->implicit_task[0];
    }
#ifdef LIBGOMP_USE_PTHREADS
  else
    /* NOTE(review): presumably registers thread-exit cleanup for this
       artificial team via the TLS destructor — confirm against the
       gomp_thread_destructor definition.  */
    pthread_setspecific (gomp_thread_destructor, thr);
#endif
  if (implicit_task != &task)
    {
      /* The freed implicit task was an ancestor of the original task;
	 re-link that ancestor slot to the new implicit task and keep
	 running the original task.  */
      *implicit_task = thr->task;
      thr->task = task;
    }
}
2152
2153 /* The format of data is:
2154 data[0] cnt
2155 data[1] size
2156 data[2] alignment (on output array pointer)
2157 data[3] allocator (-1 if malloc allocator)
2158 data[4] next pointer
2159 data[5] used internally (htab pointer)
2160 data[6] used internally (end of array)
2161 cnt times
2162 ent[0] address
2163 ent[1] offset
2164 ent[2] used internally (pointer to data[0])
   The entries are sorted by increasing offset, so that a binary
   search can be performed.  Normally, data[8] (i.e. ent[1], the
   offset of the first entry) is 0; the exception is worksharing
   construct task reductions in cancellable parallel, where at offset
   0 there should be space for a pointer and an integer which are
   used internally.  */
2170
2171 void
GOMP_taskgroup_reduction_register(uintptr_t * data)2172 GOMP_taskgroup_reduction_register (uintptr_t *data)
2173 {
2174 struct gomp_thread *thr = gomp_thread ();
2175 struct gomp_team *team = thr->ts.team;
2176 struct gomp_task *task;
2177 unsigned nthreads;
2178 if (__builtin_expect (team == NULL, 0))
2179 {
2180 /* The task reduction code needs a team and task, so for
2181 orphaned taskgroups just create the implicit team. */
2182 gomp_create_artificial_team ();
2183 ialias_call (GOMP_taskgroup_start) ();
2184 team = thr->ts.team;
2185 }
2186 nthreads = team->nthreads;
2187 task = thr->task;
2188 gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2189 task->taskgroup->reductions = data;
2190 }
2191
2192 void
GOMP_taskgroup_reduction_unregister(uintptr_t * data)2193 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2194 {
2195 uintptr_t *d = data;
2196 htab_free ((struct htab *) data[5]);
2197 do
2198 {
2199 gomp_aligned_free ((void *) d[2]);
2200 d = (uintptr_t *) d[4];
2201 }
2202 while (d && !d[5]);
2203 }
ialias(GOMP_taskgroup_reduction_unregister)2204 ialias (GOMP_taskgroup_reduction_unregister)
2205
2206 /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
2207 original list item or address of previously remapped original list
2208 item to address of the private copy, store that to ptrs[i].
2209 For i < cntorig, additionally set ptrs[cnt+i] to the address of
2210 the original list item. */
2211
2212 void
2213 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2214 {
2215 struct gomp_thread *thr = gomp_thread ();
2216 struct gomp_task *task = thr->task;
2217 unsigned id = thr->ts.team_id;
2218 uintptr_t *data = task->taskgroup->reductions;
2219 uintptr_t *d;
2220 struct htab *reduction_htab = (struct htab *) data[5];
2221 size_t i;
2222 for (i = 0; i < cnt; ++i)
2223 {
2224 hash_entry_type ent, n;
2225 __asm ("" : "=g" (ent) : "0" (ptrs + i));
2226 n = htab_find (reduction_htab, ent);
2227 if (n)
2228 {
2229 uintptr_t *p;
2230 __asm ("" : "=g" (p) : "0" (n));
2231 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2232 p[1] is the offset within the allocated chunk for each
2233 thread, p[2] is the array registered with
2234 GOMP_taskgroup_reduction_register, d[2] is the base of the
2235 allocated memory and d[1] is the size of the allocated chunk
2236 for one thread. */
2237 d = (uintptr_t *) p[2];
2238 ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2239 if (__builtin_expect (i < cntorig, 0))
2240 ptrs[cnt + i] = (void *) p[0];
2241 continue;
2242 }
2243 d = data;
2244 while (d != NULL)
2245 {
2246 if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2247 break;
2248 d = (uintptr_t *) d[4];
2249 }
2250 if (d == NULL)
2251 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2252 "task modifier for %p", ptrs[i]);
2253 uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2254 ptrs[i] = (void *) (d[2] + id * d[1] + off);
2255 if (__builtin_expect (i < cntorig, 0))
2256 {
2257 size_t lo = 0, hi = d[0] - 1;
2258 while (lo <= hi)
2259 {
2260 size_t m = (lo + hi) / 2;
2261 if (d[7 + 3 * m + 1] < off)
2262 lo = m + 1;
2263 else if (d[7 + 3 * m + 1] == off)
2264 {
2265 ptrs[cnt + i] = (void *) d[7 + 3 * m];
2266 break;
2267 }
2268 else
2269 hi = m - 1;
2270 }
2271 if (lo > hi)
2272 gomp_fatal ("couldn't find matching task_reduction or reduction "
2273 "with task modifier for %p", ptrs[i]);
2274 }
2275 }
2276 }
2277
2278 struct gomp_taskgroup *
gomp_parallel_reduction_register(uintptr_t * data,unsigned nthreads)2279 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2280 {
2281 struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2282 gomp_reduction_register (data, NULL, NULL, nthreads);
2283 taskgroup->reductions = data;
2284 return taskgroup;
2285 }
2286
2287 void
gomp_workshare_task_reduction_register(uintptr_t * data,uintptr_t * orig)2288 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2289 {
2290 struct gomp_thread *thr = gomp_thread ();
2291 struct gomp_team *team = thr->ts.team;
2292 struct gomp_task *task = thr->task;
2293 unsigned nthreads = team->nthreads;
2294 gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2295 task->taskgroup->reductions = data;
2296 }
2297
2298 void
gomp_workshare_taskgroup_start(void)2299 gomp_workshare_taskgroup_start (void)
2300 {
2301 struct gomp_thread *thr = gomp_thread ();
2302 struct gomp_team *team = thr->ts.team;
2303 struct gomp_task *task;
2304
2305 if (team == NULL)
2306 {
2307 gomp_create_artificial_team ();
2308 team = thr->ts.team;
2309 }
2310 task = thr->task;
2311 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2312 task->taskgroup->workshare = true;
2313 }
2314
2315 void
GOMP_workshare_task_reduction_unregister(bool cancelled)2316 GOMP_workshare_task_reduction_unregister (bool cancelled)
2317 {
2318 struct gomp_thread *thr = gomp_thread ();
2319 struct gomp_task *task = thr->task;
2320 struct gomp_team *team = thr->ts.team;
2321 uintptr_t *data = task->taskgroup->reductions;
2322 ialias_call (GOMP_taskgroup_end) ();
2323 if (thr->ts.team_id == 0)
2324 ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2325 else
2326 htab_free ((struct htab *) data[5]);
2327
2328 if (!cancelled)
2329 gomp_team_barrier_wait (&team->barrier);
2330 }
2331
2332 int
omp_in_final(void)2333 omp_in_final (void)
2334 {
2335 struct gomp_thread *thr = gomp_thread ();
2336 return thr->task && thr->task->final_task;
2337 }
2338
2339 ialias (omp_in_final)
2340