1 /* Copyright (C) 2007-2022 Free Software Foundation, Inc.
2 Contributed by Richard Henderson <rth@redhat.com>.
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This file handles the maintenance of tasks in response to task
27 creation and termination. */
28
29 #include "libgomp.h"
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include "gomp-constants.h"
34
35 typedef struct gomp_task_depend_entry *hash_entry_type;
36
37 static inline void *
38 htab_alloc (size_t size)
39 {
40 return gomp_malloc (size);
41 }
42
43 static inline void
44 htab_free (void *ptr)
45 {
46 free (ptr);
47 }
48
49 #include "hashtab.h"
50
51 static inline hashval_t
52 htab_hash (hash_entry_type element)
53 {
54 return hash_pointer (element->addr);
55 }
56
57 static inline bool
58 htab_eq (hash_entry_type x, hash_entry_type y)
59 {
60 return x->addr == y->addr;
61 }
62
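/* Added commentary (not from the original sources): hashtab.h is written to
   be parameterized by the including file, which supplies hash_entry_type,
   htab_alloc, htab_free, htab_hash and htab_eq.  With the definitions in
   this file, the table instantiated here keys entries by the dependence
   address (gomp_task_depend_entry::addr), so all entries referring to the
   same address land in the same slot.  */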
63 /* Create a new task data structure. */
64
65 void
66 gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
67 struct gomp_task_icv *prev_icv)
68 {
69 /* It would seem that using memset here would be a win, but it turns
70 out that partially filling gomp_task allows us to keep the
71 overhead of task creation low. In the nqueens-1.c test, for a
72 sufficiently large N, we drop the overhead from 5-6% to 1%.
73
74 Note, the nqueens-1.c test in serial mode is a good test to
75 benchmark the overhead of creating tasks as there are millions of
76 tiny tasks created that all run undeferred. */
77 task->parent = parent_task;
78 priority_queue_init (&task->children_queue);
79 task->taskgroup = NULL;
80 task->dependers = NULL;
81 task->depend_hash = NULL;
82 task->taskwait = NULL;
83 task->depend_count = 0;
84 task->completion_sem = NULL;
85 task->deferred_p = false;
86 task->icv = *prev_icv;
87 task->kind = GOMP_TASK_IMPLICIT;
88 task->in_tied_task = false;
89 task->final_task = false;
90 task->copy_ctors_done = false;
91 task->parent_depends_on = false;
92 }
93
94 /* Clean up a task, after completing it. */
95
96 void
97 gomp_end_task (void)
98 {
99 struct gomp_thread *thr = gomp_thread ();
100 struct gomp_task *task = thr->task;
101
102 gomp_finish_task (task);
103 thr->task = task->parent;
104 }
105
106 /* Clear the parent field of every task in LIST. */
107
108 static inline void
109 gomp_clear_parent_in_list (struct priority_list *list)
110 {
111 struct priority_node *p = list->tasks;
112 if (p)
113 do
114 {
115 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
116 p = p->next;
117 }
118 while (p != list->tasks);
119 }
120
121 /* Splay tree version of gomp_clear_parent_in_list.
122
123 Clear the parent field of every task in NODE within SP, and free
124 the node when done. */
125
126 static void
127 gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
128 {
129 if (!node)
130 return;
131 prio_splay_tree_node left = node->left, right = node->right;
132 gomp_clear_parent_in_list (&node->key.l);
133 #if _LIBGOMP_CHECKING_
134 memset (node, 0xaf, sizeof (*node));
135 #endif
136 /* No need to remove the node from the tree. We're nuking
137 everything, so just free the nodes and our caller can clear the
138 entire splay tree. */
139 free (node);
140 gomp_clear_parent_in_tree (sp, left);
141 gomp_clear_parent_in_tree (sp, right);
142 }
143
144 /* Clear the parent field of every task in Q and remove every task
145 from Q. */
146
147 static inline void
148 gomp_clear_parent (struct priority_queue *q)
149 {
150 if (priority_queue_multi_p (q))
151 {
152 gomp_clear_parent_in_tree (&q->t, q->t.root);
153 /* All the nodes have been cleared in gomp_clear_parent_in_tree.
154 No need to remove anything. We can just nuke everything. */
155 q->t.root = NULL;
156 }
157 else
158 gomp_clear_parent_in_list (&q->l);
159 }
160
161 /* Helper function for GOMP_task and gomp_create_target_task.
162
163 For a TASK with in/out dependencies, fill in the various dependency
164 queues. PARENT is the parent of said task. DEPEND is as in
165 GOMP_task. */
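/* Added sketch of the bookkeeping involved (derived from the code below,
   for orientation only):
   - parent->depend_hash maps each dependence address to a chain of
     gomp_task_depend_entry objects owned by still-pending sibling tasks;
   - whenever the new task conflicts with an earlier sibling on the same
     address (anything except in-after-in), the new task is appended to that
     sibling's dependers vector and task->num_dependees is incremented;
   - a task with num_dependees > 0 is not placed on any run queue; it is
     queued later by gomp_task_run_post_handle_dependers once the count
     drops to zero.  */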
166
167 static void
168 gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
169 void **depend)
170 {
171 size_t ndepend = (uintptr_t) depend[0];
172 size_t i;
173 hash_entry_type ent;
174
175 if (ndepend)
176 {
177 /* depend[0] is total # */
178 size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */
179 /* ndepend - nout is # of in: */
180 for (i = 0; i < ndepend; i++)
181 {
182 task->depend[i].addr = depend[2 + i];
183 task->depend[i].is_in = i >= nout;
184 }
185 }
186 else
187 {
188 ndepend = (uintptr_t) depend[1]; /* total # */
189 size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */
190 size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */
191 /* For now we treat mutexinoutset like out, which is compliant, but
192 inefficient. */
193 size_t nin = (uintptr_t) depend[4]; /* # of in: */
194 /* ndepend - nout - nmutexinoutset - nin is # of depobjs */
195 size_t normal = nout + nmutexinoutset + nin;
196 size_t n = 0;
197 for (i = normal; i < ndepend; i++)
198 {
199 void **d = (void **) (uintptr_t) depend[5 + i];
200 switch ((uintptr_t) d[1])
201 {
202 case GOMP_DEPEND_OUT:
203 case GOMP_DEPEND_INOUT:
204 case GOMP_DEPEND_MUTEXINOUTSET:
205 break;
206 case GOMP_DEPEND_IN:
207 continue;
208 default:
209 gomp_fatal ("unknown omp_depend_t dependence type %d",
210 (int) (uintptr_t) d[1]);
211 }
212 task->depend[n].addr = d[0];
213 task->depend[n++].is_in = 0;
214 }
215 for (i = 0; i < normal; i++)
216 {
217 task->depend[n].addr = depend[5 + i];
218 task->depend[n++].is_in = i >= nout + nmutexinoutset;
219 }
220 for (i = normal; i < ndepend; i++)
221 {
222 void **d = (void **) (uintptr_t) depend[5 + i];
223 if ((uintptr_t) d[1] != GOMP_DEPEND_IN)
224 continue;
225 task->depend[n].addr = d[0];
226 task->depend[n++].is_in = 1;
227 }
228 }
229 task->depend_count = ndepend;
230 task->num_dependees = 0;
231 if (parent->depend_hash == NULL)
232 parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
233 for (i = 0; i < ndepend; i++)
234 {
235 task->depend[i].next = NULL;
236 task->depend[i].prev = NULL;
237 task->depend[i].task = task;
238 task->depend[i].redundant = false;
239 task->depend[i].redundant_out = false;
240
241 hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
242 &task->depend[i], INSERT);
243 hash_entry_type out = NULL, last = NULL;
244 if (*slot)
245 {
246 /* If the same task lists the same address in multiple depend clauses,
247 all but the first one are redundant.  As inout/out come first, if any
248 of them is inout/out, it will win, which is the right semantics. */
249 if ((*slot)->task == task)
250 {
251 task->depend[i].redundant = true;
252 continue;
253 }
254 for (ent = *slot; ent; ent = ent->next)
255 {
256 if (ent->redundant_out)
257 break;
258
259 last = ent;
260
261 /* depend(in:...) doesn't depend on earlier depend(in:...). */
262 if (task->depend[i].is_in && ent->is_in)
263 continue;
264
265 if (!ent->is_in)
266 out = ent;
267
268 struct gomp_task *tsk = ent->task;
269 if (tsk->dependers == NULL)
270 {
271 tsk->dependers
272 = gomp_malloc (sizeof (struct gomp_dependers_vec)
273 + 6 * sizeof (struct gomp_task *));
274 tsk->dependers->n_elem = 1;
275 tsk->dependers->allocated = 6;
276 tsk->dependers->elem[0] = task;
277 task->num_dependees++;
278 continue;
279 }
280 /* We already have some other dependency on tsk from an earlier
281 depend clause. */
282 else if (tsk->dependers->n_elem
283 && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
284 == task))
285 continue;
286 else if (tsk->dependers->n_elem == tsk->dependers->allocated)
287 {
288 tsk->dependers->allocated
289 = tsk->dependers->allocated * 2 + 2;
290 tsk->dependers
291 = gomp_realloc (tsk->dependers,
292 sizeof (struct gomp_dependers_vec)
293 + (tsk->dependers->allocated
294 * sizeof (struct gomp_task *)));
295 }
296 tsk->dependers->elem[tsk->dependers->n_elem++] = task;
297 task->num_dependees++;
298 }
299 task->depend[i].next = *slot;
300 (*slot)->prev = &task->depend[i];
301 }
302 *slot = &task->depend[i];
303
304 /* There is no need to store more than one depend({,in}out:) task per
305 address in the hash table chain for the purpose of creation of
306 deferred tasks, because each out depends on all earlier outs, thus it
307 is enough to record just the last depend({,in}out:). For depend(in:),
308 we need to keep all of the previous ones not terminated yet, because
309 a later depend({,in}out:) might need to depend on all of them. So, if
310 the new task's clause is depend({,in}out:), we know there is at most
311 one other depend({,in}out:) clause in the list (out). For
312 non-deferred tasks we want to see all outs, so they are moved to the
313 end of the chain; after the first redundant_out entry, all following
314 entries should be redundant_out. */
315 if (!task->depend[i].is_in && out)
316 {
317 if (out != last)
318 {
319 out->next->prev = out->prev;
320 out->prev->next = out->next;
321 out->next = last->next;
322 out->prev = last;
323 last->next = out;
324 if (out->next)
325 out->next->prev = out;
326 }
327 out->redundant_out = true;
328 }
329 }
330 }
331
332 /* Called when encountering an explicit task directive. If IF_CLAUSE is
333 false, then we must not delay in executing the task. If UNTIED is true,
334 then the task may be executed by any member of the team.
335
336 DEPEND is an array containing:
337 if depend[0] is non-zero, then:
338 depend[0]: number of depend elements.
339 depend[1]: number of depend elements of type "out/inout".
340 depend[2..N+1]: address of [1..N]th depend element.
341 otherwise, when depend[0] is zero, then:
342 depend[1]: number of depend elements.
343 depend[2]: number of depend elements of type "out/inout".
344 depend[3]: number of depend elements of type "mutexinoutset".
345 depend[4]: number of depend elements of type "in".
346 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements
347 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of
348 omp_depend_t objects. */
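/* Illustration (added, not part of the interface description above): for a
   directive such as

       #pragma omp task depend(out: x) depend(in: y, z)

   a compiler using the first encoding (depend[0] non-zero) would typically
   pass

       depend[0] = (void *) (uintptr_t) 3;   -- three depend elements
       depend[1] = (void *) (uintptr_t) 1;   -- one of them is out/inout
       depend[2] = &x;                       -- out/inout addresses first
       depend[3] = &y;                       -- then the in addresses
       depend[4] = &z;

   The second encoding (depend[0] == 0) is used when mutexinoutset or
   omp_depend_t (depobj) dependencies are involved.  */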
349
350 void
351 GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
352 long arg_size, long arg_align, bool if_clause, unsigned flags,
353 void **depend, int priority_arg, void *detach)
354 {
355 struct gomp_thread *thr = gomp_thread ();
356 struct gomp_team *team = thr->ts.team;
357 int priority = 0;
358
359 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
360 /* If pthread_mutex_* is used for omp_*lock*, then each task must be
361 tied to one thread all the time. This means UNTIED tasks must be
362 tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
363 might be running on a different thread than FN. */
364 if (cpyfn)
365 if_clause = false;
366 flags &= ~GOMP_TASK_FLAG_UNTIED;
367 #endif
368
369 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
370 if (__builtin_expect (gomp_cancel_var, 0) && team)
371 {
372 if (gomp_team_barrier_cancelled (&team->barrier))
373 return;
374 if (thr->task->taskgroup)
375 {
376 if (thr->task->taskgroup->cancelled)
377 return;
378 if (thr->task->taskgroup->workshare
379 && thr->task->taskgroup->prev
380 && thr->task->taskgroup->prev->cancelled)
381 return;
382 }
383 }
384
385 if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0))
386 {
387 priority = priority_arg;
388 if (priority > gomp_max_task_priority_var)
389 priority = gomp_max_task_priority_var;
390 }
391
392 if (!if_clause || team == NULL
393 || (thr->task && thr->task->final_task)
394 || team->task_count > 64 * team->nthreads)
395 {
396 struct gomp_task task;
397 gomp_sem_t completion_sem;
398
399 /* If there are depend clauses and earlier deferred sibling tasks
400 with depend clauses, check whether there is a dependency.  If there
401 is, we need to wait for them. There is no need to handle
402 depend clauses for non-deferred tasks other than this, because
403 the parent task is suspended until the child task finishes and thus
404 it can't start further child tasks. */
405 if ((flags & GOMP_TASK_FLAG_DEPEND)
406 && thr->task && thr->task->depend_hash)
407 gomp_task_maybe_wait_for_dependencies (depend);
408
409 gomp_init_task (&task, thr->task, gomp_icv (false));
410 task.kind = GOMP_TASK_UNDEFERRED;
411 task.final_task = (thr->task && thr->task->final_task)
412 || (flags & GOMP_TASK_FLAG_FINAL);
413 task.priority = priority;
414
415 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
416 {
417 gomp_sem_init (&completion_sem, 0);
418 task.completion_sem = &completion_sem;
419 *(void **) detach = &task;
420 if (data)
421 *(void **) data = &task;
422
423 gomp_debug (0, "Thread %d: new event: %p\n",
424 thr->ts.team_id, &task);
425 }
426
427 if (thr->task)
428 {
429 task.in_tied_task = thr->task->in_tied_task;
430 task.taskgroup = thr->task->taskgroup;
431 }
432 thr->task = &task;
433 if (__builtin_expect (cpyfn != NULL, 0))
434 {
435 char buf[arg_size + arg_align - 1];
436 char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
437 & ~(uintptr_t) (arg_align - 1));
438 cpyfn (arg, data);
439 fn (arg);
440 }
441 else
442 fn (data);
443
444 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
445 {
446 gomp_sem_wait (&completion_sem);
447 gomp_sem_destroy (&completion_sem);
448 }
449
450 /* Access to "children" is normally done inside a task_lock
451 mutex region, but the only way this particular task.children
452 can be set is if this thread's task work function (fn)
453 creates children. So since the setter is *this* thread, we
454 need no barriers here when testing for non-NULL. We can have
455 task.children set by the current thread then changed by a
456 child thread, but seeing a stale non-NULL value is not a
457 problem. Once past the task_lock acquisition, this thread
458 will see the real value of task.children. */
459 if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
460 {
461 gomp_mutex_lock (&team->task_lock);
462 gomp_clear_parent (&task.children_queue);
463 gomp_mutex_unlock (&team->task_lock);
464 }
465 gomp_end_task ();
466 }
467 else
468 {
469 struct gomp_task *task;
470 struct gomp_task *parent = thr->task;
471 struct gomp_taskgroup *taskgroup = parent->taskgroup;
472 char *arg;
473 bool do_wake;
474 size_t depend_size = 0;
475
476 if (flags & GOMP_TASK_FLAG_DEPEND)
477 depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1])
478 * sizeof (struct gomp_task_depend_entry));
479 task = gomp_malloc (sizeof (*task) + depend_size
480 + arg_size + arg_align - 1);
481 arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
482 & ~(uintptr_t) (arg_align - 1));
483 gomp_init_task (task, parent, gomp_icv (false));
484 task->priority = priority;
485 task->kind = GOMP_TASK_UNDEFERRED;
486 task->in_tied_task = parent->in_tied_task;
487 task->taskgroup = taskgroup;
488 task->deferred_p = true;
489 if ((flags & GOMP_TASK_FLAG_DETACH) != 0)
490 {
491 task->detach_team = team;
492
493 *(void **) detach = task;
494 if (data)
495 *(void **) data = task;
496
497 gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task);
498 }
499 thr->task = task;
500 if (cpyfn)
501 {
502 cpyfn (arg, data);
503 task->copy_ctors_done = true;
504 }
505 else
506 memcpy (arg, data, arg_size);
507 thr->task = parent;
508 task->kind = GOMP_TASK_WAITING;
509 task->fn = fn;
510 task->fn_data = arg;
511 task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
512 gomp_mutex_lock (&team->task_lock);
513 /* If parallel or taskgroup has been cancelled, don't start new
514 tasks. */
515 if (__builtin_expect (gomp_cancel_var, 0)
516 && !task->copy_ctors_done)
517 {
518 if (gomp_team_barrier_cancelled (&team->barrier))
519 {
520 do_cancel:
521 gomp_mutex_unlock (&team->task_lock);
522 gomp_finish_task (task);
523 free (task);
524 return;
525 }
526 if (taskgroup)
527 {
528 if (taskgroup->cancelled)
529 goto do_cancel;
530 if (taskgroup->workshare
531 && taskgroup->prev
532 && taskgroup->prev->cancelled)
533 goto do_cancel;
534 }
535 }
536 if (taskgroup)
537 taskgroup->num_children++;
538 if (depend_size)
539 {
540 gomp_task_handle_depend (task, parent, depend);
541 if (task->num_dependees)
542 {
543 /* Tasks that depend on other tasks are not put into the
544 various waiting queues, so we are done for now. Said
545 tasks are instead put into the queues via
546 gomp_task_run_post_handle_dependers() after their
547 dependencies have been satisfied. After which, they
548 can be picked up by the various scheduling
549 points. */
550 gomp_mutex_unlock (&team->task_lock);
551 return;
552 }
553 }
554
555 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
556 task, priority,
557 PRIORITY_INSERT_BEGIN,
558 /*adjust_parent_depends_on=*/false,
559 task->parent_depends_on);
560 if (taskgroup)
561 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
562 task, priority,
563 PRIORITY_INSERT_BEGIN,
564 /*adjust_parent_depends_on=*/false,
565 task->parent_depends_on);
566
567 priority_queue_insert (PQ_TEAM, &team->task_queue,
568 task, priority,
569 PRIORITY_INSERT_END,
570 /*adjust_parent_depends_on=*/false,
571 task->parent_depends_on);
572
573 ++team->task_count;
574 ++team->task_queued_count;
575 gomp_team_barrier_set_task_pending (&team->barrier);
576 do_wake = team->task_running_count + !parent->in_tied_task
577 < team->nthreads;
578 gomp_mutex_unlock (&team->task_lock);
579 if (do_wake)
580 gomp_team_barrier_wake (&team->barrier, 1);
581 }
582 }
583
584 ialias (GOMP_taskgroup_start)
585 ialias (GOMP_taskgroup_end)
586 ialias (GOMP_taskgroup_reduction_register)
587
588 #define TYPE long
589 #define UTYPE unsigned long
590 #define TYPE_is_long 1
591 #include "taskloop.c"
592 #undef TYPE
593 #undef UTYPE
594 #undef TYPE_is_long
595
596 #define TYPE unsigned long long
597 #define UTYPE TYPE
598 #define GOMP_taskloop GOMP_taskloop_ull
599 #include "taskloop.c"
600 #undef TYPE
601 #undef UTYPE
602 #undef GOMP_taskloop
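/* Note (added): taskloop.c is included twice above as a poor man's template,
   once with TYPE == long providing GOMP_taskloop and once with
   TYPE == unsigned long long providing GOMP_taskloop_ull, so the signed and
   unsigned iteration-space variants share one implementation.  */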
603
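/* Move TASK to the front of its priority list within HEAD, a queue holding
   nodes of type TYPE.  (Added description: this is used below by
   gomp_target_task_completion to requeue a finished target task so it is
   picked up as soon as possible.)  */
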
604 static void inline
605 priority_queue_move_task_first (enum priority_queue_type type,
606 struct priority_queue *head,
607 struct gomp_task *task)
608 {
609 #if _LIBGOMP_CHECKING_
610 if (!priority_queue_task_in_queue_p (type, head, task))
611 gomp_fatal ("Attempt to move first missing task %p", task);
612 #endif
613 struct priority_list *list;
614 if (priority_queue_multi_p (head))
615 {
616 list = priority_queue_lookup_priority (head, task->priority);
617 #if _LIBGOMP_CHECKING_
618 if (!list)
619 gomp_fatal ("Unable to find priority %d", task->priority);
620 #endif
621 }
622 else
623 list = &head->l;
624 priority_list_remove (list, task_to_priority_node (type, task), 0);
625 priority_list_insert (type, list, task, task->priority,
626 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
627 task->parent_depends_on);
628 }
629
630 /* Actual body of GOMP_PLUGIN_target_task_completion that is executed
631 with team->task_lock held, or is executed in the thread that called
632 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
633 run before it acquires team->task_lock. */
634
635 static void
636 gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
637 {
638 struct gomp_task *parent = task->parent;
639 if (parent)
640 priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
641 task);
642
643 struct gomp_taskgroup *taskgroup = task->taskgroup;
644 if (taskgroup)
645 priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
646 task);
647
648 priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
649 PRIORITY_INSERT_BEGIN, false,
650 task->parent_depends_on);
651 task->kind = GOMP_TASK_WAITING;
652 if (parent && parent->taskwait)
653 {
654 if (parent->taskwait->in_taskwait)
655 {
656 /* One more task has had its dependencies met.
657 Inform any waiters. */
658 parent->taskwait->in_taskwait = false;
659 gomp_sem_post (&parent->taskwait->taskwait_sem);
660 }
661 else if (parent->taskwait->in_depend_wait)
662 {
663 /* One more task has had its dependencies met.
664 Inform any waiters. */
665 parent->taskwait->in_depend_wait = false;
666 gomp_sem_post (&parent->taskwait->taskwait_sem);
667 }
668 }
669 if (taskgroup && taskgroup->in_taskgroup_wait)
670 {
671 /* One more task has had its dependencies met.
672 Inform any waiters. */
673 taskgroup->in_taskgroup_wait = false;
674 gomp_sem_post (&taskgroup->taskgroup_sem);
675 }
676
677 ++team->task_queued_count;
678 gomp_team_barrier_set_task_pending (&team->barrier);
679 /* I'm afraid this can't be done after releasing team->task_lock,
680 as gomp_target_task_completion is run from an unrelated thread and
681 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
682 the team could be gone already. */
683 if (team->nthreads > team->task_running_count)
684 gomp_team_barrier_wake (&team->barrier, 1);
685 }
686
687 /* Signal that a target task TTASK has completed the asynchronously
688 running phase and should be requeued as a task to handle the
689 variable unmapping. */
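/* Added summary of the handshake (derived from the function below and its
   callers), roughly: the plugin callback always sets ttask->state to
   GOMP_TARGET_TASK_FINISHED under team->task_lock.  If the launching thread
   has not yet re-acquired the lock after gomp_target_task_fn (state still
   GOMP_TARGET_TASK_READY_TO_RUN), the callback leaves the requeuing to that
   thread; otherwise it requeues the task itself via
   gomp_target_task_completion.  The launching thread does the mirror-image
   check: if it observes GOMP_TARGET_TASK_FINISHED it requeues, otherwise it
   sets GOMP_TARGET_TASK_RUNNING and lets a later callback requeue.  */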
690
691 void
692 GOMP_PLUGIN_target_task_completion (void *data)
693 {
694 struct gomp_target_task *ttask = (struct gomp_target_task *) data;
695 struct gomp_task *task = ttask->task;
696 struct gomp_team *team = ttask->team;
697
698 gomp_mutex_lock (&team->task_lock);
699 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
700 {
701 ttask->state = GOMP_TARGET_TASK_FINISHED;
702 gomp_mutex_unlock (&team->task_lock);
703 return;
704 }
705 ttask->state = GOMP_TARGET_TASK_FINISHED;
706 gomp_target_task_completion (team, task);
707 gomp_mutex_unlock (&team->task_lock);
708 }
709
710 static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
711
712 /* Called for nowait target tasks. */
713
714 bool
715 gomp_create_target_task (struct gomp_device_descr *devicep,
716 void (*fn) (void *), size_t mapnum, void **hostaddrs,
717 size_t *sizes, unsigned short *kinds,
718 unsigned int flags, void **depend, void **args,
719 enum gomp_target_task_state state)
720 {
721 struct gomp_thread *thr = gomp_thread ();
722 struct gomp_team *team = thr->ts.team;
723
724 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
725 if (__builtin_expect (gomp_cancel_var, 0) && team)
726 {
727 if (gomp_team_barrier_cancelled (&team->barrier))
728 return true;
729 if (thr->task->taskgroup)
730 {
731 if (thr->task->taskgroup->cancelled)
732 return true;
733 if (thr->task->taskgroup->workshare
734 && thr->task->taskgroup->prev
735 && thr->task->taskgroup->prev->cancelled)
736 return true;
737 }
738 }
739
740 struct gomp_target_task *ttask;
741 struct gomp_task *task;
742 struct gomp_task *parent = thr->task;
743 struct gomp_taskgroup *taskgroup = parent->taskgroup;
744 bool do_wake;
745 size_t depend_size = 0;
746 uintptr_t depend_cnt = 0;
747 size_t tgt_align = 0, tgt_size = 0;
748 uintptr_t args_cnt = 0;
749
750 if (depend != NULL)
751 {
752 depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]);
753 depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
754 }
755 if (fn)
756 {
757 /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they are
758 firstprivate on the target task. */
759 size_t i;
760 for (i = 0; i < mapnum; i++)
761 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
762 {
763 size_t align = (size_t) 1 << (kinds[i] >> 8);
764 if (tgt_align < align)
765 tgt_align = align;
766 tgt_size = (tgt_size + align - 1) & ~(align - 1);
767 tgt_size += sizes[i];
768 }
769 if (tgt_align)
770 tgt_size += tgt_align - 1;
771 else
772 tgt_size = 0;
773 if (args)
774 {
775 void **cargs = args;
776 while (*cargs)
777 {
778 intptr_t id = (intptr_t) *cargs++;
779 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
780 cargs++;
781 }
782 args_cnt = cargs + 1 - args;
783 }
784 }
785
786 task = gomp_malloc (sizeof (*task) + depend_size
787 + sizeof (*ttask)
788 + args_cnt * sizeof (void *)
789 + mapnum * (sizeof (void *) + sizeof (size_t)
790 + sizeof (unsigned short))
791 + tgt_size);
792 gomp_init_task (task, parent, gomp_icv (false));
793 task->priority = 0;
794 task->kind = GOMP_TASK_WAITING;
795 task->in_tied_task = parent->in_tied_task;
796 task->taskgroup = taskgroup;
797 ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
798 ttask->devicep = devicep;
799 ttask->fn = fn;
800 ttask->mapnum = mapnum;
801 memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
802 if (args_cnt)
803 {
804 ttask->args = (void **) &ttask->hostaddrs[mapnum];
805 memcpy (ttask->args, args, args_cnt * sizeof (void *));
806 ttask->sizes = (size_t *) &ttask->args[args_cnt];
807 }
808 else
809 {
810 ttask->args = args;
811 ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
812 }
813 memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
814 ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
815 memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
816 if (tgt_align)
817 {
818 char *tgt = (char *) &ttask->kinds[mapnum];
819 size_t i;
820 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
821 if (al)
822 tgt += tgt_align - al;
823 tgt_size = 0;
824 for (i = 0; i < mapnum; i++)
825 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
826 {
827 size_t align = (size_t) 1 << (kinds[i] >> 8);
828 tgt_size = (tgt_size + align - 1) & ~(align - 1);
829 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
830 ttask->hostaddrs[i] = tgt + tgt_size;
831 tgt_size = tgt_size + sizes[i];
832 }
833 }
834 ttask->flags = flags;
835 ttask->state = state;
836 ttask->task = task;
837 ttask->team = team;
838 task->fn = NULL;
839 task->fn_data = ttask;
840 task->final_task = 0;
841 gomp_mutex_lock (&team->task_lock);
842 /* If parallel or taskgroup has been cancelled, don't start new tasks. */
843 if (__builtin_expect (gomp_cancel_var, 0))
844 {
845 if (gomp_team_barrier_cancelled (&team->barrier))
846 {
847 do_cancel:
848 gomp_mutex_unlock (&team->task_lock);
849 gomp_finish_task (task);
850 free (task);
851 return true;
852 }
853 if (taskgroup)
854 {
855 if (taskgroup->cancelled)
856 goto do_cancel;
857 if (taskgroup->workshare
858 && taskgroup->prev
859 && taskgroup->prev->cancelled)
860 goto do_cancel;
861 }
862 }
863 if (depend_size)
864 {
865 gomp_task_handle_depend (task, parent, depend);
866 if (task->num_dependees)
867 {
868 if (taskgroup)
869 taskgroup->num_children++;
870 gomp_mutex_unlock (&team->task_lock);
871 return true;
872 }
873 }
874 if (state == GOMP_TARGET_TASK_DATA)
875 {
876 gomp_task_run_post_handle_depend_hash (task);
877 gomp_mutex_unlock (&team->task_lock);
878 gomp_finish_task (task);
879 free (task);
880 return false;
881 }
882 if (taskgroup)
883 taskgroup->num_children++;
884 /* For async offloading, if we don't need to wait for dependencies,
885 run gomp_target_task_fn right away, essentially scheduling the
886 mapping part of the task in the current thread. */
887 if (devicep != NULL
888 && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
889 {
890 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
891 PRIORITY_INSERT_END,
892 /*adjust_parent_depends_on=*/false,
893 task->parent_depends_on);
894 if (taskgroup)
895 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
896 task, 0, PRIORITY_INSERT_END,
897 /*adjust_parent_depends_on=*/false,
898 task->parent_depends_on);
899 task->pnode[PQ_TEAM].next = NULL;
900 task->pnode[PQ_TEAM].prev = NULL;
901 task->kind = GOMP_TASK_TIED;
902 ++team->task_count;
903 gomp_mutex_unlock (&team->task_lock);
904
905 thr->task = task;
906 gomp_target_task_fn (task->fn_data);
907 thr->task = parent;
908
909 gomp_mutex_lock (&team->task_lock);
910 task->kind = GOMP_TASK_ASYNC_RUNNING;
911 /* If GOMP_PLUGIN_target_task_completion has run already
912 in between gomp_target_task_fn and the mutex lock,
913 perform the requeuing here. */
914 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
915 gomp_target_task_completion (team, task);
916 else
917 ttask->state = GOMP_TARGET_TASK_RUNNING;
918 gomp_mutex_unlock (&team->task_lock);
919 return true;
920 }
921 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
922 PRIORITY_INSERT_BEGIN,
923 /*adjust_parent_depends_on=*/false,
924 task->parent_depends_on);
925 if (taskgroup)
926 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
927 PRIORITY_INSERT_BEGIN,
928 /*adjust_parent_depends_on=*/false,
929 task->parent_depends_on);
930 priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
931 PRIORITY_INSERT_END,
932 /*adjust_parent_depends_on=*/false,
933 task->parent_depends_on);
934 ++team->task_count;
935 ++team->task_queued_count;
936 gomp_team_barrier_set_task_pending (&team->barrier);
937 do_wake = team->task_running_count + !parent->in_tied_task
938 < team->nthreads;
939 gomp_mutex_unlock (&team->task_lock);
940 if (do_wake)
941 gomp_team_barrier_wake (&team->barrier, 1);
942 return true;
943 }
944
945 /* Given a parent_depends_on task in LIST, move it to the front of its
946 priority so it is run as soon as possible.
947
948 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
949
950 We rearrange the queue such that all parent_depends_on tasks are
951 first, and last_parent_depends_on points to the last such task we
952 rearranged. For example, given the following tasks in a queue
953 where PD[123] are the parent_depends_on tasks:
954
955 task->children
956 |
957 V
958 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
959
960 We rearrange such that:
961
962 task->children
963 | +--- last_parent_depends_on
964 | |
965 V V
966 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */
967
968 static void inline
969 priority_list_upgrade_task (struct priority_list *list,
970 struct priority_node *node)
971 {
972 struct priority_node *last_parent_depends_on
973 = list->last_parent_depends_on;
974 if (last_parent_depends_on)
975 {
976 node->prev->next = node->next;
977 node->next->prev = node->prev;
978 node->prev = last_parent_depends_on;
979 node->next = last_parent_depends_on->next;
980 node->prev->next = node;
981 node->next->prev = node;
982 }
983 else if (node != list->tasks)
984 {
985 node->prev->next = node->next;
986 node->next->prev = node->prev;
987 node->prev = list->tasks->prev;
988 node->next = list->tasks;
989 list->tasks = node;
990 node->prev->next = node;
991 node->next->prev = node;
992 }
993 list->last_parent_depends_on = node;
994 }
995
996 /* Given a parent_depends_on TASK in its parent's children_queue, move
997 it to the front of its priority so it is run as soon as possible.
998
999 PARENT is passed as an optimization.
1000
1001 (This function could be defined in priority_queue.c, but we want it
1002 inlined, and putting it in priority_queue.h is not an option, given
1003 that gomp_task has not been properly defined at that point). */
1004
1005 static void inline
1006 priority_queue_upgrade_task (struct gomp_task *task,
1007 struct gomp_task *parent)
1008 {
1009 struct priority_queue *head = &parent->children_queue;
1010 struct priority_node *node = &task->pnode[PQ_CHILDREN];
1011 #if _LIBGOMP_CHECKING_
1012 if (!task->parent_depends_on)
1013 gomp_fatal ("priority_queue_upgrade_task: task must be a "
1014 "parent_depends_on task");
1015 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
1016 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
1017 #endif
1018 if (priority_queue_multi_p (head))
1019 {
1020 struct priority_list *list
1021 = priority_queue_lookup_priority (head, task->priority);
1022 priority_list_upgrade_task (list, node);
1023 }
1024 else
1025 priority_list_upgrade_task (&head->l, node);
1026 }
1027
1028 /* Given a CHILD_TASK in LIST that is about to be executed, move it out of
1029 the way in LIST so that other tasks can be considered for
1030 execution. LIST contains tasks of type TYPE.
1031
1032 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1033 if applicable. */
1034
1035 static void inline
1036 priority_list_downgrade_task (enum priority_queue_type type,
1037 struct priority_list *list,
1038 struct gomp_task *child_task)
1039 {
1040 struct priority_node *node = task_to_priority_node (type, child_task);
1041 if (list->tasks == node)
1042 list->tasks = node->next;
1043 else if (node->next != list->tasks)
1044 {
1045 /* The task in NODE is about to become TIED and TIED tasks
1046 cannot come before WAITING tasks. If we're about to
1047 leave the queue in such an indeterminate state, rewire
1048 things appropriately. However, a TIED task at the end is
1049 perfectly fine. */
1050 struct gomp_task *next_task = priority_node_to_task (type, node->next);
1051 if (next_task->kind == GOMP_TASK_WAITING)
1052 {
1053 /* Remove from list. */
1054 node->prev->next = node->next;
1055 node->next->prev = node->prev;
1056 /* Rewire at the end. */
1057 node->next = list->tasks;
1058 node->prev = list->tasks->prev;
1059 list->tasks->prev->next = node;
1060 list->tasks->prev = node;
1061 }
1062 }
1063
1064 /* If the current task is the last_parent_depends_on for its
1065 priority, adjust last_parent_depends_on appropriately. */
1066 if (__builtin_expect (child_task->parent_depends_on, 0)
1067 && list->last_parent_depends_on == node)
1068 {
1069 struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
1070 if (node->prev != node
1071 && prev_child->kind == GOMP_TASK_WAITING
1072 && prev_child->parent_depends_on)
1073 list->last_parent_depends_on = node->prev;
1074 else
1075 {
1076 /* There are no more parent_depends_on entries waiting
1077 to run, clear the list. */
1078 list->last_parent_depends_on = NULL;
1079 }
1080 }
1081 }
1082
1083 /* Given a TASK in HEAD that is about to be executed, move it out of
1084 the way so that other tasks can be considered for execution. HEAD
1085 contains tasks of type TYPE.
1086
1087 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
1088 if applicable.
1089
1090 (This function could be defined in priority_queue.c, but we want it
1091 inlined, and putting it in priority_queue.h is not an option, given
1092 that gomp_task has not been properly defined at that point). */
1093
1094 static void inline
1095 priority_queue_downgrade_task (enum priority_queue_type type,
1096 struct priority_queue *head,
1097 struct gomp_task *task)
1098 {
1099 #if _LIBGOMP_CHECKING_
1100 if (!priority_queue_task_in_queue_p (type, head, task))
1101 gomp_fatal ("Attempt to downgrade missing task %p", task);
1102 #endif
1103 if (priority_queue_multi_p (head))
1104 {
1105 struct priority_list *list
1106 = priority_queue_lookup_priority (head, task->priority);
1107 priority_list_downgrade_task (type, list, task);
1108 }
1109 else
1110 priority_list_downgrade_task (type, &head->l, task);
1111 }
1112
1113 /* Set up CHILD_TASK to execute.  This is done by setting the task to
1114 TIED, and updating all relevant queues so that CHILD_TASK is no
1115 longer chosen for scheduling. Also, remove CHILD_TASK from the
1116 overall team task queue entirely.
1117
1118 Return TRUE if task or its containing taskgroup has been
1119 cancelled. */
1120
1121 static inline bool
1122 gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
1123 struct gomp_team *team)
1124 {
1125 #if _LIBGOMP_CHECKING_
1126 if (child_task->parent)
1127 priority_queue_verify (PQ_CHILDREN,
1128 &child_task->parent->children_queue, true);
1129 if (child_task->taskgroup)
1130 priority_queue_verify (PQ_TASKGROUP,
1131 &child_task->taskgroup->taskgroup_queue, false);
1132 priority_queue_verify (PQ_TEAM, &team->task_queue, false);
1133 #endif
1134
1135 /* Task is about to go tied, move it out of the way. */
1136 if (parent)
1137 priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
1138 child_task);
1139
1140 /* Task is about to go tied, move it out of the way. */
1141 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1142 if (taskgroup)
1143 priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1144 child_task);
1145
1146 priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
1147 MEMMODEL_RELAXED);
1148 child_task->pnode[PQ_TEAM].next = NULL;
1149 child_task->pnode[PQ_TEAM].prev = NULL;
1150 child_task->kind = GOMP_TASK_TIED;
1151
1152 if (--team->task_queued_count == 0)
1153 gomp_team_barrier_clear_task_pending (&team->barrier);
1154 if (__builtin_expect (gomp_cancel_var, 0)
1155 && !child_task->copy_ctors_done)
1156 {
1157 if (gomp_team_barrier_cancelled (&team->barrier))
1158 return true;
1159 if (taskgroup)
1160 {
1161 if (taskgroup->cancelled)
1162 return true;
1163 if (taskgroup->workshare
1164 && taskgroup->prev
1165 && taskgroup->prev->cancelled)
1166 return true;
1167 }
1168 }
1169 return false;
1170 }
1171
1172 static void
1173 gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
1174 {
1175 struct gomp_task *parent = child_task->parent;
1176 size_t i;
1177
1178 for (i = 0; i < child_task->depend_count; i++)
1179 if (!child_task->depend[i].redundant)
1180 {
1181 if (child_task->depend[i].next)
1182 child_task->depend[i].next->prev = child_task->depend[i].prev;
1183 if (child_task->depend[i].prev)
1184 child_task->depend[i].prev->next = child_task->depend[i].next;
1185 else
1186 {
1187 hash_entry_type *slot
1188 = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1189 NO_INSERT);
1190 if (*slot != &child_task->depend[i])
1191 abort ();
1192 if (child_task->depend[i].next)
1193 *slot = child_task->depend[i].next;
1194 else
1195 htab_clear_slot (parent->depend_hash, slot);
1196 }
1197 }
1198 }
1199
1200 /* After a CHILD_TASK has been run, adjust the dependency queue for
1201 each task that depends on CHILD_TASK, to record the fact that there
1202 is one less dependency to worry about. If a task that depended on
1203 CHILD_TASK now has no dependencies, place it in the various queues
1204 so it gets scheduled to run.
1205
1206 TEAM is the team to which CHILD_TASK belongs. */
1207
1208 static size_t
1209 gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1210 struct gomp_team *team)
1211 {
1212 struct gomp_task *parent = child_task->parent;
1213 size_t i, count = child_task->dependers->n_elem, ret = 0;
1214 for (i = 0; i < count; i++)
1215 {
1216 struct gomp_task *task = child_task->dependers->elem[i];
1217
1218 /* CHILD_TASK satisfies a dependency for TASK. Keep track of
1219 TASK's remaining dependencies. Once TASK has no other
1220 dependencies, put it into the various queues so it will get
1221 scheduled for execution. */
1222 if (--task->num_dependees != 0)
1223 continue;
1224
1225 struct gomp_taskgroup *taskgroup = task->taskgroup;
1226 if (parent)
1227 {
1228 priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1229 task, task->priority,
1230 PRIORITY_INSERT_BEGIN,
1231 /*adjust_parent_depends_on=*/true,
1232 task->parent_depends_on);
1233 if (parent->taskwait)
1234 {
1235 if (parent->taskwait->in_taskwait)
1236 {
1237 /* One more task has had its dependencies met.
1238 Inform any waiters. */
1239 parent->taskwait->in_taskwait = false;
1240 gomp_sem_post (&parent->taskwait->taskwait_sem);
1241 }
1242 else if (parent->taskwait->in_depend_wait)
1243 {
1244 /* One more task has had its dependencies met.
1245 Inform any waiters. */
1246 parent->taskwait->in_depend_wait = false;
1247 gomp_sem_post (&parent->taskwait->taskwait_sem);
1248 }
1249 }
1250 }
1251 else
1252 task->parent = NULL;
1253 if (taskgroup)
1254 {
1255 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1256 task, task->priority,
1257 PRIORITY_INSERT_BEGIN,
1258 /*adjust_parent_depends_on=*/false,
1259 task->parent_depends_on);
1260 if (taskgroup->in_taskgroup_wait)
1261 {
1262 /* One more task has had its dependencies met.
1263 Inform any waiters. */
1264 taskgroup->in_taskgroup_wait = false;
1265 gomp_sem_post (&taskgroup->taskgroup_sem);
1266 }
1267 }
1268 priority_queue_insert (PQ_TEAM, &team->task_queue,
1269 task, task->priority,
1270 PRIORITY_INSERT_END,
1271 /*adjust_parent_depends_on=*/false,
1272 task->parent_depends_on);
1273 ++team->task_count;
1274 ++team->task_queued_count;
1275 ++ret;
1276 }
1277 free (child_task->dependers);
1278 child_task->dependers = NULL;
1279 if (ret > 1)
1280 gomp_team_barrier_set_task_pending (&team->barrier);
1281 return ret;
1282 }
1283
1284 static inline size_t
1285 gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1286 struct gomp_team *team)
1287 {
1288 if (child_task->depend_count == 0)
1289 return 0;
1290
1291 /* If parent is gone already, the hash table is freed and nothing
1292 will use the hash table anymore, so there is no need to remove anything from it. */
1293 if (child_task->parent != NULL)
1294 gomp_task_run_post_handle_depend_hash (child_task);
1295
1296 if (child_task->dependers == NULL)
1297 return 0;
1298
1299 return gomp_task_run_post_handle_dependers (child_task, team);
1300 }
1301
1302 /* Remove CHILD_TASK from its parent. */
1303
1304 static inline void
1305 gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1306 {
1307 struct gomp_task *parent = child_task->parent;
1308 if (parent == NULL)
1309 return;
1310
1311 /* If this was the last task the parent was depending on,
1312 synchronize with gomp_task_maybe_wait_for_dependencies so it can
1313 clean up and return. */
1314 if (__builtin_expect (child_task->parent_depends_on, 0)
1315 && --parent->taskwait->n_depend == 0
1316 && parent->taskwait->in_depend_wait)
1317 {
1318 parent->taskwait->in_depend_wait = false;
1319 gomp_sem_post (&parent->taskwait->taskwait_sem);
1320 }
1321
1322 if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1323 child_task, MEMMODEL_RELEASE)
1324 && parent->taskwait && parent->taskwait->in_taskwait)
1325 {
1326 parent->taskwait->in_taskwait = false;
1327 gomp_sem_post (&parent->taskwait->taskwait_sem);
1328 }
1329 child_task->pnode[PQ_CHILDREN].next = NULL;
1330 child_task->pnode[PQ_CHILDREN].prev = NULL;
1331 }
1332
1333 /* Remove CHILD_TASK from its taskgroup. */
1334
1335 static inline void
1336 gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1337 {
1338 struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1339 if (taskgroup == NULL)
1340 return;
1341 bool empty = priority_queue_remove (PQ_TASKGROUP,
1342 &taskgroup->taskgroup_queue,
1343 child_task, MEMMODEL_RELAXED);
1344 child_task->pnode[PQ_TASKGROUP].next = NULL;
1345 child_task->pnode[PQ_TASKGROUP].prev = NULL;
1346 if (taskgroup->num_children > 1)
1347 --taskgroup->num_children;
1348 else
1349 {
1350 /* We access taskgroup->num_children in GOMP_taskgroup_end
1351 outside of the task lock mutex region, so
1352 we need a release barrier here to ensure memory
1353 written by child_task->fn above is flushed
1354 before the zero is written. */
1355 __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1356 }
1357 if (empty && taskgroup->in_taskgroup_wait)
1358 {
1359 taskgroup->in_taskgroup_wait = false;
1360 gomp_sem_post (&taskgroup->taskgroup_sem);
1361 }
1362 }
1363
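/* Scheduling point at a team barrier (added description): repeatedly take
   WAITING tasks off team->task_queue and run them.  The last thread to
   arrive marks the barrier as waiting for tasks; once team->task_count
   drops to zero the barrier is completed and the other threads are woken.  */
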
1364 void
1365 gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1366 {
1367 struct gomp_thread *thr = gomp_thread ();
1368 struct gomp_team *team = thr->ts.team;
1369 struct gomp_task *task = thr->task;
1370 struct gomp_task *child_task = NULL;
1371 struct gomp_task *to_free = NULL;
1372 int do_wake = 0;
1373
1374 gomp_mutex_lock (&team->task_lock);
1375 if (gomp_barrier_last_thread (state))
1376 {
1377 if (team->task_count == 0)
1378 {
1379 gomp_team_barrier_done (&team->barrier, state);
1380 gomp_mutex_unlock (&team->task_lock);
1381 gomp_team_barrier_wake (&team->barrier, 0);
1382 return;
1383 }
1384 gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1385 }
1386
1387 while (1)
1388 {
1389 bool cancelled = false;
1390
1391 if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1392 {
1393 bool ignored;
1394 child_task
1395 = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1396 PQ_IGNORED, NULL,
1397 &ignored);
1398 cancelled = gomp_task_run_pre (child_task, child_task->parent,
1399 team);
1400 if (__builtin_expect (cancelled, 0))
1401 {
1402 if (to_free)
1403 {
1404 gomp_finish_task (to_free);
1405 free (to_free);
1406 to_free = NULL;
1407 }
1408 goto finish_cancelled;
1409 }
1410 team->task_running_count++;
1411 child_task->in_tied_task = true;
1412 }
1413 else if (team->task_count == 0
1414 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1415 {
1416 gomp_team_barrier_done (&team->barrier, state);
1417 gomp_mutex_unlock (&team->task_lock);
1418 gomp_team_barrier_wake (&team->barrier, 0);
1419 if (to_free)
1420 {
1421 gomp_finish_task (to_free);
1422 free (to_free);
1423 }
1424 return;
1425 }
1426 gomp_mutex_unlock (&team->task_lock);
1427 if (do_wake)
1428 {
1429 gomp_team_barrier_wake (&team->barrier, do_wake);
1430 do_wake = 0;
1431 }
1432 if (to_free)
1433 {
1434 gomp_finish_task (to_free);
1435 free (to_free);
1436 to_free = NULL;
1437 }
1438 if (child_task)
1439 {
1440 thr->task = child_task;
1441 if (__builtin_expect (child_task->fn == NULL, 0))
1442 {
1443 if (gomp_target_task_fn (child_task->fn_data))
1444 {
1445 thr->task = task;
1446 gomp_mutex_lock (&team->task_lock);
1447 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1448 team->task_running_count--;
1449 struct gomp_target_task *ttask
1450 = (struct gomp_target_task *) child_task->fn_data;
1451 /* If GOMP_PLUGIN_target_task_completion has run already
1452 in between gomp_target_task_fn and the mutex lock,
1453 perform the requeuing here. */
1454 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1455 gomp_target_task_completion (team, child_task);
1456 else
1457 ttask->state = GOMP_TARGET_TASK_RUNNING;
1458 child_task = NULL;
1459 continue;
1460 }
1461 }
1462 else
1463 child_task->fn (child_task->fn_data);
1464 thr->task = task;
1465 }
1466 else
1467 return;
1468 gomp_mutex_lock (&team->task_lock);
1469 if (child_task)
1470 {
1471 if (child_task->detach_team)
1472 {
1473 assert (child_task->detach_team == team);
1474 child_task->kind = GOMP_TASK_DETACHED;
1475 ++team->task_detach_count;
1476 --team->task_running_count;
1477 gomp_debug (0,
1478 "thread %d: task with event %p finished without "
1479 "completion event fulfilled in team barrier\n",
1480 thr->ts.team_id, child_task);
1481 child_task = NULL;
1482 continue;
1483 }
1484
1485 finish_cancelled:;
1486 size_t new_tasks
1487 = gomp_task_run_post_handle_depend (child_task, team);
1488 gomp_task_run_post_remove_parent (child_task);
1489 gomp_clear_parent (&child_task->children_queue);
1490 gomp_task_run_post_remove_taskgroup (child_task);
1491 to_free = child_task;
1492 if (!cancelled)
1493 team->task_running_count--;
1494 child_task = NULL;
1495 if (new_tasks > 1)
1496 {
1497 do_wake = team->nthreads - team->task_running_count;
1498 if (do_wake > new_tasks)
1499 do_wake = new_tasks;
1500 }
1501 --team->task_count;
1502 }
1503 }
1504 }
1505
1506 /* Called when encountering a taskwait directive.
1507
1508 Wait for all children of the current task. */
1509
1510 void
1511 GOMP_taskwait (void)
1512 {
1513 struct gomp_thread *thr = gomp_thread ();
1514 struct gomp_team *team = thr->ts.team;
1515 struct gomp_task *task = thr->task;
1516 struct gomp_task *child_task = NULL;
1517 struct gomp_task *to_free = NULL;
1518 struct gomp_taskwait taskwait;
1519 int do_wake = 0;
1520
1521 /* The acquire barrier on load of task->children here synchronizes
1522 with the write of a NULL in gomp_task_run_post_remove_parent. It is
1523 not necessary that we synchronize with other non-NULL writes at
1524 this point, but we must ensure that all writes to memory by a
1525 child thread task work function are seen before we exit from
1526 GOMP_taskwait. */
1527 if (task == NULL
1528 || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1529 return;
1530
1531 memset (&taskwait, 0, sizeof (taskwait));
1532 bool child_q = false;
1533 gomp_mutex_lock (&team->task_lock);
1534 while (1)
1535 {
1536 bool cancelled = false;
1537 if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1538 {
1539 bool destroy_taskwait = task->taskwait != NULL;
1540 task->taskwait = NULL;
1541 gomp_mutex_unlock (&team->task_lock);
1542 if (to_free)
1543 {
1544 gomp_finish_task (to_free);
1545 free (to_free);
1546 }
1547 if (destroy_taskwait)
1548 gomp_sem_destroy (&taskwait.taskwait_sem);
1549 return;
1550 }
1551 struct gomp_task *next_task
1552 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1553 PQ_TEAM, &team->task_queue, &child_q);
1554 if (next_task->kind == GOMP_TASK_WAITING)
1555 {
1556 child_task = next_task;
1557 cancelled
1558 = gomp_task_run_pre (child_task, task, team);
1559 if (__builtin_expect (cancelled, 0))
1560 {
1561 if (to_free)
1562 {
1563 gomp_finish_task (to_free);
1564 free (to_free);
1565 to_free = NULL;
1566 }
1567 goto finish_cancelled;
1568 }
1569 }
1570 else
1571 {
1572 /* All tasks we are waiting for are either running in other
1573 threads, are detached and waiting for the completion event to be
1574 fulfilled, or they are tasks that have not had their
1575 dependencies met (so they're not even in the queue). Wait
1576 for them. */
1577 if (task->taskwait == NULL)
1578 {
1579 taskwait.in_depend_wait = false;
1580 gomp_sem_init (&taskwait.taskwait_sem, 0);
1581 task->taskwait = &taskwait;
1582 }
1583 taskwait.in_taskwait = true;
1584 }
1585 gomp_mutex_unlock (&team->task_lock);
1586 if (do_wake)
1587 {
1588 gomp_team_barrier_wake (&team->barrier, do_wake);
1589 do_wake = 0;
1590 }
1591 if (to_free)
1592 {
1593 gomp_finish_task (to_free);
1594 free (to_free);
1595 to_free = NULL;
1596 }
1597 if (child_task)
1598 {
1599 thr->task = child_task;
1600 if (__builtin_expect (child_task->fn == NULL, 0))
1601 {
1602 if (gomp_target_task_fn (child_task->fn_data))
1603 {
1604 thr->task = task;
1605 gomp_mutex_lock (&team->task_lock);
1606 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1607 struct gomp_target_task *ttask
1608 = (struct gomp_target_task *) child_task->fn_data;
1609 /* If GOMP_PLUGIN_target_task_completion has run already
1610 in between gomp_target_task_fn and the mutex lock,
1611 perform the requeuing here. */
1612 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1613 gomp_target_task_completion (team, child_task);
1614 else
1615 ttask->state = GOMP_TARGET_TASK_RUNNING;
1616 child_task = NULL;
1617 continue;
1618 }
1619 }
1620 else
1621 child_task->fn (child_task->fn_data);
1622 thr->task = task;
1623 }
1624 else
1625 gomp_sem_wait (&taskwait.taskwait_sem);
1626 gomp_mutex_lock (&team->task_lock);
1627 if (child_task)
1628 {
1629 if (child_task->detach_team)
1630 {
1631 assert (child_task->detach_team == team);
1632 child_task->kind = GOMP_TASK_DETACHED;
1633 ++team->task_detach_count;
1634 gomp_debug (0,
1635 "thread %d: task with event %p finished without "
1636 "completion event fulfilled in taskwait\n",
1637 thr->ts.team_id, child_task);
1638 child_task = NULL;
1639 continue;
1640 }
1641
1642 finish_cancelled:;
1643 size_t new_tasks
1644 = gomp_task_run_post_handle_depend (child_task, team);
1645
1646 if (child_q)
1647 {
1648 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1649 child_task, MEMMODEL_RELAXED);
1650 child_task->pnode[PQ_CHILDREN].next = NULL;
1651 child_task->pnode[PQ_CHILDREN].prev = NULL;
1652 }
1653
1654 gomp_clear_parent (&child_task->children_queue);
1655
1656 gomp_task_run_post_remove_taskgroup (child_task);
1657
1658 to_free = child_task;
1659 child_task = NULL;
1660 team->task_count--;
1661 if (new_tasks > 1)
1662 {
1663 do_wake = team->nthreads - team->task_running_count
1664 - !task->in_tied_task;
1665 if (do_wake > new_tasks)
1666 do_wake = new_tasks;
1667 }
1668 }
1669 }
1670 }
1671
1672 /* Called when encountering a taskwait directive with depend clause(s).
1673 Wait as if it were a mergeable included task construct with an empty body. */
1674
1675 void
1676 GOMP_taskwait_depend (void **depend)
1677 {
1678 struct gomp_thread *thr = gomp_thread ();
1679 struct gomp_team *team = thr->ts.team;
1680
1681 /* If parallel or taskgroup has been cancelled, return early. */
1682 if (__builtin_expect (gomp_cancel_var, 0) && team)
1683 {
1684 if (gomp_team_barrier_cancelled (&team->barrier))
1685 return;
1686 if (thr->task->taskgroup)
1687 {
1688 if (thr->task->taskgroup->cancelled)
1689 return;
1690 if (thr->task->taskgroup->workshare
1691 && thr->task->taskgroup->prev
1692 && thr->task->taskgroup->prev->cancelled)
1693 return;
1694 }
1695 }
1696
1697 if (thr->task && thr->task->depend_hash)
1698 gomp_task_maybe_wait_for_dependencies (depend);
1699 }
1700
1701 /* An undeferred task is about to run. Wait for all tasks that this
1702 undeferred task depends on.
1703
1704 This is done by first putting all known ready dependencies
1705 (dependencies that have their own dependencies met) at the top of
1706 the scheduling queues. Then we iterate through these imminently
1707 ready tasks (and possibly other high priority tasks), and run them.
1708 If we run out of ready dependencies to execute, we either wait for
1709 the remaining dependencies to finish, or wait for them to get
1710 scheduled so we can run them.
1711
1712 DEPEND is as in GOMP_task. */
1713
1714 void
1715 gomp_task_maybe_wait_for_dependencies (void **depend)
1716 {
1717 struct gomp_thread *thr = gomp_thread ();
1718 struct gomp_task *task = thr->task;
1719 struct gomp_team *team = thr->ts.team;
1720 struct gomp_task_depend_entry elem, *ent = NULL;
1721 struct gomp_taskwait taskwait;
1722 size_t orig_ndepend = (uintptr_t) depend[0];
1723 size_t nout = (uintptr_t) depend[1];
1724 size_t ndepend = orig_ndepend;
1725 size_t normal = ndepend;
1726 size_t n = 2;
1727 size_t i;
1728 size_t num_awaited = 0;
1729 struct gomp_task *child_task = NULL;
1730 struct gomp_task *to_free = NULL;
1731 int do_wake = 0;
1732
1733 if (ndepend == 0)
1734 {
1735 ndepend = nout;
1736 nout = (uintptr_t) depend[2] + (uintptr_t) depend[3];
1737 normal = nout + (uintptr_t) depend[4];
1738 n = 5;
1739 }
1740 gomp_mutex_lock (&team->task_lock);
1741 for (i = 0; i < ndepend; i++)
1742 {
1743 elem.addr = depend[i + n];
1744 elem.is_in = i >= nout;
1745 if (__builtin_expect (i >= normal, 0))
1746 {
1747 void **d = (void **) elem.addr;
1748 switch ((uintptr_t) d[1])
1749 {
1750 case GOMP_DEPEND_IN:
1751 break;
1752 case GOMP_DEPEND_OUT:
1753 case GOMP_DEPEND_INOUT:
1754 case GOMP_DEPEND_MUTEXINOUTSET:
1755 elem.is_in = 0;
1756 break;
1757 default:
1758 gomp_fatal ("unknown omp_depend_t dependence type %d",
1759 (int) (uintptr_t) d[1]);
1760 }
1761 elem.addr = d[0];
1762 }
1763 ent = htab_find (task->depend_hash, &elem);
1764 for (; ent; ent = ent->next)
1765 if (elem.is_in && ent->is_in)
1766 continue;
1767 else
1768 {
1769 struct gomp_task *tsk = ent->task;
1770 if (!tsk->parent_depends_on)
1771 {
1772 tsk->parent_depends_on = true;
1773 ++num_awaited;
1774 /* If dependency TSK itself has no dependencies and is
1775 ready to run, move it up front so that we run it as
1776 soon as possible. */
1777 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1778 priority_queue_upgrade_task (tsk, task);
1779 }
1780 }
1781 }
1782 if (num_awaited == 0)
1783 {
1784 gomp_mutex_unlock (&team->task_lock);
1785 return;
1786 }
1787
1788 memset (&taskwait, 0, sizeof (taskwait));
1789 taskwait.n_depend = num_awaited;
1790 gomp_sem_init (&taskwait.taskwait_sem, 0);
1791 task->taskwait = &taskwait;
1792
1793 while (1)
1794 {
1795 bool cancelled = false;
1796 if (taskwait.n_depend == 0)
1797 {
1798 task->taskwait = NULL;
1799 gomp_mutex_unlock (&team->task_lock);
1800 if (to_free)
1801 {
1802 gomp_finish_task (to_free);
1803 free (to_free);
1804 }
1805 gomp_sem_destroy (&taskwait.taskwait_sem);
1806 return;
1807 }
1808
1809 /* Theoretically when we have multiple priorities, we should
1810 	 choose between the highest priority item in
1811 task->children_queue and team->task_queue here, so we should
1812 use priority_queue_next_task(). However, since we are
1813 running an undeferred task, perhaps that makes all tasks it
1814 depends on undeferred, thus a priority of INF? This would
1815 	 make it unnecessary to take anything other than the
1816 	 dependencies into account here.
1817
1818 On the other hand, if we want to use priority_queue_next_task(),
1819 care should be taken to only use priority_queue_remove()
1820 below if the task was actually removed from the children
1821 queue. */
1822 bool ignored;
1823 struct gomp_task *next_task
1824 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1825 PQ_IGNORED, NULL, &ignored);
1826
1827 if (next_task->kind == GOMP_TASK_WAITING)
1828 {
1829 child_task = next_task;
1830 cancelled
1831 = gomp_task_run_pre (child_task, task, team);
1832 if (__builtin_expect (cancelled, 0))
1833 {
1834 if (to_free)
1835 {
1836 gomp_finish_task (to_free);
1837 free (to_free);
1838 to_free = NULL;
1839 }
1840 goto finish_cancelled;
1841 }
1842 }
1843 else
1844 /* All tasks we are waiting for are either running in other
1845 threads, or they are tasks that have not had their
1846 dependencies met (so they're not even in the queue). Wait
1847 for them. */
1848 taskwait.in_depend_wait = true;
1849 gomp_mutex_unlock (&team->task_lock);
1850 if (do_wake)
1851 {
1852 gomp_team_barrier_wake (&team->barrier, do_wake);
1853 do_wake = 0;
1854 }
1855 if (to_free)
1856 {
1857 gomp_finish_task (to_free);
1858 free (to_free);
1859 to_free = NULL;
1860 }
1861 if (child_task)
1862 {
1863 thr->task = child_task;
1864 if (__builtin_expect (child_task->fn == NULL, 0))
1865 {
1866 if (gomp_target_task_fn (child_task->fn_data))
1867 {
1868 thr->task = task;
1869 gomp_mutex_lock (&team->task_lock);
1870 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1871 struct gomp_target_task *ttask
1872 = (struct gomp_target_task *) child_task->fn_data;
1873 /* If GOMP_PLUGIN_target_task_completion has run already
1874 in between gomp_target_task_fn and the mutex lock,
1875 perform the requeuing here. */
1876 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1877 gomp_target_task_completion (team, child_task);
1878 else
1879 ttask->state = GOMP_TARGET_TASK_RUNNING;
1880 child_task = NULL;
1881 continue;
1882 }
1883 }
1884 else
1885 child_task->fn (child_task->fn_data);
1886 thr->task = task;
1887 }
1888 else
1889 gomp_sem_wait (&taskwait.taskwait_sem);
1890 gomp_mutex_lock (&team->task_lock);
1891 if (child_task)
1892 {
1893 finish_cancelled:;
1894 size_t new_tasks
1895 = gomp_task_run_post_handle_depend (child_task, team);
1896 if (child_task->parent_depends_on)
1897 --taskwait.n_depend;
1898
1899 priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1900 child_task, MEMMODEL_RELAXED);
1901 child_task->pnode[PQ_CHILDREN].next = NULL;
1902 child_task->pnode[PQ_CHILDREN].prev = NULL;
1903
1904 gomp_clear_parent (&child_task->children_queue);
1905 gomp_task_run_post_remove_taskgroup (child_task);
1906 to_free = child_task;
1907 child_task = NULL;
1908 team->task_count--;
1909 if (new_tasks > 1)
1910 {
1911 do_wake = team->nthreads - team->task_running_count
1912 - !task->in_tied_task;
1913 if (do_wake > new_tasks)
1914 do_wake = new_tasks;
1915 }
1916 }
1917 }
1918 }
1919
1920 /* Called when encountering a taskyield directive. */
1921
1922 void
1923 GOMP_taskyield (void)
1924 {
1925 /* Nothing at the moment. */
1926 }
1927
1928 static inline struct gomp_taskgroup *
1929 gomp_taskgroup_init (struct gomp_taskgroup *prev)
1930 {
1931 struct gomp_taskgroup *taskgroup
1932 = gomp_malloc (sizeof (struct gomp_taskgroup));
1933 taskgroup->prev = prev;
1934 priority_queue_init (&taskgroup->taskgroup_queue);
1935 taskgroup->reductions = prev ? prev->reductions : NULL;
1936 taskgroup->in_taskgroup_wait = false;
1937 taskgroup->cancelled = false;
1938 taskgroup->workshare = false;
1939 taskgroup->num_children = 0;
1940 gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1941 return taskgroup;
1942 }
1943
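/* A sketch of the usual mapping (an assumption about the compiler lowering,
   not something defined in this file; work is a placeholder): a construct
   such as

     #pragma omp taskgroup
     {
       #pragma omp task
       work ();
     }

   is expected to be bracketed by calls to GOMP_taskgroup_start and
   GOMP_taskgroup_end, the latter waiting for completion of all tasks
   created in the taskgroup region and their descendant tasks.  */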
1944 void
1945 GOMP_taskgroup_start (void)
1946 {
1947 struct gomp_thread *thr = gomp_thread ();
1948 struct gomp_team *team = thr->ts.team;
1949 struct gomp_task *task = thr->task;
1950
1951 /* If team is NULL, all tasks are executed as
1952      GOMP_TASK_UNDEFERRED tasks and thus all child tasks of the
1953      taskgroup and their descendant tasks will be finished
1954 by the time GOMP_taskgroup_end is called. */
1955 if (team == NULL)
1956 return;
1957 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
1958 }
1959
1960 void
1961 GOMP_taskgroup_end (void)
1962 {
1963 struct gomp_thread *thr = gomp_thread ();
1964 struct gomp_team *team = thr->ts.team;
1965 struct gomp_task *task = thr->task;
1966 struct gomp_taskgroup *taskgroup;
1967 struct gomp_task *child_task = NULL;
1968 struct gomp_task *to_free = NULL;
1969 int do_wake = 0;
1970
1971 if (team == NULL)
1972 return;
1973 taskgroup = task->taskgroup;
1974 if (__builtin_expect (taskgroup == NULL, 0)
1975 && thr->ts.level == 0)
1976 {
1977 /* This can happen if GOMP_taskgroup_start is called when
1978 thr->ts.team == NULL, but inside of the taskgroup there
1979 is #pragma omp target nowait that creates an implicit
1980 team with a single thread. In this case, we want to wait
1981 for all outstanding tasks in this team. */
1982 gomp_team_barrier_wait (&team->barrier);
1983 return;
1984 }
1985
1986 /* The acquire barrier on load of taskgroup->num_children here
1987 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1988 It is not necessary that we synchronize with other non-0 writes at
1989 this point, but we must ensure that all writes to memory by a
1990 child thread task work function are seen before we exit from
1991 GOMP_taskgroup_end. */
1992 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1993 goto finish;
1994
1995 bool unused;
1996 gomp_mutex_lock (&team->task_lock);
1997 while (1)
1998 {
1999 bool cancelled = false;
2000 if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
2001 MEMMODEL_RELAXED))
2002 {
2003 if (taskgroup->num_children)
2004 {
2005 if (priority_queue_empty_p (&task->children_queue,
2006 MEMMODEL_RELAXED))
2007 goto do_wait;
2008 child_task
2009 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
2010 PQ_TEAM, &team->task_queue,
2011 &unused);
2012 }
2013 else
2014 {
2015 gomp_mutex_unlock (&team->task_lock);
2016 if (to_free)
2017 {
2018 gomp_finish_task (to_free);
2019 free (to_free);
2020 }
2021 goto finish;
2022 }
2023 }
2024 else
2025 child_task
2026 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
2027 PQ_TEAM, &team->task_queue, &unused);
2028 if (child_task->kind == GOMP_TASK_WAITING)
2029 {
2030 cancelled
2031 = gomp_task_run_pre (child_task, child_task->parent, team);
2032 if (__builtin_expect (cancelled, 0))
2033 {
2034 if (to_free)
2035 {
2036 gomp_finish_task (to_free);
2037 free (to_free);
2038 to_free = NULL;
2039 }
2040 goto finish_cancelled;
2041 }
2042 }
2043 else
2044 {
2045 child_task = NULL;
2046 do_wait:
2047 /* All tasks we are waiting for are either running in other
2048 threads, or they are tasks that have not had their
2049 dependencies met (so they're not even in the queue). Wait
2050 for them. */
2051 taskgroup->in_taskgroup_wait = true;
2052 }
2053 gomp_mutex_unlock (&team->task_lock);
2054 if (do_wake)
2055 {
2056 gomp_team_barrier_wake (&team->barrier, do_wake);
2057 do_wake = 0;
2058 }
2059 if (to_free)
2060 {
2061 gomp_finish_task (to_free);
2062 free (to_free);
2063 to_free = NULL;
2064 }
2065 if (child_task)
2066 {
2067 thr->task = child_task;
2068 if (__builtin_expect (child_task->fn == NULL, 0))
2069 {
2070 if (gomp_target_task_fn (child_task->fn_data))
2071 {
2072 thr->task = task;
2073 gomp_mutex_lock (&team->task_lock);
2074 child_task->kind = GOMP_TASK_ASYNC_RUNNING;
2075 struct gomp_target_task *ttask
2076 = (struct gomp_target_task *) child_task->fn_data;
2077 /* If GOMP_PLUGIN_target_task_completion has run already
2078 in between gomp_target_task_fn and the mutex lock,
2079 perform the requeuing here. */
2080 if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2081 gomp_target_task_completion (team, child_task);
2082 else
2083 ttask->state = GOMP_TARGET_TASK_RUNNING;
2084 child_task = NULL;
2085 continue;
2086 }
2087 }
2088 else
2089 child_task->fn (child_task->fn_data);
2090 thr->task = task;
2091 }
2092 else
2093 gomp_sem_wait (&taskgroup->taskgroup_sem);
2094 gomp_mutex_lock (&team->task_lock);
2095 if (child_task)
2096 {
2097 if (child_task->detach_team)
2098 {
2099 assert (child_task->detach_team == team);
2100 child_task->kind = GOMP_TASK_DETACHED;
2101 ++team->task_detach_count;
2102 gomp_debug (0,
2103 "thread %d: task with event %p finished without "
2104 "completion event fulfilled in taskgroup\n",
2105 thr->ts.team_id, child_task);
2106 child_task = NULL;
2107 continue;
2108 }
2109
2110 finish_cancelled:;
2111 size_t new_tasks
2112 = gomp_task_run_post_handle_depend (child_task, team);
2113 gomp_task_run_post_remove_parent (child_task);
2114 gomp_clear_parent (&child_task->children_queue);
2115 gomp_task_run_post_remove_taskgroup (child_task);
2116 to_free = child_task;
2117 child_task = NULL;
2118 team->task_count--;
2119 if (new_tasks > 1)
2120 {
2121 do_wake = team->nthreads - team->task_running_count
2122 - !task->in_tied_task;
2123 if (do_wake > new_tasks)
2124 do_wake = new_tasks;
2125 }
2126 }
2127 }
2128
2129 finish:
2130 task->taskgroup = taskgroup->prev;
2131 gomp_sem_destroy (&taskgroup->taskgroup_sem);
2132 free (taskgroup);
2133 }
2134
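/* Register the chain of task reduction descriptors DATA.  Unless ORIG is
   non-NULL (the worksharing case, where another thread has already
   allocated the memory), allocate and zero the per-thread private copies
   for each descriptor, and build a hash table keyed on the addresses of
   the original list items so that GOMP_task_reduction_remap can find
   them, linking the new chain onto the previously registered chain OLD.  */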
2135 static inline __attribute__((always_inline)) void
2136 gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig,
2137 unsigned nthreads)
2138 {
2139 size_t total_cnt = 0;
2140 uintptr_t *d = data;
2141 struct htab *old_htab = NULL, *new_htab;
2142 do
2143 {
2144 if (__builtin_expect (orig != NULL, 0))
2145 {
2146 /* For worksharing task reductions, memory has been allocated
2147 already by some other thread that encountered the construct
2148 earlier. */
2149 d[2] = orig[2];
2150 d[6] = orig[6];
2151 orig = (uintptr_t *) orig[4];
2152 }
2153 else
2154 {
2155 size_t sz = d[1] * nthreads;
2156 /* Should use omp_alloc if d[3] is not -1. */
2157 void *ptr = gomp_aligned_alloc (d[2], sz);
2158 memset (ptr, '\0', sz);
2159 d[2] = (uintptr_t) ptr;
2160 d[6] = d[2] + sz;
2161 }
2162 d[5] = 0;
2163 total_cnt += d[0];
2164 if (d[4] == 0)
2165 {
2166 d[4] = (uintptr_t) old;
2167 break;
2168 }
2169 else
2170 d = (uintptr_t *) d[4];
2171 }
2172 while (1);
2173 if (old && old[5])
2174 {
2175 old_htab = (struct htab *) old[5];
2176 total_cnt += htab_elements (old_htab);
2177 }
2178 new_htab = htab_create (total_cnt);
2179 if (old_htab)
2180 {
2181 /* Copy old hash table, like in htab_expand. */
2182 hash_entry_type *p, *olimit;
2183 new_htab->n_elements = htab_elements (old_htab);
2184 olimit = old_htab->entries + old_htab->size;
2185 p = old_htab->entries;
2186 do
2187 {
2188 hash_entry_type x = *p;
2189 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
2190 *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x;
2191 p++;
2192 }
2193 while (p < olimit);
2194 }
2195 d = data;
2196 do
2197 {
2198 size_t j;
2199 for (j = 0; j < d[0]; ++j)
2200 {
2201 uintptr_t *p = d + 7 + j * 3;
2202 p[2] = (uintptr_t) d;
2203 	  /* Ugly hack: hash_entry_type is defined for the task dependencies,
2204 	     which hash on their first element, a pointer.  Here we likewise
2205 	     need to hash on the first sizeof (uintptr_t) bytes, which contain
2206 	     a pointer.  Hide the cast from the compiler.  */
2207 hash_entry_type n;
2208 __asm ("" : "=g" (n) : "0" (p));
2209 *htab_find_slot (&new_htab, n, INSERT) = n;
2210 }
2211 if (d[4] == (uintptr_t) old)
2212 break;
2213 else
2214 d = (uintptr_t *) d[4];
2215 }
2216 while (1);
2217 d[5] = (uintptr_t) new_htab;
2218 }
2219
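/* Create an artificial team consisting of just the current thread, for
   use by orphaned constructs (such as task reductions encountered outside
   of a parallel region) that need a team and an implicit task to attach
   their state to.  */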
2220 static void
2221 gomp_create_artificial_team (void)
2222 {
2223 struct gomp_thread *thr = gomp_thread ();
2224 struct gomp_task_icv *icv;
2225 struct gomp_team *team = gomp_new_team (1);
2226 struct gomp_task *task = thr->task;
2227 struct gomp_task **implicit_task = &task;
2228 icv = task ? &task->icv : &gomp_global_icv;
2229 team->prev_ts = thr->ts;
2230 thr->ts.team = team;
2231 thr->ts.team_id = 0;
2232 thr->ts.work_share = &team->work_shares[0];
2233 thr->ts.last_work_share = NULL;
2234 #ifdef HAVE_SYNC_BUILTINS
2235 thr->ts.single_count = 0;
2236 #endif
2237 thr->ts.static_trip = 0;
2238 thr->task = &team->implicit_task[0];
2239 gomp_init_task (thr->task, NULL, icv);
2240 while (*implicit_task
2241 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
2242 implicit_task = &(*implicit_task)->parent;
2243 if (*implicit_task)
2244 {
2245 thr->task = *implicit_task;
2246 gomp_end_task ();
2247 free (*implicit_task);
2248 thr->task = &team->implicit_task[0];
2249 }
2250 #ifdef LIBGOMP_USE_PTHREADS
2251 else
2252 pthread_setspecific (gomp_thread_destructor, thr);
2253 #endif
2254 if (implicit_task != &task)
2255 {
2256 *implicit_task = thr->task;
2257 thr->task = task;
2258 }
2259 }
2260
2261 /* The format of data is:
2262 data[0] cnt
2263 data[1] size
2264 data[2] alignment (on output array pointer)
2265 data[3] allocator (-1 if malloc allocator)
2266 data[4] next pointer
2267 data[5] used internally (htab pointer)
2268 data[6] used internally (end of array)
2269 cnt times
2270 ent[0] address
2271 ent[1] offset
2272 ent[2] used internally (pointer to data[0])
2273 The entries are sorted by increasing offset, so that a binary
2274    search can be performed.  Normally, data[8] is 0; the exception is
2275    worksharing construct task reductions in a cancellable parallel,
2276 where at offset 0 there should be space for a pointer and an integer
2277 which are used internally. */
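/* As a sketch of how the layout above is indexed (derived from
   GOMP_task_reduction_remap below; j and thread_id are illustrative
   names only):

     uintptr_t *ent = data + 7 + 3 * j;
     void *orig_item = (void *) ent[0];
     uintptr_t off = ent[1];
     void *priv_copy = (void *) (data[2] + thread_id * data[1] + off);

   i.e. data[2] is the base of the per-thread array, data[1] the size of
   one thread's chunk and ent[1] the offset of the item within a chunk.  */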
2278
2279 void
2280 GOMP_taskgroup_reduction_register (uintptr_t *data)
2281 {
2282 struct gomp_thread *thr = gomp_thread ();
2283 struct gomp_team *team = thr->ts.team;
2284 struct gomp_task *task;
2285 unsigned nthreads;
2286 if (__builtin_expect (team == NULL, 0))
2287 {
2288 /* The task reduction code needs a team and task, so for
2289 orphaned taskgroups just create the implicit team. */
2290 gomp_create_artificial_team ();
2291 ialias_call (GOMP_taskgroup_start) ();
2292 team = thr->ts.team;
2293 }
2294 nthreads = team->nthreads;
2295 task = thr->task;
2296 gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads);
2297 task->taskgroup->reductions = data;
2298 }
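/* For illustration only (an assumption about the usual lowering, not part
   of this file; compute_part is a placeholder): a region such as

     int sum = 0;
     #pragma omp taskgroup task_reduction (+: sum)
     {
       #pragma omp task in_reduction (+: sum)
       sum += compute_part ();
     }

   is expected to call GOMP_taskgroup_reduction_register on entry to the
   taskgroup, with a descriptor array in the format described above, and
   GOMP_taskgroup_reduction_unregister once the taskgroup has completed.  */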
2299
2300 void
2301 GOMP_taskgroup_reduction_unregister (uintptr_t *data)
2302 {
2303 uintptr_t *d = data;
2304 htab_free ((struct htab *) data[5]);
2305 do
2306 {
2307 gomp_aligned_free ((void *) d[2]);
2308 d = (uintptr_t *) d[4];
2309 }
2310 while (d && !d[5]);
2311 }
2312 ialias (GOMP_taskgroup_reduction_unregister)
2313
2314 /* For i = 0 to cnt-1, remap ptrs[i] which is either address of the
2315 original list item or address of previously remapped original list
2316 item to address of the private copy, store that to ptrs[i].
2317 For i < cntorig, additionally set ptrs[cnt+i] to the address of
2318 the original list item. */
2319
2320 void
2321 GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs)
2322 {
2323 struct gomp_thread *thr = gomp_thread ();
2324 struct gomp_task *task = thr->task;
2325 unsigned id = thr->ts.team_id;
2326 uintptr_t *data = task->taskgroup->reductions;
2327 uintptr_t *d;
2328 struct htab *reduction_htab = (struct htab *) data[5];
2329 size_t i;
2330 for (i = 0; i < cnt; ++i)
2331 {
2332 hash_entry_type ent, n;
2333 __asm ("" : "=g" (ent) : "0" (ptrs + i));
2334 n = htab_find (reduction_htab, ent);
2335 if (n)
2336 {
2337 uintptr_t *p;
2338 __asm ("" : "=g" (p) : "0" (n));
2339 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i],
2340 p[1] is the offset within the allocated chunk for each
2341 thread, p[2] is the array registered with
2342 GOMP_taskgroup_reduction_register, d[2] is the base of the
2343 allocated memory and d[1] is the size of the allocated chunk
2344 for one thread. */
2345 d = (uintptr_t *) p[2];
2346 ptrs[i] = (void *) (d[2] + id * d[1] + p[1]);
2347 if (__builtin_expect (i < cntorig, 0))
2348 ptrs[cnt + i] = (void *) p[0];
2349 continue;
2350 }
2351 d = data;
2352 while (d != NULL)
2353 {
2354 if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6])
2355 break;
2356 d = (uintptr_t *) d[4];
2357 }
2358 if (d == NULL)
2359 gomp_fatal ("couldn't find matching task_reduction or reduction with "
2360 "task modifier for %p", ptrs[i]);
2361 uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1];
2362 ptrs[i] = (void *) (d[2] + id * d[1] + off);
2363 if (__builtin_expect (i < cntorig, 0))
2364 {
2365 size_t lo = 0, hi = d[0] - 1;
2366 while (lo <= hi)
2367 {
2368 size_t m = (lo + hi) / 2;
2369 if (d[7 + 3 * m + 1] < off)
2370 lo = m + 1;
2371 else if (d[7 + 3 * m + 1] == off)
2372 {
2373 ptrs[cnt + i] = (void *) d[7 + 3 * m];
2374 break;
2375 }
2376 else
2377 hi = m - 1;
2378 }
2379 if (lo > hi)
2380 gomp_fatal ("couldn't find matching task_reduction or reduction "
2381 "with task modifier for %p", ptrs[i]);
2382 }
2383 }
2384 }
2385
2386 struct gomp_taskgroup *
2387 gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads)
2388 {
2389 struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL);
2390 gomp_reduction_register (data, NULL, NULL, nthreads);
2391 taskgroup->reductions = data;
2392 return taskgroup;
2393 }
2394
2395 void
2396 gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig)
2397 {
2398 struct gomp_thread *thr = gomp_thread ();
2399 struct gomp_team *team = thr->ts.team;
2400 struct gomp_task *task = thr->task;
2401 unsigned nthreads = team->nthreads;
2402 gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads);
2403 task->taskgroup->reductions = data;
2404 }
2405
2406 void
2407 gomp_workshare_taskgroup_start (void)
2408 {
2409 struct gomp_thread *thr = gomp_thread ();
2410 struct gomp_team *team = thr->ts.team;
2411 struct gomp_task *task;
2412
2413 if (team == NULL)
2414 {
2415 gomp_create_artificial_team ();
2416 team = thr->ts.team;
2417 }
2418 task = thr->task;
2419 task->taskgroup = gomp_taskgroup_init (task->taskgroup);
2420 task->taskgroup->workshare = true;
2421 }
2422
2423 void
2424 GOMP_workshare_task_reduction_unregister (bool cancelled)
2425 {
2426 struct gomp_thread *thr = gomp_thread ();
2427 struct gomp_task *task = thr->task;
2428 struct gomp_team *team = thr->ts.team;
2429 uintptr_t *data = task->taskgroup->reductions;
2430 ialias_call (GOMP_taskgroup_end) ();
2431 if (thr->ts.team_id == 0)
2432 ialias_call (GOMP_taskgroup_reduction_unregister) (data);
2433 else
2434 htab_free ((struct htab *) data[5]);
2435
2436 if (!cancelled)
2437 gomp_team_barrier_wait (&team->barrier);
2438 }
2439
2440 int
2441 omp_in_final (void)
2442 {
2443 struct gomp_thread *thr = gomp_thread ();
2444 return thr->task && thr->task->final_task;
2445 }
2446
2447 ialias (omp_in_final)
2448
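/* Fulfill the completion event of a detachable task.  A sketch of typical
   usage (an assumption about user code, not taken from this file;
   start_async is a placeholder):

     omp_event_handle_t ev;
     #pragma omp task detach (ev)
     start_async ();

   and later, from any thread, once the asynchronous work has finished:

     omp_fulfill_event (ev);

   The task is not considered complete until its event has been fulfilled;
   the code below handles both the case where the task has already finished
   running (GOMP_TASK_DETACHED) and the case where it has not.  */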
2449 void
2450 omp_fulfill_event (omp_event_handle_t event)
2451 {
2452 struct gomp_task *task = (struct gomp_task *) event;
2453 if (!task->deferred_p)
2454 {
2455 if (gomp_sem_getcount (task->completion_sem) > 0)
2456 gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task);
2457
2458 gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n",
2459 task);
2460 gomp_sem_post (task->completion_sem);
2461 return;
2462 }
2463
2464 struct gomp_team *team = __atomic_load_n (&task->detach_team,
2465 MEMMODEL_RELAXED);
2466 if (!team)
2467 gomp_fatal ("omp_fulfill_event: %p event is invalid or has already "
2468 "been fulfilled!\n", task);
2469
2470 gomp_mutex_lock (&team->task_lock);
2471 if (task->kind != GOMP_TASK_DETACHED)
2472 {
2473 /* The task has not finished running yet. */
2474 gomp_debug (0,
2475 "omp_fulfill_event: %p event fulfilled for unfinished "
2476 "task\n", task);
2477 __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED);
2478 gomp_mutex_unlock (&team->task_lock);
2479 return;
2480 }
2481
2482 gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n",
2483 task);
2484 size_t new_tasks = gomp_task_run_post_handle_depend (task, team);
2485 gomp_task_run_post_remove_parent (task);
2486 gomp_clear_parent (&task->children_queue);
2487 gomp_task_run_post_remove_taskgroup (task);
2488 team->task_count--;
2489 team->task_detach_count--;
2490
2491 int do_wake = 0;
2492 bool shackled_thread_p = team == gomp_thread ()->ts.team;
2493 if (new_tasks > 0)
2494 {
2495 /* Wake up threads to run new tasks. */
2496 gomp_team_barrier_set_task_pending (&team->barrier);
2497 do_wake = team->nthreads - team->task_running_count;
2498 if (do_wake > new_tasks)
2499 do_wake = new_tasks;
2500 }
2501
2502 if (!shackled_thread_p
2503 && !do_wake
2504 && team->task_detach_count == 0
2505 && gomp_team_barrier_waiting_for_tasks (&team->barrier))
2506 /* Ensure that at least one thread is woken up to signal that the
2507 barrier can finish. */
2508 do_wake = 1;
2509
2510 /* If we are running in an unshackled thread, the team might vanish before
2511 gomp_team_barrier_wake is run if we release the lock first, so keep the
2512 lock for the call in that case. */
2513 if (shackled_thread_p)
2514 gomp_mutex_unlock (&team->task_lock);
2515 if (do_wake)
2516 gomp_team_barrier_wake (&team->barrier, do_wake);
2517 if (!shackled_thread_p)
2518 gomp_mutex_unlock (&team->task_lock);
2519
2520 gomp_finish_task (task);
2521 free (task);
2522 }
2523
2524 ialias (omp_fulfill_event)
2525