/* Copyright (C) 2015-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for unsigned long long variant.  */

/* Called when encountering a taskloop construct.  If the GOMP_TASK_FLAG_IF
   bit in FLAGS is clear, we must not delay in executing the generated tasks.
   If the GOMP_TASK_FLAG_UNTIED bit is set, the tasks may be executed by any
   member of the team.  */

void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
               long arg_size, long arg_align, unsigned flags,
               unsigned long num_tasks, int priority,
               TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    {
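    /* Shared exit for the cancelled and the empty iteration space
       cases.  */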
    early_return:
      if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
          == GOMP_TASK_FLAG_REDUCTION)
        {
          struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
          uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
          /* Tell callers GOMP_taskgroup_reduction_register has not been
             called.  */
          ptr[2] = 0;
        }
      return;
    }

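  /* Compute the number of iterations N.  The signed long variant rounds
     the trip count up by adjusting the step before dividing; the unsigned
     long long variant handles the upward and downward directions
     separately.  An empty iteration space takes the early return path
     above.  */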
#ifdef TYPE_is_long
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
        goto early_return;
      s--;
    }
  else
    {
      if (start <= end)
        goto early_return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
        goto early_return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
        goto early_return;
      n = (start - end - step - 1) / -step;
    }
#endif

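  /* Split the N iterations among NUM_TASKS tasks.  TASK_STEP is the
     amount added to START for each generated task; when the split is
     uneven, tasks with index <= NFIRST receive one extra iteration.  */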
  TYPE task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
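      /* With a grainsize clause NUM_TASKS actually carries the requested
         grainsize; derive the number of tasks from it.  */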
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
        num_tasks = ~0UL;
#endif
      if (num_tasks <= 1)
        {
          num_tasks = 1;
          task_step = end - start;
        }
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
               && num_tasks != ~0UL
#endif
              )
        {
          UTYPE mul = num_tasks * grainsize;
          task_step = (TYPE) grainsize * step;
          if (mul != n)
            {
              task_step += step;
              nfirst = n - mul - 1;
            }
        }
      else
        {
          UTYPE div = n / num_tasks;
          UTYPE mod = n % num_tasks;
          task_step = (TYPE) div * step;
          if (mod)
            {
              task_step += step;
              nfirst = mod - 1;
            }
        }
    }
  else
    {
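      /* A num_tasks clause (or neither clause): default to one task per
         thread in the team, but never create more tasks than there are
         iterations.  */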
      if (num_tasks == 0)
        num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
        num_tasks = n;
      else
        {
          UTYPE div = n / num_tasks;
          UTYPE mod = n % num_tasks;
          task_step = (TYPE) div * step;
          if (mod)
            {
              task_step += step;
              nfirst = mod - 1;
            }
        }
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
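      /* nogroup specified: don't start a taskgroup, but still return
         early if cancellation of the enclosing taskgroup (or, for a
         workshare-internal taskgroup, of the one it nests in) is already
         pending.  */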
      if (__builtin_expect (gomp_cancel_var, 0)
          && thr->task
          && thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }
  else
    {
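      /* Wrap the generated tasks in an implicit taskgroup and register
         any task reductions with it.  */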
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
        {
          struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
          uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
          ialias_call (GOMP_taskgroup_reduction_register) (ptr);
        }
    }

  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

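  /* If the tasks cannot be deferred (IF(0), no team, an enclosing final
     task, or too many tasks already outstanding in the team), create
     undeferred tasks and run them one after another in this thread.  */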
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
        {
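          /* A copy constructor is needed: first run CPYFN for every task
             while that task is current, then invoke FN on each copied
             argument block.  */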
          struct gomp_task task[num_tasks];
          struct gomp_task *parent = thr->task;
          arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
          char buf[num_tasks * arg_size + arg_align - 1];
          char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
                                & ~(uintptr_t) (arg_align - 1));
          char *orig_arg = arg;
          for (i = 0; i < num_tasks; i++)
            {
              gomp_init_task (&task[i], parent, gomp_icv (false));
              task[i].priority = priority;
              task[i].kind = GOMP_TASK_UNDEFERRED;
              task[i].final_task = (thr->task && thr->task->final_task)
                                   || (flags & GOMP_TASK_FLAG_FINAL);
              if (thr->task)
                {
                  task[i].in_tied_task = thr->task->in_tied_task;
                  task[i].taskgroup = thr->task->taskgroup;
                }
              thr->task = &task[i];
              cpyfn (arg, data);
              arg += arg_size;
            }
          arg = orig_arg;
          for (i = 0; i < num_tasks; i++)
            {
              thr->task = &task[i];
              ((TYPE *)arg)[0] = start;
              start += task_step;
              ((TYPE *)arg)[1] = start;
              if (i == nfirst)
                task_step -= step;
              fn (arg);
              arg += arg_size;
              if (!priority_queue_empty_p (&task[i].children_queue,
                                           MEMMODEL_RELAXED))
                {
                  gomp_mutex_lock (&team->task_lock);
                  gomp_clear_parent (&task[i].children_queue);
                  gomp_mutex_unlock (&team->task_lock);
                }
              gomp_end_task ();
            }
        }
      else
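        /* No copy constructor: reuse the caller's DATA block, rewriting
           the start and end values in place for each chunk in turn.  */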
        for (i = 0; i < num_tasks; i++)
          {
            struct gomp_task task;

            gomp_init_task (&task, thr->task, gomp_icv (false));
            task.priority = priority;
            task.kind = GOMP_TASK_UNDEFERRED;
            task.final_task = (thr->task && thr->task->final_task)
                              || (flags & GOMP_TASK_FLAG_FINAL);
            if (thr->task)
              {
                task.in_tied_task = thr->task->in_tied_task;
                task.taskgroup = thr->task->taskgroup;
              }
            thr->task = &task;
            ((TYPE *)data)[0] = start;
            start += task_step;
            ((TYPE *)data)[1] = start;
            if (i == nfirst)
              task_step -= step;
            fn (data);
            if (!priority_queue_empty_p (&task.children_queue,
                                         MEMMODEL_RELAXED))
              {
                gomp_mutex_lock (&team->task_lock);
                gomp_clear_parent (&task.children_queue);
                gomp_mutex_unlock (&team->task_lock);
              }
            gomp_end_task ();
          }
    }
  else
    {
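      /* Deferred path: allocate a gomp_task plus an argument block for
         each chunk, queue them on the parent's, (if any) the taskgroup's
         and the team's queues, then wake up idle threads.  */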
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
        {
          struct gomp_task *task
            = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
          tasks[i] = task;
          arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
                          & ~(uintptr_t) (arg_align - 1));
          gomp_init_task (task, parent, gomp_icv (false));
          task->priority = priority;
          task->kind = GOMP_TASK_UNDEFERRED;
          task->in_tied_task = parent->in_tied_task;
          task->taskgroup = taskgroup;
          thr->task = task;
          if (cpyfn)
            {
              cpyfn (arg, data);
              task->copy_ctors_done = true;
            }
          else
            memcpy (arg, data, arg_size);
          ((TYPE *)arg)[0] = start;
          start += task_step;
          ((TYPE *)arg)[1] = start;
          if (i == nfirst)
            task_step -= step;
          thr->task = parent;
          task->kind = GOMP_TASK_WAITING;
          task->fn = fn;
          task->fn_data = arg;
          task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
        }
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
         tasks.  */
      if (__builtin_expect (gomp_cancel_var, 0)
          && cpyfn == NULL)
        {
          if (gomp_team_barrier_cancelled (&team->barrier))
            {
            do_cancel:
              gomp_mutex_unlock (&team->task_lock);
              for (i = 0; i < num_tasks; i++)
                {
                  gomp_finish_task (tasks[i]);
                  free (tasks[i]);
                }
              if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
                ialias_call (GOMP_taskgroup_end) ();
              return;
            }
          if (taskgroup)
            {
              if (taskgroup->cancelled)
                goto do_cancel;
              if (taskgroup->workshare
                  && taskgroup->prev
                  && taskgroup->prev->cancelled)
                goto do_cancel;
            }
        }
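      /* Not cancelled: account for the new children in the taskgroup (if
         any), then queue every task on the parent's children queue, the
         taskgroup's queue (if any) and the team's queue.  */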
      if (taskgroup)
        taskgroup->num_children += num_tasks;
      for (i = 0; i < num_tasks; i++)
        {
          struct gomp_task *task = tasks[i];
          priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
                                 task, priority,
                                 PRIORITY_INSERT_BEGIN,
                                 /*last_parent_depends_on=*/false,
                                 task->parent_depends_on);
          if (taskgroup)
            priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                   task, priority, PRIORITY_INSERT_BEGIN,
                                   /*last_parent_depends_on=*/false,
                                   task->parent_depends_on);
          priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
                                 PRIORITY_INSERT_END,
                                 /*last_parent_depends_on=*/false,
                                 task->parent_depends_on);
          ++team->task_count;
          ++team->task_queued_count;
        }
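      /* Signal that tasks are pending and compute how many idle threads
         to wake, capped at the number of tasks just queued.  */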
      gomp_team_barrier_set_task_pending (&team->barrier);
      if (team->task_running_count + !parent->in_tied_task
          < team->nthreads)
        {
          do_wake = team->nthreads - team->task_running_count
                    - !parent->in_tied_task;
          if ((unsigned long) do_wake > num_tasks)
            do_wake = num_tasks;
        }
      else
        do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}