/* Copyright (C) 2015-2018 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for unsigned long long variant.  */

/* Called when encountering an explicit taskloop directive.  Splits the
   iteration range [START, END) (stride STEP) into chunks and runs FN on
   each chunk, either immediately in this thread or as deferred tasks,
   depending on the GOMP_TASK_FLAG_* bits in FLAGS.  */

33*38fd1498Szrj void
GOMP_taskloop(void (* fn)(void *),void * data,void (* cpyfn)(void *,void *),long arg_size,long arg_align,unsigned flags,unsigned long num_tasks,int priority,TYPE start,TYPE end,TYPE step)34*38fd1498Szrj GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
35*38fd1498Szrj long arg_size, long arg_align, unsigned flags,
36*38fd1498Szrj unsigned long num_tasks, int priority,
37*38fd1498Szrj TYPE start, TYPE end, TYPE step)
38*38fd1498Szrj {
39*38fd1498Szrj struct gomp_thread *thr = gomp_thread ();
40*38fd1498Szrj struct gomp_team *team = thr->ts.team;
41*38fd1498Szrj
42*38fd1498Szrj #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
43*38fd1498Szrj /* If pthread_mutex_* is used for omp_*lock*, then each task must be
44*38fd1498Szrj tied to one thread all the time. This means UNTIED tasks must be
45*38fd1498Szrj tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
46*38fd1498Szrj might be running on different thread than FN. */
47*38fd1498Szrj if (cpyfn)
48*38fd1498Szrj flags &= ~GOMP_TASK_FLAG_IF;
49*38fd1498Szrj flags &= ~GOMP_TASK_FLAG_UNTIED;
50*38fd1498Szrj #endif
51*38fd1498Szrj
52*38fd1498Szrj /* If parallel or taskgroup has been cancelled, don't start new tasks. */
53*38fd1498Szrj if (team && gomp_team_barrier_cancelled (&team->barrier))
54*38fd1498Szrj return;
55*38fd1498Szrj
56*38fd1498Szrj #ifdef TYPE_is_long
57*38fd1498Szrj TYPE s = step;
58*38fd1498Szrj if (step > 0)
59*38fd1498Szrj {
60*38fd1498Szrj if (start >= end)
61*38fd1498Szrj return;
62*38fd1498Szrj s--;
63*38fd1498Szrj }
64*38fd1498Szrj else
65*38fd1498Szrj {
66*38fd1498Szrj if (start <= end)
67*38fd1498Szrj return;
68*38fd1498Szrj s++;
69*38fd1498Szrj }
70*38fd1498Szrj UTYPE n = (end - start + s) / step;
71*38fd1498Szrj #else
72*38fd1498Szrj UTYPE n;
73*38fd1498Szrj if (flags & GOMP_TASK_FLAG_UP)
74*38fd1498Szrj {
75*38fd1498Szrj if (start >= end)
76*38fd1498Szrj return;
77*38fd1498Szrj n = (end - start + step - 1) / step;
78*38fd1498Szrj }
79*38fd1498Szrj else
80*38fd1498Szrj {
81*38fd1498Szrj if (start <= end)
82*38fd1498Szrj return;
83*38fd1498Szrj n = (start - end - step - 1) / -step;
84*38fd1498Szrj }
85*38fd1498Szrj #endif
86*38fd1498Szrj
87*38fd1498Szrj TYPE task_step = step;
88*38fd1498Szrj unsigned long nfirst = n;
89*38fd1498Szrj if (flags & GOMP_TASK_FLAG_GRAINSIZE)
90*38fd1498Szrj {
91*38fd1498Szrj unsigned long grainsize = num_tasks;
92*38fd1498Szrj #ifdef TYPE_is_long
93*38fd1498Szrj num_tasks = n / grainsize;
94*38fd1498Szrj #else
95*38fd1498Szrj UTYPE ndiv = n / grainsize;
96*38fd1498Szrj num_tasks = ndiv;
97*38fd1498Szrj if (num_tasks != ndiv)
98*38fd1498Szrj num_tasks = ~0UL;
99*38fd1498Szrj #endif
100*38fd1498Szrj if (num_tasks <= 1)
101*38fd1498Szrj {
102*38fd1498Szrj num_tasks = 1;
103*38fd1498Szrj task_step = end - start;
104*38fd1498Szrj }
105*38fd1498Szrj else if (num_tasks >= grainsize
106*38fd1498Szrj #ifndef TYPE_is_long
107*38fd1498Szrj && num_tasks != ~0UL
108*38fd1498Szrj #endif
109*38fd1498Szrj )
110*38fd1498Szrj {
111*38fd1498Szrj UTYPE mul = num_tasks * grainsize;
112*38fd1498Szrj task_step = (TYPE) grainsize * step;
113*38fd1498Szrj if (mul != n)
114*38fd1498Szrj {
115*38fd1498Szrj task_step += step;
116*38fd1498Szrj nfirst = n - mul - 1;
117*38fd1498Szrj }
118*38fd1498Szrj }
119*38fd1498Szrj else
120*38fd1498Szrj {
121*38fd1498Szrj UTYPE div = n / num_tasks;
122*38fd1498Szrj UTYPE mod = n % num_tasks;
123*38fd1498Szrj task_step = (TYPE) div * step;
124*38fd1498Szrj if (mod)
125*38fd1498Szrj {
126*38fd1498Szrj task_step += step;
127*38fd1498Szrj nfirst = mod - 1;
128*38fd1498Szrj }
129*38fd1498Szrj }
130*38fd1498Szrj }
131*38fd1498Szrj else
132*38fd1498Szrj {
133*38fd1498Szrj if (num_tasks == 0)
134*38fd1498Szrj num_tasks = team ? team->nthreads : 1;
135*38fd1498Szrj if (num_tasks >= n)
136*38fd1498Szrj num_tasks = n;
137*38fd1498Szrj else
138*38fd1498Szrj {
139*38fd1498Szrj UTYPE div = n / num_tasks;
140*38fd1498Szrj UTYPE mod = n % num_tasks;
141*38fd1498Szrj task_step = (TYPE) div * step;
142*38fd1498Szrj if (mod)
143*38fd1498Szrj {
144*38fd1498Szrj task_step += step;
145*38fd1498Szrj nfirst = mod - 1;
146*38fd1498Szrj }
147*38fd1498Szrj }
148*38fd1498Szrj }
149*38fd1498Szrj
150*38fd1498Szrj if (flags & GOMP_TASK_FLAG_NOGROUP)
151*38fd1498Szrj {
152*38fd1498Szrj if (thr->task && thr->task->taskgroup && thr->task->taskgroup->cancelled)
153*38fd1498Szrj return;
154*38fd1498Szrj }
155*38fd1498Szrj else
156*38fd1498Szrj ialias_call (GOMP_taskgroup_start) ();
157*38fd1498Szrj
158*38fd1498Szrj if (priority > gomp_max_task_priority_var)
159*38fd1498Szrj priority = gomp_max_task_priority_var;
160*38fd1498Szrj
161*38fd1498Szrj if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
162*38fd1498Szrj || (thr->task && thr->task->final_task)
163*38fd1498Szrj || team->task_count + num_tasks > 64 * team->nthreads)
164*38fd1498Szrj {
165*38fd1498Szrj unsigned long i;
166*38fd1498Szrj if (__builtin_expect (cpyfn != NULL, 0))
167*38fd1498Szrj {
168*38fd1498Szrj struct gomp_task task[num_tasks];
169*38fd1498Szrj struct gomp_task *parent = thr->task;
170*38fd1498Szrj arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
171*38fd1498Szrj char buf[num_tasks * arg_size + arg_align - 1];
172*38fd1498Szrj char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
173*38fd1498Szrj & ~(uintptr_t) (arg_align - 1));
174*38fd1498Szrj char *orig_arg = arg;
175*38fd1498Szrj for (i = 0; i < num_tasks; i++)
176*38fd1498Szrj {
177*38fd1498Szrj gomp_init_task (&task[i], parent, gomp_icv (false));
178*38fd1498Szrj task[i].priority = priority;
179*38fd1498Szrj task[i].kind = GOMP_TASK_UNDEFERRED;
180*38fd1498Szrj task[i].final_task = (thr->task && thr->task->final_task)
181*38fd1498Szrj || (flags & GOMP_TASK_FLAG_FINAL);
182*38fd1498Szrj if (thr->task)
183*38fd1498Szrj {
184*38fd1498Szrj task[i].in_tied_task = thr->task->in_tied_task;
185*38fd1498Szrj task[i].taskgroup = thr->task->taskgroup;
186*38fd1498Szrj }
187*38fd1498Szrj thr->task = &task[i];
188*38fd1498Szrj cpyfn (arg, data);
189*38fd1498Szrj arg += arg_size;
190*38fd1498Szrj }
191*38fd1498Szrj arg = orig_arg;
192*38fd1498Szrj for (i = 0; i < num_tasks; i++)
193*38fd1498Szrj {
194*38fd1498Szrj thr->task = &task[i];
195*38fd1498Szrj ((TYPE *)arg)[0] = start;
196*38fd1498Szrj start += task_step;
197*38fd1498Szrj ((TYPE *)arg)[1] = start;
198*38fd1498Szrj if (i == nfirst)
199*38fd1498Szrj task_step -= step;
200*38fd1498Szrj fn (arg);
201*38fd1498Szrj arg += arg_size;
202*38fd1498Szrj if (!priority_queue_empty_p (&task[i].children_queue,
203*38fd1498Szrj MEMMODEL_RELAXED))
204*38fd1498Szrj {
205*38fd1498Szrj gomp_mutex_lock (&team->task_lock);
206*38fd1498Szrj gomp_clear_parent (&task[i].children_queue);
207*38fd1498Szrj gomp_mutex_unlock (&team->task_lock);
208*38fd1498Szrj }
209*38fd1498Szrj gomp_end_task ();
210*38fd1498Szrj }
211*38fd1498Szrj }
212*38fd1498Szrj else
213*38fd1498Szrj for (i = 0; i < num_tasks; i++)
214*38fd1498Szrj {
215*38fd1498Szrj struct gomp_task task;
216*38fd1498Szrj
217*38fd1498Szrj gomp_init_task (&task, thr->task, gomp_icv (false));
218*38fd1498Szrj task.priority = priority;
219*38fd1498Szrj task.kind = GOMP_TASK_UNDEFERRED;
220*38fd1498Szrj task.final_task = (thr->task && thr->task->final_task)
221*38fd1498Szrj || (flags & GOMP_TASK_FLAG_FINAL);
222*38fd1498Szrj if (thr->task)
223*38fd1498Szrj {
224*38fd1498Szrj task.in_tied_task = thr->task->in_tied_task;
225*38fd1498Szrj task.taskgroup = thr->task->taskgroup;
226*38fd1498Szrj }
227*38fd1498Szrj thr->task = &task;
228*38fd1498Szrj ((TYPE *)data)[0] = start;
229*38fd1498Szrj start += task_step;
230*38fd1498Szrj ((TYPE *)data)[1] = start;
231*38fd1498Szrj if (i == nfirst)
232*38fd1498Szrj task_step -= step;
233*38fd1498Szrj fn (data);
234*38fd1498Szrj if (!priority_queue_empty_p (&task.children_queue,
235*38fd1498Szrj MEMMODEL_RELAXED))
236*38fd1498Szrj {
237*38fd1498Szrj gomp_mutex_lock (&team->task_lock);
238*38fd1498Szrj gomp_clear_parent (&task.children_queue);
239*38fd1498Szrj gomp_mutex_unlock (&team->task_lock);
240*38fd1498Szrj }
241*38fd1498Szrj gomp_end_task ();
242*38fd1498Szrj }
243*38fd1498Szrj }
244*38fd1498Szrj else
245*38fd1498Szrj {
246*38fd1498Szrj struct gomp_task *tasks[num_tasks];
247*38fd1498Szrj struct gomp_task *parent = thr->task;
248*38fd1498Szrj struct gomp_taskgroup *taskgroup = parent->taskgroup;
249*38fd1498Szrj char *arg;
250*38fd1498Szrj int do_wake;
251*38fd1498Szrj unsigned long i;
252*38fd1498Szrj
253*38fd1498Szrj for (i = 0; i < num_tasks; i++)
254*38fd1498Szrj {
255*38fd1498Szrj struct gomp_task *task
256*38fd1498Szrj = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
257*38fd1498Szrj tasks[i] = task;
258*38fd1498Szrj arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
259*38fd1498Szrj & ~(uintptr_t) (arg_align - 1));
260*38fd1498Szrj gomp_init_task (task, parent, gomp_icv (false));
261*38fd1498Szrj task->priority = priority;
262*38fd1498Szrj task->kind = GOMP_TASK_UNDEFERRED;
263*38fd1498Szrj task->in_tied_task = parent->in_tied_task;
264*38fd1498Szrj task->taskgroup = taskgroup;
265*38fd1498Szrj thr->task = task;
266*38fd1498Szrj if (cpyfn)
267*38fd1498Szrj {
268*38fd1498Szrj cpyfn (arg, data);
269*38fd1498Szrj task->copy_ctors_done = true;
270*38fd1498Szrj }
271*38fd1498Szrj else
272*38fd1498Szrj memcpy (arg, data, arg_size);
273*38fd1498Szrj ((TYPE *)arg)[0] = start;
274*38fd1498Szrj start += task_step;
275*38fd1498Szrj ((TYPE *)arg)[1] = start;
276*38fd1498Szrj if (i == nfirst)
277*38fd1498Szrj task_step -= step;
278*38fd1498Szrj thr->task = parent;
279*38fd1498Szrj task->kind = GOMP_TASK_WAITING;
280*38fd1498Szrj task->fn = fn;
281*38fd1498Szrj task->fn_data = arg;
282*38fd1498Szrj task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
283*38fd1498Szrj }
284*38fd1498Szrj gomp_mutex_lock (&team->task_lock);
285*38fd1498Szrj /* If parallel or taskgroup has been cancelled, don't start new
286*38fd1498Szrj tasks. */
287*38fd1498Szrj if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
288*38fd1498Szrj || (taskgroup && taskgroup->cancelled))
289*38fd1498Szrj && cpyfn == NULL, 0))
290*38fd1498Szrj {
291*38fd1498Szrj gomp_mutex_unlock (&team->task_lock);
292*38fd1498Szrj for (i = 0; i < num_tasks; i++)
293*38fd1498Szrj {
294*38fd1498Szrj gomp_finish_task (tasks[i]);
295*38fd1498Szrj free (tasks[i]);
296*38fd1498Szrj }
297*38fd1498Szrj if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
298*38fd1498Szrj ialias_call (GOMP_taskgroup_end) ();
299*38fd1498Szrj return;
300*38fd1498Szrj }
301*38fd1498Szrj if (taskgroup)
302*38fd1498Szrj taskgroup->num_children += num_tasks;
303*38fd1498Szrj for (i = 0; i < num_tasks; i++)
304*38fd1498Szrj {
305*38fd1498Szrj struct gomp_task *task = tasks[i];
306*38fd1498Szrj priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
307*38fd1498Szrj task, priority,
308*38fd1498Szrj PRIORITY_INSERT_BEGIN,
309*38fd1498Szrj /*last_parent_depends_on=*/false,
310*38fd1498Szrj task->parent_depends_on);
311*38fd1498Szrj if (taskgroup)
312*38fd1498Szrj priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
313*38fd1498Szrj task, priority, PRIORITY_INSERT_BEGIN,
314*38fd1498Szrj /*last_parent_depends_on=*/false,
315*38fd1498Szrj task->parent_depends_on);
316*38fd1498Szrj priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
317*38fd1498Szrj PRIORITY_INSERT_END,
318*38fd1498Szrj /*last_parent_depends_on=*/false,
319*38fd1498Szrj task->parent_depends_on);
320*38fd1498Szrj ++team->task_count;
321*38fd1498Szrj ++team->task_queued_count;
322*38fd1498Szrj }
323*38fd1498Szrj gomp_team_barrier_set_task_pending (&team->barrier);
324*38fd1498Szrj if (team->task_running_count + !parent->in_tied_task
325*38fd1498Szrj < team->nthreads)
326*38fd1498Szrj {
327*38fd1498Szrj do_wake = team->nthreads - team->task_running_count
328*38fd1498Szrj - !parent->in_tied_task;
329*38fd1498Szrj if ((unsigned long) do_wake > num_tasks)
330*38fd1498Szrj do_wake = num_tasks;
331*38fd1498Szrj }
332*38fd1498Szrj else
333*38fd1498Szrj do_wake = 0;
334*38fd1498Szrj gomp_mutex_unlock (&team->task_lock);
335*38fd1498Szrj if (do_wake)
336*38fd1498Szrj gomp_team_barrier_wake (&team->barrier, do_wake);
337*38fd1498Szrj }
338*38fd1498Szrj if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
339*38fd1498Szrj ialias_call (GOMP_taskgroup_end) ();
340*38fd1498Szrj }