xref: /netbsd-src/external/gpl3/gcc/dist/libgomp/taskloop.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Copyright (C) 2015-2022 Free Software Foundation, Inc.
2    Contributed by Jakub Jelinek <jakub@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file handles the taskloop construct.  It is included twice, once
27    for the long and once for unsigned long long variant.  */
28 
29 /* Called when encountering a taskloop directive: split the iteration
30    space START..END (with stride STEP) into tasks and run or enqueue
31    them.  NUM_TASKS is the requested task count, or the grainsize when
32    GOMP_TASK_FLAG_GRAINSIZE is set in FLAGS.  */
32 
/* Runtime entry point for the OpenMP `taskloop' construct.

   FN is invoked once per generated task; its argument block's first two
   TYPE fields are set to that task's [begin, end) chunk of the iteration
   space before each invocation.  DATA is the caller's argument block
   (replicated per task via CPYFN when non-NULL, else memcpy), with
   ARG_SIZE/ARG_ALIGN giving its size and alignment.  FLAGS is a mask of
   GOMP_TASK_FLAG_* bits (IF, UNTIED, FINAL, NOGROUP, UP, GRAINSIZE,
   STRICT, REDUCTION).  NUM_TASKS is either the requested number of tasks
   or, with GOMP_TASK_FLAG_GRAINSIZE, the grainsize.  START/END/STEP
   describe the iteration space.  This file is included twice: once with
   TYPE == long (TYPE_is_long defined) and once with
   TYPE == unsigned long long.  */
33 void
GOMP_taskloop(void (* fn)(void *),void * data,void (* cpyfn)(void *,void *),long arg_size,long arg_align,unsigned flags,unsigned long num_tasks,int priority,TYPE start,TYPE end,TYPE step)34 GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
35 	       long arg_size, long arg_align, unsigned flags,
36 	       unsigned long num_tasks, int priority,
37 	       TYPE start, TYPE end, TYPE step)
38 {
39   struct gomp_thread *thr = gomp_thread ();
40   struct gomp_team *team = thr->ts.team;
41 
42 #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
43   /* If pthread_mutex_* is used for omp_*lock*, then each task must be
44      tied to one thread all the time.  This means UNTIED tasks must be
45      tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
46      might be running on different thread than FN.  */
47   if (cpyfn)
48     flags &= ~GOMP_TASK_FLAG_IF;
49   flags &= ~GOMP_TASK_FLAG_UNTIED;
50 #endif
51 
52   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
53   if (team && gomp_team_barrier_cancelled (&team->barrier))
54     {
      /* Also reached via goto from below when the iteration space is
	 empty.  In either case no tasks are created.  */
55     early_return:
56       if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
57 	  == GOMP_TASK_FLAG_REDUCTION)
58 	{
	  /* DATA starts with the two loop bounds followed by the
	     reduction descriptor pointer.  */
59 	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
60 	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
61 	  /* Tell callers GOMP_taskgroup_reduction_register has not been
62 	     called.  */
63 	  ptr[2] = 0;
64 	}
65       return;
66     }
67 
  /* Compute N, the total number of logical iterations, in UTYPE.  For the
     signed (long) variant the loop direction comes from the sign of STEP;
     for the unsigned variant it comes from GOMP_TASK_FLAG_UP.  An empty
     iteration space bails out via early_return above.  */
68 #ifdef TYPE_is_long
69   TYPE s = step;
70   if (step > 0)
71     {
72       if (start >= end)
73 	goto early_return;
74       s--;
75     }
76   else
77     {
78       if (start <= end)
79 	goto early_return;
80       s++;
81     }
  /* Round the trip count up: s is step -/+ 1 so the division rounds
     toward "one more iteration" when there is a partial step.  */
82   UTYPE n = (end - start + s) / step;
83 #else
84   UTYPE n;
85   if (flags & GOMP_TASK_FLAG_UP)
86     {
87       if (start >= end)
88 	goto early_return;
89       n = (end - start + step - 1) / step;
90     }
91   else
92     {
93       if (start <= end)
94 	goto early_return;
95       n = (start - end - step - 1) / -step;
96     }
97 #endif
98 
  /* Split N iterations into NUM_TASKS chunks.  Tasks 0..NFIRST advance
     START by TASK_STEP; once task index NFIRST has been assigned its
     bounds, the step switches to NFIRST_TASK_STEP for the remaining
     tasks (see the "if (i == nfirst)" checks below).  This lets the
     first group of tasks carry one extra iteration (or, for strict
     grainsize, lets the last task carry the remainder).  */
99   TYPE task_step = step;
100   TYPE nfirst_task_step = step;
101   unsigned long nfirst = n;
102   if (flags & GOMP_TASK_FLAG_GRAINSIZE)
103     {
      /* NUM_TASKS actually holds the grainsize; derive the task count
	 from it.  */
104       unsigned long grainsize = num_tasks;
105 #ifdef TYPE_is_long
106       num_tasks = n / grainsize;
107 #else
      /* UTYPE may be wider than unsigned long; saturate on overflow.  */
108       UTYPE ndiv = n / grainsize;
109       num_tasks = ndiv;
110       if (num_tasks != ndiv)
111 	num_tasks = ~0UL;
112 #endif
      /* OpenMP 5.0 strict modifier: every task gets exactly GRAINSIZE
	 iterations except possibly the last one, which gets the
	 remainder.  */
113       if ((flags & GOMP_TASK_FLAG_STRICT)
114 	  && num_tasks != ~0ULL)
115 	{
116 	  UTYPE mod = n % grainsize;
117 	  task_step = (TYPE) grainsize * step;
118 	  if (mod)
119 	    {
120 	      num_tasks++;
121 	      nfirst_task_step = (TYPE) mod * step;
122 	      if (num_tasks == 1)
123 		task_step = nfirst_task_step;
124 	      else
125 		nfirst = num_tasks - 2;
126 	    }
127 	}
      /* Non-strict: fewer iterations than one grainsize -> single task.  */
128       else if (num_tasks <= 1)
129 	{
130 	  num_tasks = 1;
131 	  task_step = end - start;
132 	}
133       else if (num_tasks >= grainsize
134 #ifndef TYPE_is_long
135 	       && num_tasks != ~0UL
136 #endif
137 	      )
138 	{
	  /* Enough tasks: give each GRAINSIZE iterations, distributing
	     the remainder one extra iteration to the first tasks.  */
139 	  UTYPE mul = num_tasks * grainsize;
140 	  task_step = (TYPE) grainsize * step;
141 	  if (mul != n)
142 	    {
143 	      nfirst_task_step = task_step;
144 	      task_step += step;
145 	      nfirst = n - mul - 1;
146 	    }
147 	}
148       else
149 	{
	  /* Otherwise divide N evenly among NUM_TASKS tasks, again with
	     the first MOD tasks taking one extra iteration.  */
150 	  UTYPE div = n / num_tasks;
151 	  UTYPE mod = n % num_tasks;
152 	  task_step = (TYPE) div * step;
153 	  if (mod)
154 	    {
155 	      nfirst_task_step = task_step;
156 	      task_step += step;
157 	      nfirst = mod - 1;
158 	    }
159 	}
160     }
161   else
162     {
      /* num_tasks clause (or default: one task per team thread), capped
	 at the iteration count.  */
163       if (num_tasks == 0)
164 	num_tasks = team ? team->nthreads : 1;
165       if (num_tasks >= n)
166 	num_tasks = n;
167       else
168 	{
169 	  UTYPE div = n / num_tasks;
170 	  UTYPE mod = n % num_tasks;
171 	  task_step = (TYPE) div * step;
172 	  if (mod)
173 	    {
174 	      nfirst_task_step = task_step;
175 	      task_step += step;
176 	      nfirst = mod - 1;
177 	    }
178 	}
179     }
180 
181   if (flags & GOMP_TASK_FLAG_NOGROUP)
182     {
      /* nogroup: no implicit taskgroup, but still honor a pending
	 cancellation of an enclosing taskgroup (or the workshare's
	 parent taskgroup).  */
183       if (__builtin_expect (gomp_cancel_var, 0)
184 	  && thr->task
185 	  && thr->task->taskgroup)
186 	{
187 	  if (thr->task->taskgroup->cancelled)
188 	    return;
189 	  if (thr->task->taskgroup->workshare
190 	      && thr->task->taskgroup->prev
191 	      && thr->task->taskgroup->prev->cancelled)
192 	    return;
193 	}
194     }
195   else
196     {
      /* Implicit taskgroup around the generated tasks; register task
	 reductions with it if requested.  */
197       ialias_call (GOMP_taskgroup_start) ();
198       if (flags & GOMP_TASK_FLAG_REDUCTION)
199 	{
200 	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
201 	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
202 	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
203 	}
204     }
205 
206   if (priority > gomp_max_task_priority_var)
207     priority = gomp_max_task_priority_var;
208 
  /* Undeferred path: run all the tasks immediately on this thread when
     if(0), there is no team, we are inside a final task, or the team's
     task queue is already heavily loaded (> 64 tasks per thread).  */
209   if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
210       || (thr->task && thr->task->final_task)
211       || team->task_count + num_tasks > 64 * team->nthreads)
212     {
213       unsigned long i;
214       if (__builtin_expect (cpyfn != NULL, 0))
215 	{
	  /* With a copy constructor all argument blocks must be copied
	     first (while each task is current, for firstprivate ctors),
	     then run; hence the two loops over VLA-allocated storage.  */
216 	  struct gomp_task task[num_tasks];
217 	  struct gomp_task *parent = thr->task;
218 	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
219 	  char buf[num_tasks * arg_size + arg_align - 1];
220 	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
221 				& ~(uintptr_t) (arg_align - 1));
222 	  char *orig_arg = arg;
223 	  for (i = 0; i < num_tasks; i++)
224 	    {
225 	      gomp_init_task (&task[i], parent, gomp_icv (false));
226 	      task[i].priority = priority;
227 	      task[i].kind = GOMP_TASK_UNDEFERRED;
228 	      task[i].final_task = (thr->task && thr->task->final_task)
229 				   || (flags & GOMP_TASK_FLAG_FINAL);
230 	      if (thr->task)
231 		{
232 		  task[i].in_tied_task = thr->task->in_tied_task;
233 		  task[i].taskgroup = thr->task->taskgroup;
234 		}
235 	      thr->task = &task[i];
236 	      cpyfn (arg, data);
237 	      arg += arg_size;
238 	    }
239 	  arg = orig_arg;
240 	  for (i = 0; i < num_tasks; i++)
241 	    {
242 	      thr->task = &task[i];
	      /* Patch this task's [begin, end) bounds into its argument
		 block; switch step after task NFIRST (see above).  */
243 	      ((TYPE *)arg)[0] = start;
244 	      start += task_step;
245 	      ((TYPE *)arg)[1] = start;
246 	      if (i == nfirst)
247 		task_step = nfirst_task_step;
248 	      fn (arg);
249 	      arg += arg_size;
	      /* Orphan any children the task spawned, so they outlive
		 this stack-allocated parent.  */
250 	      if (!priority_queue_empty_p (&task[i].children_queue,
251 					   MEMMODEL_RELAXED))
252 		{
253 		  gomp_mutex_lock (&team->task_lock);
254 		  gomp_clear_parent (&task[i].children_queue);
255 		  gomp_mutex_unlock (&team->task_lock);
256 		}
257 	      gomp_end_task ();
258 	    }
259 	}
260       else
	/* No copy constructor: reuse DATA in place, one task at a time.  */
261 	for (i = 0; i < num_tasks; i++)
262 	  {
263 	    struct gomp_task task;
264 
265 	    gomp_init_task (&task, thr->task, gomp_icv (false));
266 	    task.priority = priority;
267 	    task.kind = GOMP_TASK_UNDEFERRED;
268 	    task.final_task = (thr->task && thr->task->final_task)
269 			      || (flags & GOMP_TASK_FLAG_FINAL);
270 	    if (thr->task)
271 	      {
272 		task.in_tied_task = thr->task->in_tied_task;
273 		task.taskgroup = thr->task->taskgroup;
274 	      }
275 	    thr->task = &task;
276 	    ((TYPE *)data)[0] = start;
277 	    start += task_step;
278 	    ((TYPE *)data)[1] = start;
279 	    if (i == nfirst)
280 	      task_step = nfirst_task_step;
281 	    fn (data);
282 	    if (!priority_queue_empty_p (&task.children_queue,
283 					 MEMMODEL_RELAXED))
284 	      {
285 		gomp_mutex_lock (&team->task_lock);
286 		gomp_clear_parent (&task.children_queue);
287 		gomp_mutex_unlock (&team->task_lock);
288 	      }
289 	    gomp_end_task ();
290 	  }
291     }
292   else
293     {
      /* Deferred path: heap-allocate all tasks up front (task struct and
	 argument block in one allocation), then enqueue them under the
	 team lock so other team threads can execute them.  */
294       struct gomp_task *tasks[num_tasks];
295       struct gomp_task *parent = thr->task;
296       struct gomp_taskgroup *taskgroup = parent->taskgroup;
297       char *arg;
298       int do_wake;
299       unsigned long i;
300 
301       for (i = 0; i < num_tasks; i++)
302 	{
303 	  struct gomp_task *task
304 	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
305 	  tasks[i] = task;
	  /* Argument block lives right after the task struct, aligned
	     up to ARG_ALIGN.  */
306 	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
307 			  & ~(uintptr_t) (arg_align - 1));
308 	  gomp_init_task (task, parent, gomp_icv (false));
309 	  task->priority = priority;
310 	  task->kind = GOMP_TASK_UNDEFERRED;
311 	  task->in_tied_task = parent->in_tied_task;
312 	  task->taskgroup = taskgroup;
	  /* Make the new task current while CPYFN runs, as firstprivate
	     copy constructors may create child tasks.  */
313 	  thr->task = task;
314 	  if (cpyfn)
315 	    {
316 	      cpyfn (arg, data);
317 	      task->copy_ctors_done = true;
318 	    }
319 	  else
320 	    memcpy (arg, data, arg_size);
321 	  ((TYPE *)arg)[0] = start;
322 	  start += task_step;
323 	  ((TYPE *)arg)[1] = start;
324 	  if (i == nfirst)
325 	    task_step = nfirst_task_step;
326 	  thr->task = parent;
327 	  task->kind = GOMP_TASK_WAITING;
328 	  task->fn = fn;
329 	  task->fn_data = arg;
	  /* NOTE(review): presumably GOMP_TASK_FLAG_FINAL is bit 1, so
	     the shift yields 0 or 1 -- confirm against gomp-constants.h.  */
330 	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
331 	}
332       gomp_mutex_lock (&team->task_lock);
333       /* If parallel or taskgroup has been cancelled, don't start new
334 	 tasks.  */
335       if (__builtin_expect (gomp_cancel_var, 0)
336 	  && cpyfn == NULL)
337 	{
338 	  if (gomp_team_barrier_cancelled (&team->barrier))
339 	    {
	    /* Cancelled: free every allocated task and bail out,
	       closing the implicit taskgroup if we opened one.  */
340 	    do_cancel:
341 	      gomp_mutex_unlock (&team->task_lock);
342 	      for (i = 0; i < num_tasks; i++)
343 		{
344 		  gomp_finish_task (tasks[i]);
345 		  free (tasks[i]);
346 		}
347 	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
348 		ialias_call (GOMP_taskgroup_end) ();
349 	      return;
350 	    }
351 	  if (taskgroup)
352 	    {
353 	      if (taskgroup->cancelled)
354 		goto do_cancel;
355 	      if (taskgroup->workshare
356 		  && taskgroup->prev
357 		  && taskgroup->prev->cancelled)
358 		goto do_cancel;
359 	    }
360 	}
361       if (taskgroup)
362 	taskgroup->num_children += num_tasks;
      /* Insert each task into the parent's children queue, the taskgroup
	 queue (both at the front) and the team-wide queue (at the back).  */
363       for (i = 0; i < num_tasks; i++)
364 	{
365 	  struct gomp_task *task = tasks[i];
366 	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
367 				 task, priority,
368 				 PRIORITY_INSERT_BEGIN,
369 				 /*last_parent_depends_on=*/false,
370 				 task->parent_depends_on);
371 	  if (taskgroup)
372 	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
373 				   task, priority, PRIORITY_INSERT_BEGIN,
374 				   /*last_parent_depends_on=*/false,
375 				   task->parent_depends_on);
376 	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
377 				 PRIORITY_INSERT_END,
378 				 /*last_parent_depends_on=*/false,
379 				 task->parent_depends_on);
380 	  ++team->task_count;
381 	  ++team->task_queued_count;
382 	}
383       gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake idle team threads, at most one per queued task.  */
384       if (team->task_running_count + !parent->in_tied_task
385 	  < team->nthreads)
386 	{
387 	  do_wake = team->nthreads - team->task_running_count
388 		    - !parent->in_tied_task;
389 	  if ((unsigned long) do_wake > num_tasks)
390 	    do_wake = num_tasks;
391 	}
392       else
393 	do_wake = 0;
394       gomp_mutex_unlock (&team->task_lock);
395       if (do_wake)
396 	gomp_team_barrier_wake (&team->barrier, do_wake);
397     }
  /* Close the implicit taskgroup, waiting for all generated tasks.  */
398   if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
399     ialias_call (GOMP_taskgroup_end) ();
400 }
401
401