xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/taskloop.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
/* Copyright (C) 2015-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for the unsigned long long variant.  */

291debfc3dSmrg /* Called when encountering an explicit task directive.  If IF_CLAUSE is
301debfc3dSmrg    false, then we must not delay in executing the task.  If UNTIED is true,
311debfc3dSmrg    then the task may be executed by any member of the team.  */
321debfc3dSmrg 
331debfc3dSmrg void
GOMP_taskloop(void (* fn)(void *),void * data,void (* cpyfn)(void *,void *),long arg_size,long arg_align,unsigned flags,unsigned long num_tasks,int priority,TYPE start,TYPE end,TYPE step)341debfc3dSmrg GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
351debfc3dSmrg 	       long arg_size, long arg_align, unsigned flags,
361debfc3dSmrg 	       unsigned long num_tasks, int priority,
371debfc3dSmrg 	       TYPE start, TYPE end, TYPE step)
381debfc3dSmrg {
391debfc3dSmrg   struct gomp_thread *thr = gomp_thread ();
401debfc3dSmrg   struct gomp_team *team = thr->ts.team;
411debfc3dSmrg 
421debfc3dSmrg #ifdef HAVE_BROKEN_POSIX_SEMAPHORES
431debfc3dSmrg   /* If pthread_mutex_* is used for omp_*lock*, then each task must be
441debfc3dSmrg      tied to one thread all the time.  This means UNTIED tasks must be
451debfc3dSmrg      tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
461debfc3dSmrg      might be running on different thread than FN.  */
471debfc3dSmrg   if (cpyfn)
481debfc3dSmrg     flags &= ~GOMP_TASK_FLAG_IF;
491debfc3dSmrg   flags &= ~GOMP_TASK_FLAG_UNTIED;
501debfc3dSmrg #endif
511debfc3dSmrg 
521debfc3dSmrg   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
531debfc3dSmrg   if (team && gomp_team_barrier_cancelled (&team->barrier))
54*8feb0f0bSmrg     {
55*8feb0f0bSmrg     early_return:
56*8feb0f0bSmrg       if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
57*8feb0f0bSmrg 	  == GOMP_TASK_FLAG_REDUCTION)
58*8feb0f0bSmrg 	{
59*8feb0f0bSmrg 	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
60*8feb0f0bSmrg 	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
61*8feb0f0bSmrg 	  /* Tell callers GOMP_taskgroup_reduction_register has not been
62*8feb0f0bSmrg 	     called.  */
63*8feb0f0bSmrg 	  ptr[2] = 0;
64*8feb0f0bSmrg 	}
651debfc3dSmrg       return;
66*8feb0f0bSmrg     }
671debfc3dSmrg 
681debfc3dSmrg #ifdef TYPE_is_long
691debfc3dSmrg   TYPE s = step;
701debfc3dSmrg   if (step > 0)
711debfc3dSmrg     {
721debfc3dSmrg       if (start >= end)
73*8feb0f0bSmrg 	goto early_return;
741debfc3dSmrg       s--;
751debfc3dSmrg     }
761debfc3dSmrg   else
771debfc3dSmrg     {
781debfc3dSmrg       if (start <= end)
79*8feb0f0bSmrg 	goto early_return;
801debfc3dSmrg       s++;
811debfc3dSmrg     }
821debfc3dSmrg   UTYPE n = (end - start + s) / step;
831debfc3dSmrg #else
841debfc3dSmrg   UTYPE n;
851debfc3dSmrg   if (flags & GOMP_TASK_FLAG_UP)
861debfc3dSmrg     {
871debfc3dSmrg       if (start >= end)
88*8feb0f0bSmrg 	goto early_return;
891debfc3dSmrg       n = (end - start + step - 1) / step;
901debfc3dSmrg     }
911debfc3dSmrg   else
921debfc3dSmrg     {
931debfc3dSmrg       if (start <= end)
94*8feb0f0bSmrg 	goto early_return;
951debfc3dSmrg       n = (start - end - step - 1) / -step;
961debfc3dSmrg     }
971debfc3dSmrg #endif
981debfc3dSmrg 
991debfc3dSmrg   TYPE task_step = step;
1001debfc3dSmrg   unsigned long nfirst = n;
1011debfc3dSmrg   if (flags & GOMP_TASK_FLAG_GRAINSIZE)
1021debfc3dSmrg     {
1031debfc3dSmrg       unsigned long grainsize = num_tasks;
1041debfc3dSmrg #ifdef TYPE_is_long
1051debfc3dSmrg       num_tasks = n / grainsize;
1061debfc3dSmrg #else
1071debfc3dSmrg       UTYPE ndiv = n / grainsize;
1081debfc3dSmrg       num_tasks = ndiv;
1091debfc3dSmrg       if (num_tasks != ndiv)
1101debfc3dSmrg 	num_tasks = ~0UL;
1111debfc3dSmrg #endif
1121debfc3dSmrg       if (num_tasks <= 1)
1131debfc3dSmrg 	{
1141debfc3dSmrg 	  num_tasks = 1;
1151debfc3dSmrg 	  task_step = end - start;
1161debfc3dSmrg 	}
1171debfc3dSmrg       else if (num_tasks >= grainsize
1181debfc3dSmrg #ifndef TYPE_is_long
1191debfc3dSmrg 	       && num_tasks != ~0UL
1201debfc3dSmrg #endif
1211debfc3dSmrg 	      )
1221debfc3dSmrg 	{
1231debfc3dSmrg 	  UTYPE mul = num_tasks * grainsize;
1241debfc3dSmrg 	  task_step = (TYPE) grainsize * step;
1251debfc3dSmrg 	  if (mul != n)
1261debfc3dSmrg 	    {
1271debfc3dSmrg 	      task_step += step;
1281debfc3dSmrg 	      nfirst = n - mul - 1;
1291debfc3dSmrg 	    }
1301debfc3dSmrg 	}
1311debfc3dSmrg       else
1321debfc3dSmrg 	{
1331debfc3dSmrg 	  UTYPE div = n / num_tasks;
1341debfc3dSmrg 	  UTYPE mod = n % num_tasks;
1351debfc3dSmrg 	  task_step = (TYPE) div * step;
1361debfc3dSmrg 	  if (mod)
1371debfc3dSmrg 	    {
1381debfc3dSmrg 	      task_step += step;
1391debfc3dSmrg 	      nfirst = mod - 1;
1401debfc3dSmrg 	    }
1411debfc3dSmrg 	}
1421debfc3dSmrg     }
1431debfc3dSmrg   else
1441debfc3dSmrg     {
1451debfc3dSmrg       if (num_tasks == 0)
1461debfc3dSmrg 	num_tasks = team ? team->nthreads : 1;
1471debfc3dSmrg       if (num_tasks >= n)
1481debfc3dSmrg 	num_tasks = n;
1491debfc3dSmrg       else
1501debfc3dSmrg 	{
1511debfc3dSmrg 	  UTYPE div = n / num_tasks;
1521debfc3dSmrg 	  UTYPE mod = n % num_tasks;
1531debfc3dSmrg 	  task_step = (TYPE) div * step;
1541debfc3dSmrg 	  if (mod)
1551debfc3dSmrg 	    {
1561debfc3dSmrg 	      task_step += step;
1571debfc3dSmrg 	      nfirst = mod - 1;
1581debfc3dSmrg 	    }
1591debfc3dSmrg 	}
1601debfc3dSmrg     }
1611debfc3dSmrg 
1621debfc3dSmrg   if (flags & GOMP_TASK_FLAG_NOGROUP)
1631debfc3dSmrg     {
164c0a68be4Smrg       if (__builtin_expect (gomp_cancel_var, 0)
165c0a68be4Smrg 	  && thr->task
166c0a68be4Smrg 	  && thr->task->taskgroup)
167c0a68be4Smrg 	{
168c0a68be4Smrg 	  if (thr->task->taskgroup->cancelled)
169c0a68be4Smrg 	    return;
170c0a68be4Smrg 	  if (thr->task->taskgroup->workshare
171c0a68be4Smrg 	      && thr->task->taskgroup->prev
172c0a68be4Smrg 	      && thr->task->taskgroup->prev->cancelled)
1731debfc3dSmrg 	    return;
1741debfc3dSmrg 	}
175c0a68be4Smrg     }
1761debfc3dSmrg   else
177c0a68be4Smrg     {
1781debfc3dSmrg       ialias_call (GOMP_taskgroup_start) ();
179c0a68be4Smrg       if (flags & GOMP_TASK_FLAG_REDUCTION)
180c0a68be4Smrg 	{
181c0a68be4Smrg 	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
182c0a68be4Smrg 	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
183c0a68be4Smrg 	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
184c0a68be4Smrg 	}
185c0a68be4Smrg     }
1861debfc3dSmrg 
1871debfc3dSmrg   if (priority > gomp_max_task_priority_var)
1881debfc3dSmrg     priority = gomp_max_task_priority_var;
1891debfc3dSmrg 
1901debfc3dSmrg   if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
1911debfc3dSmrg       || (thr->task && thr->task->final_task)
1921debfc3dSmrg       || team->task_count + num_tasks > 64 * team->nthreads)
1931debfc3dSmrg     {
1941debfc3dSmrg       unsigned long i;
1951debfc3dSmrg       if (__builtin_expect (cpyfn != NULL, 0))
1961debfc3dSmrg 	{
1971debfc3dSmrg 	  struct gomp_task task[num_tasks];
1981debfc3dSmrg 	  struct gomp_task *parent = thr->task;
1991debfc3dSmrg 	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
2001debfc3dSmrg 	  char buf[num_tasks * arg_size + arg_align - 1];
2011debfc3dSmrg 	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
2021debfc3dSmrg 				& ~(uintptr_t) (arg_align - 1));
2031debfc3dSmrg 	  char *orig_arg = arg;
2041debfc3dSmrg 	  for (i = 0; i < num_tasks; i++)
2051debfc3dSmrg 	    {
2061debfc3dSmrg 	      gomp_init_task (&task[i], parent, gomp_icv (false));
2071debfc3dSmrg 	      task[i].priority = priority;
2081debfc3dSmrg 	      task[i].kind = GOMP_TASK_UNDEFERRED;
2091debfc3dSmrg 	      task[i].final_task = (thr->task && thr->task->final_task)
2101debfc3dSmrg 				   || (flags & GOMP_TASK_FLAG_FINAL);
2111debfc3dSmrg 	      if (thr->task)
2121debfc3dSmrg 		{
2131debfc3dSmrg 		  task[i].in_tied_task = thr->task->in_tied_task;
2141debfc3dSmrg 		  task[i].taskgroup = thr->task->taskgroup;
2151debfc3dSmrg 		}
2161debfc3dSmrg 	      thr->task = &task[i];
2171debfc3dSmrg 	      cpyfn (arg, data);
2181debfc3dSmrg 	      arg += arg_size;
2191debfc3dSmrg 	    }
2201debfc3dSmrg 	  arg = orig_arg;
2211debfc3dSmrg 	  for (i = 0; i < num_tasks; i++)
2221debfc3dSmrg 	    {
2231debfc3dSmrg 	      thr->task = &task[i];
2241debfc3dSmrg 	      ((TYPE *)arg)[0] = start;
2251debfc3dSmrg 	      start += task_step;
2261debfc3dSmrg 	      ((TYPE *)arg)[1] = start;
2271debfc3dSmrg 	      if (i == nfirst)
2281debfc3dSmrg 		task_step -= step;
2291debfc3dSmrg 	      fn (arg);
2301debfc3dSmrg 	      arg += arg_size;
2311debfc3dSmrg 	      if (!priority_queue_empty_p (&task[i].children_queue,
2321debfc3dSmrg 					   MEMMODEL_RELAXED))
2331debfc3dSmrg 		{
2341debfc3dSmrg 		  gomp_mutex_lock (&team->task_lock);
2351debfc3dSmrg 		  gomp_clear_parent (&task[i].children_queue);
2361debfc3dSmrg 		  gomp_mutex_unlock (&team->task_lock);
2371debfc3dSmrg 		}
2381debfc3dSmrg 	      gomp_end_task ();
2391debfc3dSmrg 	    }
2401debfc3dSmrg 	}
2411debfc3dSmrg       else
2421debfc3dSmrg 	for (i = 0; i < num_tasks; i++)
2431debfc3dSmrg 	  {
2441debfc3dSmrg 	    struct gomp_task task;
2451debfc3dSmrg 
2461debfc3dSmrg 	    gomp_init_task (&task, thr->task, gomp_icv (false));
2471debfc3dSmrg 	    task.priority = priority;
2481debfc3dSmrg 	    task.kind = GOMP_TASK_UNDEFERRED;
2491debfc3dSmrg 	    task.final_task = (thr->task && thr->task->final_task)
2501debfc3dSmrg 			      || (flags & GOMP_TASK_FLAG_FINAL);
2511debfc3dSmrg 	    if (thr->task)
2521debfc3dSmrg 	      {
2531debfc3dSmrg 		task.in_tied_task = thr->task->in_tied_task;
2541debfc3dSmrg 		task.taskgroup = thr->task->taskgroup;
2551debfc3dSmrg 	      }
2561debfc3dSmrg 	    thr->task = &task;
2571debfc3dSmrg 	    ((TYPE *)data)[0] = start;
2581debfc3dSmrg 	    start += task_step;
2591debfc3dSmrg 	    ((TYPE *)data)[1] = start;
2601debfc3dSmrg 	    if (i == nfirst)
2611debfc3dSmrg 	      task_step -= step;
2621debfc3dSmrg 	    fn (data);
2631debfc3dSmrg 	    if (!priority_queue_empty_p (&task.children_queue,
2641debfc3dSmrg 					 MEMMODEL_RELAXED))
2651debfc3dSmrg 	      {
2661debfc3dSmrg 		gomp_mutex_lock (&team->task_lock);
2671debfc3dSmrg 		gomp_clear_parent (&task.children_queue);
2681debfc3dSmrg 		gomp_mutex_unlock (&team->task_lock);
2691debfc3dSmrg 	      }
2701debfc3dSmrg 	    gomp_end_task ();
2711debfc3dSmrg 	  }
2721debfc3dSmrg     }
2731debfc3dSmrg   else
2741debfc3dSmrg     {
2751debfc3dSmrg       struct gomp_task *tasks[num_tasks];
2761debfc3dSmrg       struct gomp_task *parent = thr->task;
2771debfc3dSmrg       struct gomp_taskgroup *taskgroup = parent->taskgroup;
2781debfc3dSmrg       char *arg;
2791debfc3dSmrg       int do_wake;
2801debfc3dSmrg       unsigned long i;
2811debfc3dSmrg 
2821debfc3dSmrg       for (i = 0; i < num_tasks; i++)
2831debfc3dSmrg 	{
2841debfc3dSmrg 	  struct gomp_task *task
2851debfc3dSmrg 	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
2861debfc3dSmrg 	  tasks[i] = task;
2871debfc3dSmrg 	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
2881debfc3dSmrg 			  & ~(uintptr_t) (arg_align - 1));
2891debfc3dSmrg 	  gomp_init_task (task, parent, gomp_icv (false));
2901debfc3dSmrg 	  task->priority = priority;
2911debfc3dSmrg 	  task->kind = GOMP_TASK_UNDEFERRED;
2921debfc3dSmrg 	  task->in_tied_task = parent->in_tied_task;
2931debfc3dSmrg 	  task->taskgroup = taskgroup;
2941debfc3dSmrg 	  thr->task = task;
2951debfc3dSmrg 	  if (cpyfn)
2961debfc3dSmrg 	    {
2971debfc3dSmrg 	      cpyfn (arg, data);
2981debfc3dSmrg 	      task->copy_ctors_done = true;
2991debfc3dSmrg 	    }
3001debfc3dSmrg 	  else
3011debfc3dSmrg 	    memcpy (arg, data, arg_size);
3021debfc3dSmrg 	  ((TYPE *)arg)[0] = start;
3031debfc3dSmrg 	  start += task_step;
3041debfc3dSmrg 	  ((TYPE *)arg)[1] = start;
3051debfc3dSmrg 	  if (i == nfirst)
3061debfc3dSmrg 	    task_step -= step;
3071debfc3dSmrg 	  thr->task = parent;
3081debfc3dSmrg 	  task->kind = GOMP_TASK_WAITING;
3091debfc3dSmrg 	  task->fn = fn;
3101debfc3dSmrg 	  task->fn_data = arg;
3111debfc3dSmrg 	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
3121debfc3dSmrg 	}
3131debfc3dSmrg       gomp_mutex_lock (&team->task_lock);
3141debfc3dSmrg       /* If parallel or taskgroup has been cancelled, don't start new
3151debfc3dSmrg 	 tasks.  */
316c0a68be4Smrg       if (__builtin_expect (gomp_cancel_var, 0)
317c0a68be4Smrg 	  && cpyfn == NULL)
3181debfc3dSmrg 	{
319c0a68be4Smrg 	  if (gomp_team_barrier_cancelled (&team->barrier))
320c0a68be4Smrg 	    {
321c0a68be4Smrg 	    do_cancel:
3221debfc3dSmrg 	      gomp_mutex_unlock (&team->task_lock);
3231debfc3dSmrg 	      for (i = 0; i < num_tasks; i++)
3241debfc3dSmrg 		{
3251debfc3dSmrg 		  gomp_finish_task (tasks[i]);
3261debfc3dSmrg 		  free (tasks[i]);
3271debfc3dSmrg 		}
3281debfc3dSmrg 	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
3291debfc3dSmrg 		ialias_call (GOMP_taskgroup_end) ();
3301debfc3dSmrg 	      return;
3311debfc3dSmrg 	    }
3321debfc3dSmrg 	  if (taskgroup)
333c0a68be4Smrg 	    {
334c0a68be4Smrg 	      if (taskgroup->cancelled)
335c0a68be4Smrg 		goto do_cancel;
336c0a68be4Smrg 	      if (taskgroup->workshare
337c0a68be4Smrg 		  && taskgroup->prev
338c0a68be4Smrg 		  && taskgroup->prev->cancelled)
339c0a68be4Smrg 		goto do_cancel;
340c0a68be4Smrg 	    }
341c0a68be4Smrg 	}
342c0a68be4Smrg       if (taskgroup)
3431debfc3dSmrg 	taskgroup->num_children += num_tasks;
3441debfc3dSmrg       for (i = 0; i < num_tasks; i++)
3451debfc3dSmrg 	{
3461debfc3dSmrg 	  struct gomp_task *task = tasks[i];
3471debfc3dSmrg 	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
3481debfc3dSmrg 				 task, priority,
3491debfc3dSmrg 				 PRIORITY_INSERT_BEGIN,
3501debfc3dSmrg 				 /*last_parent_depends_on=*/false,
3511debfc3dSmrg 				 task->parent_depends_on);
3521debfc3dSmrg 	  if (taskgroup)
3531debfc3dSmrg 	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
3541debfc3dSmrg 				   task, priority, PRIORITY_INSERT_BEGIN,
3551debfc3dSmrg 				   /*last_parent_depends_on=*/false,
3561debfc3dSmrg 				   task->parent_depends_on);
3571debfc3dSmrg 	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
3581debfc3dSmrg 				 PRIORITY_INSERT_END,
3591debfc3dSmrg 				 /*last_parent_depends_on=*/false,
3601debfc3dSmrg 				 task->parent_depends_on);
3611debfc3dSmrg 	  ++team->task_count;
3621debfc3dSmrg 	  ++team->task_queued_count;
3631debfc3dSmrg 	}
3641debfc3dSmrg       gomp_team_barrier_set_task_pending (&team->barrier);
3651debfc3dSmrg       if (team->task_running_count + !parent->in_tied_task
3661debfc3dSmrg 	  < team->nthreads)
3671debfc3dSmrg 	{
3681debfc3dSmrg 	  do_wake = team->nthreads - team->task_running_count
3691debfc3dSmrg 		    - !parent->in_tied_task;
3701debfc3dSmrg 	  if ((unsigned long) do_wake > num_tasks)
3711debfc3dSmrg 	    do_wake = num_tasks;
3721debfc3dSmrg 	}
3731debfc3dSmrg       else
3741debfc3dSmrg 	do_wake = 0;
3751debfc3dSmrg       gomp_mutex_unlock (&team->task_lock);
3761debfc3dSmrg       if (do_wake)
3771debfc3dSmrg 	gomp_team_barrier_wake (&team->barrier, do_wake);
3781debfc3dSmrg     }
3791debfc3dSmrg   if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
3801debfc3dSmrg     ialias_call (GOMP_taskgroup_end) ();
3811debfc3dSmrg }
382