/* Copyright (C) 2015-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for the unsigned long long variant.  */

/* Called when encountering a taskloop construct.  FN is invoked once per
   generated task; the first two TYPE-sized slots of its argument block hold
   that task's chunk of the iteration range.  If the GOMP_TASK_FLAG_IF bit
   of FLAGS is clear, the tasks must not be deferred; if the
   GOMP_TASK_FLAG_UNTIED bit is set, a task may be resumed by any member of
   the team.  */

void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
               long arg_size, long arg_align, unsigned flags,
               unsigned long num_tasks, int priority,
               TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
     might run on a different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif
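
  /* The code below first computes the iteration count N of the loop,
     splits it into NUM_TASKS chunks of TASK_STEP iterations each (the
     first NFIRST + 1 chunks receive one extra iteration when N does not
     divide evenly), and then either runs the chunks inline as undeferred
     tasks or enqueues them as deferred tasks.  E.g. for START = 0,
     END = 10, STEP = 3 we get N = 4 iterations; num_tasks(3) then yields
     chunks of 2, 1 and 1 iterations (TASK_STEP = 6 and NFIRST = 0).  */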
  /* If the current parallel region or taskgroup has been cancelled,
     don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    return;

  /* Compute into N how many iterations the loop will run, returning
     early if it runs zero times; the dividend is biased so that the
     division rounds up.  */
#ifdef TYPE_is_long
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
        return;
      s--;
    }
  else
    {
      if (start <= end)
        return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
        return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
        return;
      n = (start - end - step - 1) / -step;
    }
#endif

  TYPE task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      /* With the grainsize clause, the NUM_TASKS argument carries the
         requested grainsize instead.  */
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      /* Saturate if N / GRAINSIZE does not fit in an unsigned long.  */
      if (num_tasks != ndiv)
        num_tasks = ~0UL;
#endif
      if (num_tasks <= 1)
        {
          num_tasks = 1;
          task_step = end - start;
        }
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
               && num_tasks != ~0UL
#endif
              )
        {
          UTYPE mul = num_tasks * grainsize;
          task_step = (TYPE) grainsize * step;
          if (mul != n)
            {
              task_step += step;
              nfirst = n - mul - 1;
            }
        }
      else
        {
          UTYPE div = n / num_tasks;
          UTYPE mod = n % num_tasks;
          task_step = (TYPE) div * step;
          if (mod)
            {
              task_step += step;
              nfirst = mod - 1;
            }
        }
    }
  else
    {
      if (num_tasks == 0)
        num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
        num_tasks = n;
      else
        {
          UTYPE div = n / num_tasks;
          UTYPE mod = n % num_tasks;
          task_step = (TYPE) div * step;
          if (mod)
            {
              task_step += step;
              nfirst = mod - 1;
            }
        }
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      if (__builtin_expect (gomp_cancel_var, 0)
          && thr->task
          && thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }
  else
    {
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
        {
          struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
          uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
          ialias_call (GOMP_taskgroup_reduction_register) (ptr);
        }
    }

  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  /* Run the chunks inline as undeferred tasks if the if clause was
     false, there is no team, the parent task is final, or too many
     tasks are queued already; otherwise defer them.  */
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
        {
          struct gomp_task task[num_tasks];
          struct gomp_task *parent = thr->task;
          arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
          char buf[num_tasks * arg_size + arg_align - 1];
          char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
                                & ~(uintptr_t) (arg_align - 1));
          char *orig_arg = arg;
          for (i = 0; i < num_tasks; i++)
            {
              gomp_init_task (&task[i], parent, gomp_icv (false));
              task[i].priority = priority;
              task[i].kind = GOMP_TASK_UNDEFERRED;
              task[i].final_task = (thr->task && thr->task->final_task)
                                   || (flags & GOMP_TASK_FLAG_FINAL);
              if (thr->task)
                {
                  task[i].in_tied_task = thr->task->in_tied_task;
                  task[i].taskgroup = thr->task->taskgroup;
                }
              thr->task = &task[i];
              cpyfn (arg, data);
              arg += arg_size;
            }
          arg = orig_arg;
          for (i = 0; i < num_tasks; i++)
            {
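              /* Replay the preconstructed argument blocks: store the
                 chunk bounds [START, START + TASK_STEP) into the first
                 two TYPE slots, run the task body inline, and shrink
                 TASK_STEP by one STEP once chunk NFIRST has been laid
                 out.  */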
              thr->task = &task[i];
              ((TYPE *)arg)[0] = start;
              start += task_step;
              ((TYPE *)arg)[1] = start;
              if (i == nfirst)
                task_step -= step;
              fn (arg);
              arg += arg_size;
              if (!priority_queue_empty_p (&task[i].children_queue,
                                           MEMMODEL_RELAXED))
                {
                  gomp_mutex_lock (&team->task_lock);
                  gomp_clear_parent (&task[i].children_queue);
                  gomp_mutex_unlock (&team->task_lock);
                }
              gomp_end_task ();
            }
        }
      else
        for (i = 0; i < num_tasks; i++)
          {
            struct gomp_task task;

            gomp_init_task (&task, thr->task, gomp_icv (false));
            task.priority = priority;
            task.kind = GOMP_TASK_UNDEFERRED;
            task.final_task = (thr->task && thr->task->final_task)
                              || (flags & GOMP_TASK_FLAG_FINAL);
            if (thr->task)
              {
                task.in_tied_task = thr->task->in_tied_task;
                task.taskgroup = thr->task->taskgroup;
              }
            thr->task = &task;
            ((TYPE *)data)[0] = start;
            start += task_step;
            ((TYPE *)data)[1] = start;
            if (i == nfirst)
              task_step -= step;
            fn (data);
            if (!priority_queue_empty_p (&task.children_queue,
                                         MEMMODEL_RELAXED))
              {
                gomp_mutex_lock (&team->task_lock);
                gomp_clear_parent (&task.children_queue);
                gomp_mutex_unlock (&team->task_lock);
              }
            gomp_end_task ();
          }
    }
  else
    {
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
        {
          struct gomp_task *task
            = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
          tasks[i] = task;
          /* The argument block lives directly after the task struct,
             aligned to ARG_ALIGN.  */
          arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
                          & ~(uintptr_t) (arg_align - 1));
          gomp_init_task (task, parent, gomp_icv (false));
          task->priority = priority;
          task->kind = GOMP_TASK_UNDEFERRED;
          task->in_tied_task = parent->in_tied_task;
          task->taskgroup = taskgroup;
          thr->task = task;
          if (cpyfn)
            {
              cpyfn (arg, data);
              task->copy_ctors_done = true;
            }
          else
            memcpy (arg, data, arg_size);
          ((TYPE *)arg)[0] = start;
          start += task_step;
          ((TYPE *)arg)[1] = start;
          if (i == nfirst)
            task_step -= step;
          thr->task = parent;
          task->kind = GOMP_TASK_WAITING;
          task->fn = fn;
          task->fn_data = arg;
          /* GOMP_TASK_FLAG_FINAL is bit 1, so this stores 0 or 1.  */
          task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
        }
      gomp_mutex_lock (&team->task_lock);
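      /* From here to the matching unlock everything runs under TEAM's
         task lock: cancellation is re-checked, and the tasks are
         published on the parent's, taskgroup's and team's queues in
         one atomic step.  */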
      /* If the current parallel region or taskgroup has been cancelled,
         don't start new tasks.  */
      if (__builtin_expect (gomp_cancel_var, 0)
          && cpyfn == NULL)
        {
          if (gomp_team_barrier_cancelled (&team->barrier))
            {
            do_cancel:
              gomp_mutex_unlock (&team->task_lock);
              for (i = 0; i < num_tasks; i++)
                {
                  gomp_finish_task (tasks[i]);
                  free (tasks[i]);
                }
              if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
                ialias_call (GOMP_taskgroup_end) ();
              return;
            }
          if (taskgroup)
            {
              if (taskgroup->cancelled)
                goto do_cancel;
              if (taskgroup->workshare
                  && taskgroup->prev
                  && taskgroup->prev->cancelled)
                goto do_cancel;
            }
        }
      if (taskgroup)
        taskgroup->num_children += num_tasks;
      for (i = 0; i < num_tasks; i++)
        {
          struct gomp_task *task = tasks[i];
          priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
                                 task, priority,
                                 PRIORITY_INSERT_BEGIN,
                                 /*last_parent_depends_on=*/false,
                                 task->parent_depends_on);
          if (taskgroup)
            priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                   task, priority, PRIORITY_INSERT_BEGIN,
                                   /*last_parent_depends_on=*/false,
                                   task->parent_depends_on);
          priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
                                 PRIORITY_INSERT_END,
                                 /*last_parent_depends_on=*/false,
                                 task->parent_depends_on);
          ++team->task_count;
          ++team->task_queued_count;
        }
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake up enough idle threads to run the new tasks, but no more
         than NUM_TASKS of them.  */
      if (team->task_running_count + !parent->in_tied_task
          < team->nthreads)
        {
          do_wake = team->nthreads - team->task_running_count
                    - !parent->in_tied_task;
          if ((unsigned long) do_wake > num_tasks)
            do_wake = num_tasks;
        }
      else
        do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}
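
/* Illustration only, not part of the runtime: a sketch of roughly what a
   compiler emits for a taskloop such as

     #pragma omp taskloop num_tasks(3)
     for (long i = 0; i < 10; i += 3)
       body (i);

   assuming the long variant (TYPE == long).  The outlined function
   LOOP_FN, the helper BODY, the struct layout and the exact FLAGS bits
   are illustrative assumptions, not lifted from GCC's gimplifier output;
   what the code above does guarantee is that the first two TYPE-sized
   members of the argument block receive each task's chunk bounds.

     struct loop_data { long start, end; };

     static void
     loop_fn (void *p)
     {
       struct loop_data *d = p;
       for (long i = d->start; i < d->end; i += 3)
         body (i);
     }

     struct loop_data data;   // bounds filled in per chunk by GOMP_taskloop
     GOMP_taskloop (loop_fn, &data, NULL, sizeof data,
                    __alignof__ (struct loop_data),
                    GOMP_TASK_FLAG_IF | GOMP_TASK_FLAG_UP,
                    3, 0, 0, 10, 3);  */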