/* Copyright (C) 2015-2022 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for unsigned long long variant: TYPE/UTYPE are
   the signed/unsigned iteration types and TYPE_is_long is defined only
   for the long variant.  */

/* Called when encountering a taskloop directive: split the iteration
   space [START, END) with stride STEP into NUM_TASKS tasks (or, with
   GOMP_TASK_FLAG_GRAINSIZE, into tasks of NUM_TASKS iterations each)
   and run FN on each chunk.  DATA is the caller-built argument block of
   ARG_SIZE bytes aligned to ARG_ALIGN whose first two TYPE slots receive
   each chunk's start/end; CPYFN, if non-NULL, copy-constructs a private
   copy of DATA for a task.  FLAGS is a mask of GOMP_TASK_FLAG_* bits
   (IF, UNTIED, FINAL, UP, GRAINSIZE, STRICT, NOGROUP, REDUCTION);
   PRIORITY is the task priority clamped to gomp_max_task_priority_var.  */

void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
	       long arg_size, long arg_align, unsigned flags,
	       unsigned long num_tasks, int priority,
	       TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    {
    early_return:
      /* Even when bailing out, a taskloop with an in_reduction clause
	 (REDUCTION set, NOGROUP clear) must report back through the
	 reduction descriptor embedded in DATA.  */
      if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
	  == GOMP_TASK_FLAG_REDUCTION)
	{
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  /* Tell callers GOMP_taskgroup_reduction_register has not been
	     called.  */
	  ptr[2] = 0;
	}
      return;
    }

  /* Compute N, the total number of loop iterations; an empty iteration
     space takes the early return above.  */
#ifdef TYPE_is_long
  /* Signed variant: the loop direction is determined by the sign of STEP.
     S is STEP adjusted towards zero by one so that the division rounds
     the iteration count up.  */
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
	goto early_return;
      s--;
    }
  else
    {
      if (start <= end)
	goto early_return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  /* Unsigned variant: the direction cannot be read off STEP's sign, so
     the compiler passes it via GOMP_TASK_FLAG_UP.  */
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
	goto early_return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
	goto early_return;
      n = (start - end - step - 1) / -step;
    }
#endif

  /* Split N iterations into NUM_TASKS chunks.  Tasks 0..NFIRST advance
     START by TASK_STEP; once task NFIRST has been handed out, TASK_STEP
     is switched to NFIRST_TASK_STEP for the remaining tasks.  This lets
     the first chunks be one iteration larger (or, under STRICT grainsize,
     lets the last chunk hold the remainder).  */
  TYPE task_step = step;
  TYPE nfirst_task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      /* grainsize clause: NUM_TASKS actually carried the requested
	 iterations-per-task; derive the task count from it.  */
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      /* N is unsigned long long here; saturate the task count if it
	 does not fit in unsigned long.  */
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
	num_tasks = ~0UL;
#endif
      if ((flags & GOMP_TASK_FLAG_STRICT)
	  && num_tasks != ~0ULL)
	{
	  /* grainsize (strict:...): every task gets exactly GRAINSIZE
	     iterations except possibly one final task with the
	     remainder.  */
	  UTYPE mod = n % grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mod)
	    {
	      num_tasks++;
	      nfirst_task_step = (TYPE) mod * step;
	      if (num_tasks == 1)
		task_step = nfirst_task_step;
	      else
		nfirst = num_tasks - 2;
	    }
	}
      else if (num_tasks <= 1)
	{
	  /* Fewer iterations than the grainsize: run everything as a
	     single task.  */
	  num_tasks = 1;
	  task_step = end - start;
	}
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
	       && num_tasks != ~0UL
#endif
	      )
	{
	  /* Non-strict grainsize with N >= grainsize^2: give each task
	     GRAINSIZE iterations, distributing the remainder by one
	     extra iteration to the first N - NUM_TASKS*GRAINSIZE
	     tasks.  */
	  UTYPE mul = num_tasks * grainsize;
	  task_step = (TYPE) grainsize * step;
	  if (mul != n)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = n - mul - 1;
	    }
	}
      else
	{
	  /* Otherwise divide N evenly among NUM_TASKS tasks, first MOD
	     tasks getting one iteration more.  */
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }
  else
    {
      /* num_tasks clause (or neither clause, in which case default to
	 one task per team thread), capped at one iteration per task.  */
      if (num_tasks == 0)
	num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
	num_tasks = n;
      else
	{
	  UTYPE div = n / num_tasks;
	  UTYPE mod = n % num_tasks;
	  task_step = (TYPE) div * step;
	  if (mod)
	    {
	      nfirst_task_step = task_step;
	      task_step += step;
	      nfirst = mod - 1;
	    }
	}
    }

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      /* nogroup: no implicit taskgroup, but still honour cancellation
	 of an enclosing taskgroup (or of the workshare's enclosing
	 taskgroup) when cancellation is active.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && thr->task
	  && thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }
  else
    {
      /* Implicit taskgroup around the taskloop; register reductions in
	 it if requested (the descriptor pointer lives after the two
	 TYPE bounds at the head of DATA).  */
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
	{
	  struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
	  uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
	  ialias_call (GOMP_taskgroup_reduction_register) (ptr);
	}
    }

  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

  /* Undeferred path: run the chunks inline, right now, when if(0),
     there is no team, the current task is final, or the team's task
     queue is already saturated.  */
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
	{
	  /* With a copy constructor, all per-task copies must be
	     constructed up front (first loop) before any chunk runs
	     (second loop), as FN for one chunk may spawn children that
	     reference state shared across copies.  NOTE(review): the
	     exact reason for the two-pass split is inferred from
	     upstream practice — confirm against task.c.  */
	  struct gomp_task task[num_tasks];
	  struct gomp_task *parent = thr->task;
	  /* Round ARG_SIZE up to a multiple of ARG_ALIGN and align the
	     start of the on-stack argument buffer.  */
	  arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
	  char buf[num_tasks * arg_size + arg_align - 1];
	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
				& ~(uintptr_t) (arg_align - 1));
	  char *orig_arg = arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      gomp_init_task (&task[i], parent, gomp_icv (false));
	      task[i].priority = priority;
	      task[i].kind = GOMP_TASK_UNDEFERRED;
	      task[i].final_task = (thr->task && thr->task->final_task)
				   || (flags & GOMP_TASK_FLAG_FINAL);
	      if (thr->task)
		{
		  task[i].in_tied_task = thr->task->in_tied_task;
		  task[i].taskgroup = thr->task->taskgroup;
		}
	      /* CPYFN must observe the task being constructed as the
		 current task.  */
	      thr->task = &task[i];
	      cpyfn (arg, data);
	      arg += arg_size;
	    }
	  arg = orig_arg;
	  for (i = 0; i < num_tasks; i++)
	    {
	      thr->task = &task[i];
	      /* Store this chunk's [start, end) into the first two TYPE
		 slots of its argument copy.  */
	      ((TYPE *)arg)[0] = start;
	      start += task_step;
	      ((TYPE *)arg)[1] = start;
	      if (i == nfirst)
		task_step = nfirst_task_step;
	      fn (arg);
	      arg += arg_size;
	      /* Any children FN spawned must be orphaned before this
		 stack-allocated task goes away.  */
	      if (!priority_queue_empty_p (&task[i].children_queue,
					   MEMMODEL_RELAXED))
		{
		  gomp_mutex_lock (&team->task_lock);
		  gomp_clear_parent (&task[i].children_queue);
		  gomp_mutex_unlock (&team->task_lock);
		}
	      gomp_end_task ();
	    }
	}
      else
	/* No copy constructor: reuse DATA in place for every chunk,
	   rewriting its bounds each time.  */
	for (i = 0; i < num_tasks; i++)
	  {
	    struct gomp_task task;

	    gomp_init_task (&task, thr->task, gomp_icv (false));
	    task.priority = priority;
	    task.kind = GOMP_TASK_UNDEFERRED;
	    task.final_task = (thr->task && thr->task->final_task)
			      || (flags & GOMP_TASK_FLAG_FINAL);
	    if (thr->task)
	      {
		task.in_tied_task = thr->task->in_tied_task;
		task.taskgroup = thr->task->taskgroup;
	      }
	    thr->task = &task;
	    ((TYPE *)data)[0] = start;
	    start += task_step;
	    ((TYPE *)data)[1] = start;
	    if (i == nfirst)
	      task_step = nfirst_task_step;
	    fn (data);
	    /* Orphan any children before the stack task is destroyed.  */
	    if (!priority_queue_empty_p (&task.children_queue,
					 MEMMODEL_RELAXED))
	      {
		gomp_mutex_lock (&team->task_lock);
		gomp_clear_parent (&task.children_queue);
		gomp_mutex_unlock (&team->task_lock);
	      }
	    gomp_end_task ();
	  }
    }
  else
    {
      /* Deferred path: heap-allocate all NUM_TASKS tasks, then queue
	 them under the team lock and wake idle threads.  */
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
	{
	  /* Task struct and its argument block share one allocation;
	     ARG is the argument block aligned to ARG_ALIGN just past
	     the struct.  */
	  struct gomp_task *task
	    = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
	  tasks[i] = task;
	  arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
			  & ~(uintptr_t) (arg_align - 1));
	  gomp_init_task (task, parent, gomp_icv (false));
	  task->priority = priority;
	  /* Kept UNDEFERRED while CPYFN runs; flipped to WAITING below
	     once construction is complete.  */
	  task->kind = GOMP_TASK_UNDEFERRED;
	  task->in_tied_task = parent->in_tied_task;
	  task->taskgroup = taskgroup;
	  thr->task = task;
	  if (cpyfn)
	    {
	      cpyfn (arg, data);
	      task->copy_ctors_done = true;
	    }
	  else
	    memcpy (arg, data, arg_size);
	  ((TYPE *)arg)[0] = start;
	  start += task_step;
	  ((TYPE *)arg)[1] = start;
	  if (i == nfirst)
	    task_step = nfirst_task_step;
	  thr->task = parent;
	  task->kind = GOMP_TASK_WAITING;
	  task->fn = fn;
	  task->fn_data = arg;
	  /* FINAL is bit 1 of FLAGS, hence the shift to get a 0/1
	     value.  */
	  task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
	}
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
	 tasks.  Skipped when CPYFN ran: copy constructors have already
	 executed, so the tasks must run to destruct the copies.  */
      if (__builtin_expect (gomp_cancel_var, 0)
	  && cpyfn == NULL)
	{
	  if (gomp_team_barrier_cancelled (&team->barrier))
	    {
	    do_cancel:
	      /* Discard all the not-yet-queued tasks.  */
	      gomp_mutex_unlock (&team->task_lock);
	      for (i = 0; i < num_tasks; i++)
		{
		  gomp_finish_task (tasks[i]);
		  free (tasks[i]);
		}
	      if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
		ialias_call (GOMP_taskgroup_end) ();
	      return;
	    }
	  if (taskgroup)
	    {
	      if (taskgroup->cancelled)
		goto do_cancel;
	      if (taskgroup->workshare
		  && taskgroup->prev
		  && taskgroup->prev->cancelled)
		goto do_cancel;
	    }
	}
      if (taskgroup)
	taskgroup->num_children += num_tasks;
      /* Queue every task on the parent's, taskgroup's and team's
	 priority queues, all under the single task_lock hold.  */
      for (i = 0; i < num_tasks; i++)
	{
	  struct gomp_task *task = tasks[i];
	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
				 task, priority,
				 PRIORITY_INSERT_BEGIN,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  if (taskgroup)
	    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
				   task, priority, PRIORITY_INSERT_BEGIN,
				   /*last_parent_depends_on=*/false,
				   task->parent_depends_on);
	  priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
				 PRIORITY_INSERT_END,
				 /*last_parent_depends_on=*/false,
				 task->parent_depends_on);
	  ++team->task_count;
	  ++team->task_queued_count;
	}
      gomp_team_barrier_set_task_pending (&team->barrier);
      /* Wake up to NUM_TASKS idle threads; the current thread counts as
	 available when it is not in a tied task.  */
      if (team->task_running_count + !parent->in_tied_task
	  < team->nthreads)
	{
	  do_wake = team->nthreads - team->task_running_count
		    - !parent->in_tied_task;
	  if ((unsigned long) do_wake > num_tasks)
	    do_wake = num_tasks;
	}
      else
	do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
	gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  /* Without nogroup, wait for all generated tasks at the implicit
     taskgroup's end.  */
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}