/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
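      /* Idle loop: each iteration runs the outlined body of one
         parallel region, then docks on POOL->THREADS_DOCK until the
         master releases the pool for the next region.  A NULL FN at
         that point means this thread is no longer needed and should
         exit.  */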
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = gomp_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

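  /* Set up the work shares embedded in the team structure:
     work_shares[0] becomes the team's initial work share, and the
     remaining seven are chained into the allocation free list.  */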
  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  free (team);
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;

  thr = gomp_thread ();
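  /* A nonzero nesting level means the master is already inside another
     team, i.e. this parallel region is nested.  */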
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    gomp_init_affinity ();

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads are placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier ones s+1
                 places); each thread will be bound to the first
                 place in its subpartition (except for the master
                 thread, which can be bound to another place in
                 its subpartition).  */
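              /* For example, with T = 4 threads and P = 10 places
                 this gives S = 2 and REST = 2, so the four
                 subpartitions get 3, 3, 2 and 2 places
                 respectively.  */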
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread_data *));
        }

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
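              /* If the thread currently sitting in slot I is not bound
                 to an acceptable place, index the remaining old threads
                 by their place (AFFINITY_THR) and try to fill the slot
                 from that index instead.  */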
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning, so we need to reinitialize
                 the vars modified by the switch (bind) above inside
                 the loop to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so it needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_barrier_reinit (&pool->threads_dock,
                                     nthreads + affinity_count);
            }
        }

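      /* If every team slot was filled with a reused pool thread, there
         is nothing left to create below.  */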
      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

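  /* Start data for the threads that still have to be created, allocated
     on the master's stack.  Each new thread copies what it needs out of
     its slot in gomp_thread_start before waiting on the barrier that the
     master also waits on at do_release below.  */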
  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      pthread_t pt;
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&pt, attr, gomp_thread_start, start_data++);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will always be at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

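  /* Work shares that did not fit into the array embedded in the team
     were allocated in separate chunks chained through next_alloc;
     free those chunks now.  */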
  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}