/* Copyright (C) 2005-2013 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#ifdef HAVE_TLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  return NULL;
}
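
/* A note on the dock protocol used above: between teams, a non-nested
   worker parks in gomp_barrier_wait (&pool->threads_dock).  To redeploy
   it, the master stores a new fn/data pair into the worker's struct
   gomp_thread and then releases the dock.  In outline, this is what
   gomp_team_start and gomp_free_thread below do (after setting up the
   rest of the worker's state):

     nthr->fn = fn;       // or gomp_free_pool_helper when reaping the pool
     nthr->data = data;
     gomp_barrier_wait (&pool->threads_dock);    // undock the workers

   A worker that wakes with thr->fn still NULL, i.e. one not assigned to
   a new, smaller team, falls out of the while loop above and exits.  */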


/* Create a new team data structure.  */
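
/* A single gomp_malloc covers the team descriptor and its two trailing
   per-thread arrays.  Assuming implicit_task is the flexible trailing
   member of struct gomp_team (declared in libgomp.h), the block below
   is laid out roughly as:

     | struct gomp_team ... | implicit_task[nthreads] | ordered_release[nthreads] |

   with team->ordered_release pointed just past implicit_task[nthreads];
   see the size computation and the cast below.  */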

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
                                      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_running_count = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool.  */

static struct gomp_thread_pool *
gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof (struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&gomp_thread ()->release);
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  */

static void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits until all threads have called
             gomp_barrier_wait_last in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_remaining_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
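
/* gomp_team_start and gomp_team_end below are the master side of the
   protocol.  As a minimal sketch of how the pieces fit together, a
   caller (hypothetical run_parallel; approximately what the
   GOMP_parallel_start and GOMP_parallel_end entry points in parallel.c
   amount to, with num-threads resolution omitted) would look like:

     static void
     run_parallel (void (*fn) (void *), void *data, unsigned nthreads)
     {
       gomp_team_start (fn, data, nthreads, gomp_new_team (nthreads));
       fn (data);          // the master runs its own share of the work
       gomp_team_end ();   // wait for workers, then re-dock or reap them
     }
*/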

/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;

  if (nthreads == 1)
    return;

  i = 1;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (i == nthreads)
        goto do_release;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
        }
    }

  if (__builtin_expect (nthreads > old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));
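
  /* Note that start_data lives in the master's stack frame (gomp_alloca)
     and is only guaranteed valid until the startup barrier below:
     gomp_thread_start copies everything it needs out of its slot before
     waiting on that same barrier, and we do not return, releasing this
     frame, until the barrier at do_release completes.  */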

  /* Launch new threads.  */
  for (; i < nthreads; ++i, ++start_data)
    {
      pthread_t pt;
      int err;

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      if (gomp_cpu_affinity != NULL)
        gomp_init_thread_affinity (attr);

      err = pthread_create (&pt, attr, gomp_thread_start, start_data);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  */
  if (__builtin_expect (nthreads < old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.  */
  gomp_team_barrier_wait (&team->barrier);
  gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
  struct gomp_thread *thr;

#ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  thr = &initial_thread_tls_data;
#endif
  gomp_sem_init (&thr->release, 0);
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this, a dlclose of libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Allocate a fresh task with the default ICVs for the current thread,
   install it as the thread's task, and register the thread destructor
   so the associated resources are reclaimed at thread exit.  */

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}