/* Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include <stdlib.h>
#include <string.h>

/* This attribute contains PTHREAD_CREATE_DETACHED.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#ifdef HAVE_TLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  bool nested;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
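
  /* DATA points into the creating thread's stack (see the gomp_alloca
     call in gomp_team_start) and becomes invalid once the barriers
     below release its creator, so every field needed later is read out
     before any barrier is reached.  */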

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait (&team->barrier);
          gomp_finish_task (task);

          gomp_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  return NULL;
}


/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
                                      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_running_count = 0;

  return team;
}


/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  free (team);
}

/* Allocate and initialize a thread pool.  */

static struct gomp_thread_pool *
gomp_new_thread_pool (void)
{
  struct gomp_thread_pool *pool
    = gomp_malloc (sizeof (struct gomp_thread_pool));
  pool->threads = NULL;
  pool->threads_size = 0;
  pool->threads_used = 0;
  pool->last_team = NULL;
  return pool;
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&gomp_thread ()->release);
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  */

static void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
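          /* The threads are still blocked in the idle loop of
             gomp_thread_start; when undocked below, each picks this up
             from thr->fn and runs gomp_free_pool_helper instead of
             team work.  */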
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_barrier_destroy (&pool->threads_dock);
        }
      free (pool->threads);
      if (pool->last_team)
        free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}

/* Launch a team.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 struct gomp_team *team)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;

  thr = gomp_thread ();
  nested = thr->ts.team != NULL;
  if (__builtin_expect (thr->thread_pool == NULL, 0))
    {
      thr->thread_pool = gomp_new_thread_pool ();
      pthread_setspecific (gomp_thread_destructor, thr);
    }
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  gomp_init_task (thr->task, task, icv);

  if (nthreads == 1)
    return;

  i = 1;

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;
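
      /* The reused threads are still blocked on threads_dock, so their
         gomp_thread structures can be updated below without locking;
         the release at do_release publishes the new team state.  */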

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          gomp_init_task (nthr->task, task, icv);
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }

      if (i == nthreads)
        goto do_release;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
        }
    }

  if (__builtin_expect (nthreads > old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i, ++start_data)
    {
      pthread_t pt;
      int err;

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      start_data->thread_pool = pool;
      start_data->nested = nested;

      if (gomp_cpu_affinity != NULL)
        gomp_init_thread_affinity (attr);

      err = pthread_create (&pt, attr, gomp_thread_start, start_data);
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
    pthread_attr_destroy (&thread_attr);

 do_release:
  gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);
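
  /* In the nested case OLD_THREADS_USED is still zero here, which is
     how the test below is arranged never to be true for nested teams.  */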

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  */
  if (__builtin_expect (nthreads < old_threads_used, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      gomp_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
    }
}


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.  */
  gomp_team_barrier_wait (&team->barrier);
  gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
    }
}


/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
  struct gomp_thread *thr;

#ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  thr = &initial_thread_tls_data;
#endif
  gomp_sem_init (&thr->release, 0);
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
  pthread_setspecific (gomp_thread_destructor, thr);
  return &task->icv;
}
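
/* For reference: compiler-generated code drives the entry points above
   through GOMP_parallel_start and GOMP_parallel_end (see parallel.c),
   roughly as in this sketch:

     struct gomp_team *team = gomp_new_team (num_threads);
     gomp_team_start (fn, data, num_threads, team);
     fn (data);              -- the master runs its share inline
     gomp_team_end ();

   where num_threads has first been filtered through
   gomp_resolve_num_threads.  */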