xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/team.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /* Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
2    Contributed by Richard Henderson <rth@redhat.com>.
3 
4    This file is part of the GNU OpenMP Library (libgomp).
5 
6    Libgomp is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14    more details.
15 
16    Under Section 7 of GPL version 3, you are granted additional
17    permissions described in the GCC Runtime Library Exception, version
18    3.1, as published by the Free Software Foundation.
19 
20    You should have received a copy of the GNU General Public License and
21    a copy of the GCC Runtime Library Exception along with this program;
22    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23    <http://www.gnu.org/licenses/>.  */
24 
/* This file handles the maintenance of threads in response to team
   creation and termination.  */
27 
28 #include "libgomp.h"
29 #include <stdlib.h>
30 #include <string.h>
31 
32 /* This attribute contains PTHREAD_CREATE_DETACHED.  */
33 pthread_attr_t gomp_thread_attr;
34 
35 /* This key is for the thread destructor.  */
36 pthread_key_t gomp_thread_destructor;
37 
38 
39 /* This is the libgomp per-thread data structure.  */
40 #ifdef HAVE_TLS
41 __thread struct gomp_thread gomp_tls_data;
42 #else
43 pthread_key_t gomp_tls_key;
44 #endif
45 
46 
47 /* This structure is used to communicate across pthread_create.  */
48 
struct gomp_thread_start_data
{
  void (*fn) (void *);			/* Outlined region the thread runs.  */
  void *fn_data;			/* Argument passed to FN.  */
  struct gomp_team_state ts;		/* Initial team state for the thread.  */
  struct gomp_task *task;		/* Implicit task the thread starts in.  */
  struct gomp_thread_pool *thread_pool;	/* Pool the thread belongs to.  */
  bool nested;				/* True if created for a nested team;
					   such threads exit after one team.  */
};
58 
59 
60 /* This function is a pthread_create entry point.  This contains the idle
61    loop in which a thread waits to be called up to become part of a team.  */
62 
static void *
gomp_thread_start (void *xdata)
{
  /* XDATA points at a gomp_thread_start_data obtained with gomp_alloca
     on the creating thread's stack (see gomp_team_start); it is only
     valid until the creating thread passes its release barrier, so
     everything needed is copied out below before any barrier wait.  */
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  /* Without TLS, the per-thread structure lives on this thread's own
     stack for its entire lifetime and is published via the TLS key.  */
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;

  /* Publish this thread's release semaphore so ORDERED sections can
     release it by team id.  */
  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      /* Nested-team threads are not reused: run exactly one team and
	 exit.  The first barrier releases the master (after which DATA
	 is dead).  */
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait (&team->barrier);
      gomp_finish_task (task);
      /* Signal the master that the team may be safely destroyed.  */
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      /* Pool thread: register with the pool, then loop running one
	 team's work at a time, docking on threads_dock in between.
	 The master writes the next fn/data pair into thr->fn/thr->data
	 before releasing the dock; a NULL fn (left in place by the
	 self-clear below) means exit.  */
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait (&team->barrier);
	  gomp_finish_task (task);

	  gomp_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  return NULL;
}
131 
132 
133 /* Create a new team data structure.  */
134 
135 struct gomp_team *
136 gomp_new_team (unsigned nthreads)
137 {
138   struct gomp_team *team;
139   size_t size;
140   int i;
141 
142   size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
143 				      + sizeof (team->implicit_task[0]));
144   team = gomp_malloc (size);
145 
146   team->work_share_chunk = 8;
147 #ifdef HAVE_SYNC_BUILTINS
148   team->single_count = 0;
149 #else
150   gomp_mutex_init (&team->work_share_list_free_lock);
151 #endif
152   gomp_init_work_share (&team->work_shares[0], false, nthreads);
153   team->work_shares[0].next_alloc = NULL;
154   team->work_share_list_free = NULL;
155   team->work_share_list_alloc = &team->work_shares[1];
156   for (i = 1; i < 7; i++)
157     team->work_shares[i].next_free = &team->work_shares[i + 1];
158   team->work_shares[i].next_free = NULL;
159 
160   team->nthreads = nthreads;
161   gomp_barrier_init (&team->barrier, nthreads);
162 
163   gomp_sem_init (&team->master_release, 0);
164   team->ordered_release = (void *) &team->implicit_task[nthreads];
165   team->ordered_release[0] = &team->master_release;
166 
167   gomp_mutex_init (&team->task_lock);
168   team->task_queue = NULL;
169   team->task_count = 0;
170   team->task_running_count = 0;
171 
172   return team;
173 }
174 
175 
176 /* Free a team data structure.  */
177 
178 static void
179 free_team (struct gomp_team *team)
180 {
181   gomp_barrier_destroy (&team->barrier);
182   gomp_mutex_destroy (&team->task_lock);
183   free (team);
184 }
185 
186 /* Allocate and initialize a thread pool. */
187 
188 static struct gomp_thread_pool *gomp_new_thread_pool (void)
189 {
190   struct gomp_thread_pool *pool
191     = gomp_malloc (sizeof(struct gomp_thread_pool));
192   pool->threads = NULL;
193   pool->threads_size = 0;
194   pool->threads_used = 0;
195   pool->last_team = NULL;
196   return pool;
197 }
198 
/* Run by each docked pool thread during pool teardown (installed as
   thr->fn by gomp_free_thread): acknowledge the dock barrier so the
   pool can be destroyed, release this thread's semaphore, and exit.  */
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  /* Matches the second gomp_barrier_wait in gomp_free_thread; after
     this the pool may be freed by the destroying thread.  */
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&gomp_thread ()->release);
  pthread_exit (NULL);
}
208 
209 /* Free a thread pool and release its threads. */
210 
/* Tear down the calling thread's pool and implicit task.  Registered
   with pthread_key_create (gomp_thread_destructor, ...) in
   initialize_team, so it runs when a thread that owns a pool exits.
   ARG is the key value (unused; state comes from gomp_thread ()).  */
static void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  /* Hand every docked worker the exit helper as its next
	     assignment; gomp_thread_start picks it up after the dock
	     barrier releases.  */
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_barrier_destroy (&pool->threads_dock);
	}
      free (pool->threads);
      if (pool->last_team)
	free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  /* Release the thread's implicit task, if it still has one.  */
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
248 
249 /* Launch a team.  */
250 
251 void
252 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
253 		 struct gomp_team *team)
254 {
255   struct gomp_thread_start_data *start_data;
256   struct gomp_thread *thr, *nthr;
257   struct gomp_task *task;
258   struct gomp_task_icv *icv;
259   bool nested;
260   struct gomp_thread_pool *pool;
261   unsigned i, n, old_threads_used = 0;
262   pthread_attr_t thread_attr, *attr;
263 
264   thr = gomp_thread ();
265   nested = thr->ts.team != NULL;
266   if (__builtin_expect (thr->thread_pool == NULL, 0))
267     {
268       thr->thread_pool = gomp_new_thread_pool ();
269       pthread_setspecific (gomp_thread_destructor, thr);
270     }
271   pool = thr->thread_pool;
272   task = thr->task;
273   icv = task ? &task->icv : &gomp_global_icv;
274 
275   /* Always save the previous state, even if this isn't a nested team.
276      In particular, we should save any work share state from an outer
277      orphaned work share construct.  */
278   team->prev_ts = thr->ts;
279 
280   thr->ts.team = team;
281   thr->ts.team_id = 0;
282   ++thr->ts.level;
283   if (nthreads > 1)
284     ++thr->ts.active_level;
285   thr->ts.work_share = &team->work_shares[0];
286   thr->ts.last_work_share = NULL;
287 #ifdef HAVE_SYNC_BUILTINS
288   thr->ts.single_count = 0;
289 #endif
290   thr->ts.static_trip = 0;
291   thr->task = &team->implicit_task[0];
292   gomp_init_task (thr->task, task, icv);
293 
294   if (nthreads == 1)
295     return;
296 
297   i = 1;
298 
299   /* We only allow the reuse of idle threads for non-nested PARALLEL
300      regions.  This appears to be implied by the semantics of
301      threadprivate variables, but perhaps that's reading too much into
302      things.  Certainly it does prevent any locking problems, since
303      only the initial program thread will modify gomp_threads.  */
304   if (!nested)
305     {
306       old_threads_used = pool->threads_used;
307 
308       if (nthreads <= old_threads_used)
309 	n = nthreads;
310       else if (old_threads_used == 0)
311 	{
312 	  n = 0;
313 	  gomp_barrier_init (&pool->threads_dock, nthreads);
314 	}
315       else
316 	{
317 	  n = old_threads_used;
318 
319 	  /* Increase the barrier threshold to make sure all new
320 	     threads arrive before the team is released.  */
321 	  gomp_barrier_reinit (&pool->threads_dock, nthreads);
322 	}
323 
324       /* Not true yet, but soon will be.  We're going to release all
325 	 threads from the dock, and those that aren't part of the
326 	 team will exit.  */
327       pool->threads_used = nthreads;
328 
329       /* Release existing idle threads.  */
330       for (; i < n; ++i)
331 	{
332 	  nthr = pool->threads[i];
333 	  nthr->ts.team = team;
334 	  nthr->ts.work_share = &team->work_shares[0];
335 	  nthr->ts.last_work_share = NULL;
336 	  nthr->ts.team_id = i;
337 	  nthr->ts.level = team->prev_ts.level + 1;
338 	  nthr->ts.active_level = thr->ts.active_level;
339 #ifdef HAVE_SYNC_BUILTINS
340 	  nthr->ts.single_count = 0;
341 #endif
342 	  nthr->ts.static_trip = 0;
343 	  nthr->task = &team->implicit_task[i];
344 	  gomp_init_task (nthr->task, task, icv);
345 	  nthr->fn = fn;
346 	  nthr->data = data;
347 	  team->ordered_release[i] = &nthr->release;
348 	}
349 
350       if (i == nthreads)
351 	goto do_release;
352 
353       /* If necessary, expand the size of the gomp_threads array.  It is
354 	 expected that changes in the number of threads are rare, thus we
355 	 make no effort to expand gomp_threads_size geometrically.  */
356       if (nthreads >= pool->threads_size)
357 	{
358 	  pool->threads_size = nthreads + 1;
359 	  pool->threads
360 	    = gomp_realloc (pool->threads,
361 			    pool->threads_size
362 			    * sizeof (struct gomp_thread_data *));
363 	}
364     }
365 
366   if (__builtin_expect (nthreads > old_threads_used, 0))
367     {
368       long diff = (long) nthreads - (long) old_threads_used;
369 
370       if (old_threads_used == 0)
371 	--diff;
372 
373 #ifdef HAVE_SYNC_BUILTINS
374       __sync_fetch_and_add (&gomp_managed_threads, diff);
375 #else
376       gomp_mutex_lock (&gomp_remaining_threads_lock);
377       gomp_managed_threads += diff;
378       gomp_mutex_unlock (&gomp_remaining_threads_lock);
379 #endif
380     }
381 
382   attr = &gomp_thread_attr;
383   if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
384     {
385       size_t stacksize;
386       pthread_attr_init (&thread_attr);
387       pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
388       if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
389 	pthread_attr_setstacksize (&thread_attr, stacksize);
390       attr = &thread_attr;
391     }
392 
393   start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
394 			    * (nthreads-i));
395 
396   /* Launch new threads.  */
397   for (; i < nthreads; ++i, ++start_data)
398     {
399       pthread_t pt;
400       int err;
401 
402       start_data->fn = fn;
403       start_data->fn_data = data;
404       start_data->ts.team = team;
405       start_data->ts.work_share = &team->work_shares[0];
406       start_data->ts.last_work_share = NULL;
407       start_data->ts.team_id = i;
408       start_data->ts.level = team->prev_ts.level + 1;
409       start_data->ts.active_level = thr->ts.active_level;
410 #ifdef HAVE_SYNC_BUILTINS
411       start_data->ts.single_count = 0;
412 #endif
413       start_data->ts.static_trip = 0;
414       start_data->task = &team->implicit_task[i];
415       gomp_init_task (start_data->task, task, icv);
416       start_data->thread_pool = pool;
417       start_data->nested = nested;
418 
419       if (gomp_cpu_affinity != NULL)
420 	gomp_init_thread_affinity (attr);
421 
422       err = pthread_create (&pt, attr, gomp_thread_start, start_data);
423       if (err != 0)
424 	gomp_fatal ("Thread creation failed: %s", strerror (err));
425     }
426 
427   if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
428     pthread_attr_destroy (&thread_attr);
429 
430  do_release:
431   gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);
432 
433   /* Decrease the barrier threshold to match the number of threads
434      that should arrive back at the end of this team.  The extra
435      threads should be exiting.  Note that we arrange for this test
436      to never be true for nested teams.  */
437   if (__builtin_expect (nthreads < old_threads_used, 0))
438     {
439       long diff = (long) nthreads - (long) old_threads_used;
440 
441       gomp_barrier_reinit (&pool->threads_dock, nthreads);
442 
443 #ifdef HAVE_SYNC_BUILTINS
444       __sync_fetch_and_add (&gomp_managed_threads, diff);
445 #else
446       gomp_mutex_lock (&gomp_remaining_threads_lock);
447       gomp_managed_threads += diff;
448       gomp_mutex_unlock (&gomp_remaining_threads_lock);
449 #endif
450     }
451 }
452 
453 
454 /* Terminate the current team.  This is only to be called by the master
455    thread.  We assume that we must wait for the other threads.  */
456 
void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.  */
  gomp_team_barrier_wait (&team->barrier);
  gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  /* Pop back to the team state saved by gomp_team_start.  */
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
      /* This was a nested team: its threads are exiting now, so drop
	 them from the managed-thread count before waiting for them.  */
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  /* Free any work shares allocated beyond the team's inline chunk
     (walk the next_alloc chain hanging off slot 0).  */
  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  /* Nested teams (and degenerate one-thread teams) are freed outright;
     otherwise cache the team in the pool for reuse by the next
     parallel region, displacing any previously cached team.  */
  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
    }
}
511 
512 
513 /* Constructors for this file.  */
514 
/* Library constructor: set up per-thread state for the initial program
   thread — the TLS key fallback when __thread support is unavailable,
   the destructor key that tears down thread pools on thread exit, and
   the initial thread's release semaphore.  */
static void __attribute__((constructor))
initialize_team (void)
{
  struct gomp_thread *thr;

#ifndef HAVE_TLS
  /* No compiler TLS: keep the initial thread's data in static storage
     and publish it through a pthread key.  */
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  /* gomp_free_thread runs on thread exit for any thread whose key
     value was set (see gomp_team_start / gomp_new_icv).  */
  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  thr = &initial_thread_tls_data;
#endif
  gomp_sem_init (&thr->release, 0);
}
537 
/* Library destructor: drop the thread-destructor key when libgomp is
   unloaded.  */
static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
545 
546 struct gomp_task_icv *
547 gomp_new_icv (void)
548 {
549   struct gomp_thread *thr = gomp_thread ();
550   struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
551   gomp_init_task (task, NULL, &gomp_global_icv);
552   thr->task = task;
553   pthread_setspecific (gomp_thread_destructor, thr);
554   return &task->icv;
555 }
556