xref: /netbsd-src/lib/libpthread/pthread.c (revision 5bbd2a12505d72a8177929a37b5cee489d0a1cfd)
1 /*	$NetBSD: pthread.c,v 1.137 2012/08/15 13:28:32 drochner Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Nathan J. Williams and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: pthread.c,v 1.137 2012/08/15 13:28:32 drochner Exp $");
34 
35 #define	__EXPOSE_STACK	1
36 
37 #include <sys/param.h>
38 #include <sys/exec_elf.h>
39 #include <sys/mman.h>
40 #include <sys/lwp.h>
41 #include <sys/lwpctl.h>
42 #include <sys/tls.h>
43 
44 #include <assert.h>
45 #include <dlfcn.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <lwp.h>
49 #include <signal.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <syslog.h>
54 #include <ucontext.h>
55 #include <unistd.h>
56 #include <sched.h>
57 
58 #include "pthread.h"
59 #include "pthread_int.h"
60 
/* Lock and red-black tree over every live thread, keyed by address. */
pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
RB_HEAD(__pthread__alltree, __pthread_st) pthread__alltree;

#ifndef lint
static int	pthread__cmp(struct __pthread_st *, struct __pthread_st *);
RB_PROTOTYPE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
#endif

/* Forward declarations for file-local helpers defined below. */
static void	pthread__create_tramp(void *);
static void	pthread__initthread(pthread_t);
static void	pthread__scrubthread(pthread_t, char *, int);
static void	pthread__initmain(pthread_t *);
static void	pthread__fork_callback(void);
static void	pthread__reap(pthread_t);
static void	pthread__child_callback(void);
static void	pthread__start(void);

void	pthread__init(void);

/* Nonzero once the first thread besides main() has been created. */
int pthread__started;
pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_queue_t pthread__deadqueue;	/* exited threads awaiting reuse */
pthread_queue_t pthread__allqueue;	/* all threads, live and zombie */

static pthread_attr_t pthread_default_attr;
/* Placeholder lwpctl block used until _lwp_ctl() supplies a real one. */
static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };

/* Reporting-mode flags parsed from $PTHREAD_DIAGASSERT in pthread__init(). */
enum {
	DIAGASSERT_ABORT =	1<<0,
	DIAGASSERT_STDERR =	1<<1,
	DIAGASSERT_SYSLOG =	1<<2
};

static int pthread__diagassert;

int pthread__concurrency;	/* _SC_NPROCESSORS_CONF at init time */
int pthread__nspins;
int pthread__unpark_max = PTHREAD__UNPARK_MAX;
int pthread__dbg;	/* set by libpthread_dbg if active */

/*
 * We have to initialize the pthread_stack* variables here because
 * mutexes are used before pthread_init() and thus pthread__initmain()
 * are called.  Since mutexes only save the stack pointer and not a
 * pointer to the thread data, it is safe to change the mapping from
 * stack pointer to thread data afterwards.
 */
size_t	pthread__stacksize;
size_t	pthread__pagesize;
static struct __pthread_st pthread__main;	/* thread record for main() */

int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);

/* libc threading hooks are redirected to their pthread implementations. */
__strong_alias(__libc_thr_self,pthread_self)
__strong_alias(__libc_thr_create,pthread_create)
__strong_alias(__libc_thr_exit,pthread_exit)
__strong_alias(__libc_thr_errno,pthread__errno)
__strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
__strong_alias(__libc_thr_equal,pthread_equal)
__strong_alias(__libc_thr_init,pthread__init)

/*
 * Static library kludge.  Place a reference to a symbol any library
 * file which does not already have a reference here.
 */
extern int pthread__cancel_stub_binder;

void *pthread__static_lib_binder[] = {
	&pthread__cancel_stub_binder,
	pthread_cond_init,
	pthread_mutex_init,
	pthread_rwlock_init,
	pthread_barrier_init,
	pthread_key_create,
	pthread_setspecific,
};

#define	NHASHLOCK	64

/* Hashed mutexes, each padded to 64 bytes to avoid false sharing. */
static union hashlock {
	pthread_mutex_t	mutex;
	char		pad[64];
} hashlocks[NHASHLOCK] __aligned(64);
144 
145 /*
146  * This needs to be started by the library loading code, before main()
147  * gets to run, for various things that use the state of the initial thread
148  * to work properly (thread-specific data is an application-visible example;
149  * spinlock counts for mutexes is an internal example).
150  */
void
pthread__init(void)
{
	pthread_t first;
	char *p;
	int i;
	extern int __isthreaded;

	pthread__pagesize = (size_t)sysconf(_SC_PAGESIZE);
	pthread__concurrency = sysconf(_SC_NPROCESSORS_CONF);

	/* Initialize locks first; they're needed elsewhere. */
	pthread__lockprim_init();
	for (i = 0; i < NHASHLOCK; i++) {
		pthread_mutex_init(&hashlocks[i].mutex, NULL);
	}

	/*
	 * Fetch parameters.  The _lwp_unpark_all() result is used to
	 * cap pthread__unpark_max (presumably the kernel's per-call
	 * unpark batch limit — confirm against _lwp_unpark_all(2)).
	 */
	i = (int)_lwp_unpark_all(NULL, 0, NULL);
	if (i == -1)
		err(1, "_lwp_unpark_all");
	if (i < pthread__unpark_max)
		pthread__unpark_max = i;

	/* Basic data structure setup */
	pthread_attr_init(&pthread_default_attr);
	PTQ_INIT(&pthread__allqueue);
	PTQ_INIT(&pthread__deadqueue);
	RB_INIT(&pthread__alltree);

	/* Create the thread structure corresponding to main() */
	pthread__initmain(&first);
	pthread__initthread(first);
	pthread__scrubthread(first, NULL, 0);

	first->pt_lid = _lwp_self();
	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
	RB_INSERT(__pthread__alltree, &pthread__alltree, first);

	/* Obtain the kernel's per-LWP communication area for main(). */
	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &first->pt_lwpctl) != 0) {
		err(1, "_lwp_ctl");
	}

	/* Start subsystems */
	PTHREAD_MD_INIT

	/*
	 * PTHREAD_DIAGASSERT selects error reporting: 'a'bort,
	 * std'e'rr, sys'l'og; the uppercase letter clears the flag.
	 */
	for (p = pthread__getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
		switch (*p) {
		case 'a':
			pthread__diagassert |= DIAGASSERT_ABORT;
			break;
		case 'A':
			pthread__diagassert &= ~DIAGASSERT_ABORT;
			break;
		case 'e':
			pthread__diagassert |= DIAGASSERT_STDERR;
			break;
		case 'E':
			pthread__diagassert &= ~DIAGASSERT_STDERR;
			break;
		case 'l':
			pthread__diagassert |= DIAGASSERT_SYSLOG;
			break;
		case 'L':
			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
			break;
		}
	}

	/* Tell libc that we're here and it should role-play accordingly. */
	pthread_atfork(NULL, NULL, pthread__fork_callback);
	__isthreaded = 1;
}
224 
225 static void
226 pthread__fork_callback(void)
227 {
228 	struct __pthread_st *self = pthread__self();
229 
230 	/* lwpctl state is not copied across fork. */
231 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
232 		err(1, "_lwp_ctl");
233 	}
234 	self->pt_lid = _lwp_self();
235 }
236 
237 static void
238 pthread__child_callback(void)
239 {
240 
241 	/*
242 	 * Clean up data structures that a forked child process might
243 	 * trip over. Note that if threads have been created (causing
244 	 * this handler to be registered) the standards say that the
245 	 * child will trigger undefined behavior if it makes any
246 	 * pthread_* calls (or any other calls that aren't
247 	 * async-signal-safe), so we don't really have to clean up
248 	 * much. Anything that permits some pthread_* calls to work is
249 	 * merely being polite.
250 	 */
251 	pthread__started = 0;
252 }
253 
254 static void
255 pthread__start(void)
256 {
257 
258 	/*
259 	 * Per-process timers are cleared by fork(); despite the
260 	 * various restrictions on fork() and threads, it's legal to
261 	 * fork() before creating any threads.
262 	 */
263 	pthread_atfork(NULL, NULL, pthread__child_callback);
264 }
265 
266 
267 /* General-purpose thread data structure sanitization. */
268 /* ARGSUSED */
269 static void
270 pthread__initthread(pthread_t t)
271 {
272 
273 	t->pt_self = t;
274 	t->pt_magic = PT_MAGIC;
275 	t->pt_willpark = 0;
276 	t->pt_unpark = 0;
277 	t->pt_nwaiters = 0;
278 	t->pt_sleepobj = NULL;
279 	t->pt_signalled = 0;
280 	t->pt_havespecific = 0;
281 	t->pt_early = NULL;
282 	t->pt_lwpctl = &pthread__dummy_lwpctl;
283 	t->pt_blocking = 0;
284 	t->pt_droplock = NULL;
285 
286 	memcpy(&t->pt_lockops, pthread__lock_ops, sizeof(t->pt_lockops));
287 	pthread_mutex_init(&t->pt_lock, NULL);
288 	PTQ_INIT(&t->pt_cleanup_stack);
289 	pthread_cond_init(&t->pt_joiners, NULL);
290 	memset(&t->pt_specific, 0, sizeof(t->pt_specific));
291 }
292 
293 static void
294 pthread__scrubthread(pthread_t t, char *name, int flags)
295 {
296 
297 	t->pt_state = PT_STATE_RUNNING;
298 	t->pt_exitval = NULL;
299 	t->pt_flags = flags;
300 	t->pt_cancel = 0;
301 	t->pt_errno = 0;
302 	t->pt_name = name;
303 	t->pt_lid = 0;
304 }
305 
/*
 * Provide a stack for "newthread": either the caller-supplied one from
 * the attribute, or a fresh mmap()ed region with a guard page.
 * Returns 0 on success or an errno value.
 */
static int
pthread__getstack(pthread_t newthread, const pthread_attr_t *attr)
{
	void *stackbase, *stackbase2, *redzone;
	size_t stacksize, guardsize;
	bool allocated;

	if (attr != NULL) {
		pthread_attr_getstack(attr, &stackbase, &stacksize);
	} else {
		stackbase = NULL;
		stacksize = 0;
	}
	if (stacksize == 0)
		stacksize = pthread__stacksize;

	/*
	 * A recycled thread may still own a stack.  Keep it when no
	 * explicit base was requested and the size matches; otherwise
	 * unmap it (guard page included) before allocating a new one.
	 */
	if (newthread->pt_stack_allocated) {
		if (stackbase == NULL &&
		    newthread->pt_stack.ss_size == stacksize)
			return 0;
		stackbase2 = newthread->pt_stack.ss_sp;
#ifndef __MACHINE_STACK_GROWS_UP
		/* The mapping starts at the guard page below ss_sp. */
		stackbase2 = (char *)stackbase2 - newthread->pt_guardsize;
#endif
		munmap(stackbase2,
		    newthread->pt_stack.ss_size + newthread->pt_guardsize);
		newthread->pt_stack.ss_sp = NULL;
		newthread->pt_stack.ss_size = 0;
		newthread->pt_guardsize = 0;
		newthread->pt_stack_allocated = false;
	}

	newthread->pt_stack_allocated = false;

	if (stackbase == NULL) {
		/* Round the size up to a whole number of pages. */
		stacksize = ((stacksize - 1) | (pthread__pagesize - 1)) + 1;
		guardsize = pthread__pagesize;
		stackbase = mmap(NULL, stacksize + guardsize,
		    PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, (off_t)0);
		if (stackbase == MAP_FAILED)
			return ENOMEM;
		allocated = true;
	} else {
		/* Caller-supplied stacks get no guard page. */
		guardsize = 0;
		allocated = false;
	}
#ifdef __MACHINE_STACK_GROWS_UP
	redzone = (char *)stackbase + stacksize;
	stackbase2 = (char *)stackbase;
#else
	redzone = (char *)stackbase;
	stackbase2 = (char *)stackbase + guardsize;
#endif
	/* Make the guard page inaccessible so overflow faults early. */
	if (allocated && guardsize &&
	    mprotect(redzone, guardsize, PROT_NONE) == -1) {
		munmap(stackbase, stacksize + guardsize);
		return EPERM;
	}
	newthread->pt_stack.ss_size = stacksize;
	newthread->pt_stack.ss_sp = stackbase2;
	newthread->pt_guardsize = guardsize;
	newthread->pt_stack_allocated = allocated;
	return 0;
}
370 
/*
 * Create a new thread: reuse a dead thread record when possible,
 * otherwise allocate one; set up its stack and context, then ask the
 * kernel for a new LWP running pthread__create_tramp().
 * Returns 0 on success or an errno value.
 */
int
pthread_create(pthread_t *thread, const pthread_attr_t *attr,
	    void *(*startfunc)(void *), void *arg)
{
	pthread_t newthread;
	pthread_attr_t nattr;
	struct pthread_attr_private *p;
	char * volatile name;
	unsigned long flag;
	void *private_area;
	int ret;

	/*
	 * It's okay to check this without a lock because there can
	 * only be one thread before it becomes true.
	 */
	if (pthread__started == 0) {
		pthread__start();
		pthread__started = 1;
	}

	if (attr == NULL)
		nattr = pthread_default_attr;
	else if (attr->pta_magic == PT_ATTR_MAGIC)
		nattr = *attr;
	else
		return EINVAL;

	/* Fetch misc. attributes from the attr structure. */
	name = NULL;
	if ((p = nattr.pta_private) != NULL)
		if (p->ptap_name[0] != '\0')
			if ((name = strdup(p->ptap_name)) == NULL)
				return ENOMEM;

	newthread = NULL;

	/*
	 * Try to reclaim a dead thread.  A candidate must have truly
	 * exited: either its lwpctl block says so, or probing the LID
	 * with _lwp_kill(..., 0) reports ESRCH.
	 */
	if (!PTQ_EMPTY(&pthread__deadqueue)) {
		pthread_mutex_lock(&pthread__deadqueue_lock);
		PTQ_FOREACH(newthread, &pthread__deadqueue, pt_deadq) {
			/* Still running? */
			if (newthread->pt_lwpctl->lc_curcpu ==
			    LWPCTL_CPU_EXITED ||
			    (_lwp_kill(newthread->pt_lid, 0) == -1 &&
			    errno == ESRCH))
				break;
		}
		if (newthread)
			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
		pthread_mutex_unlock(&pthread__deadqueue_lock);
#if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
		/* Release the dead thread's TLS; a new area is made below. */
		if (newthread && newthread->pt_tls) {
			_rtld_tls_free(newthread->pt_tls);
			newthread->pt_tls = NULL;
		}
#endif
	}

	/*
	 * If necessary set up a stack, allocate space for a pthread_st,
	 * and initialize it.
	 */
	if (newthread == NULL) {
		newthread = malloc(sizeof(*newthread));
		if (newthread == NULL) {
			free(name);
			return ENOMEM;
		}
		newthread->pt_stack_allocated = false;

		if (pthread__getstack(newthread, attr)) {
			free(newthread);
			free(name);
			return ENOMEM;
		}

		/* This is used only when creating the thread. */
		_INITCONTEXT_U(&newthread->pt_uc);
		newthread->pt_uc.uc_stack = newthread->pt_stack;
		newthread->pt_uc.uc_link = NULL;
#if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
		newthread->pt_tls = NULL;
#endif

		/* Add to list of all threads. */
		pthread_rwlock_wrlock(&pthread__alltree_lock);
		PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
		RB_INSERT(__pthread__alltree, &pthread__alltree, newthread);
		pthread_rwlock_unlock(&pthread__alltree_lock);

		/* Will be reset by the thread upon exit. */
		pthread__initthread(newthread);
	} else {
		/*
		 * Recycled record: it is already in allqueue/alltree,
		 * so only the stack and context need refreshing.  On
		 * failure, return it to the dead queue.
		 */
		if (pthread__getstack(newthread, attr)) {
			pthread_mutex_lock(&pthread__deadqueue_lock);
			PTQ_INSERT_TAIL(&pthread__deadqueue, newthread, pt_deadq);
			pthread_mutex_unlock(&pthread__deadqueue_lock);
			return ENOMEM;
		}
		_INITCONTEXT_U(&newthread->pt_uc);
		newthread->pt_uc.uc_stack = newthread->pt_stack;
		newthread->pt_uc.uc_link = NULL;
	}

	/*
	 * Create the new LWP.
	 */
	pthread__scrubthread(newthread, name, nattr.pta_flags);
	newthread->pt_func = startfunc;
	newthread->pt_arg = arg;
#if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
	private_area = newthread->pt_tls = _rtld_tls_allocate();
	newthread->pt_tls->tcb_pthread = newthread;
#else
	private_area = newthread;
#endif

	_lwp_makecontext(&newthread->pt_uc, pthread__create_tramp,
	    newthread, private_area, newthread->pt_stack.ss_sp,
	    newthread->pt_stack.ss_size);

	flag = LWP_DETACHED;
	/*
	 * Start suspended when the creator asked for it, or when
	 * scheduling parameters must be applied before the first run.
	 */
	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0 ||
	    (nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0)
		flag |= LWP_SUSPENDED;
	ret = _lwp_create(&newthread->pt_uc, flag, &newthread->pt_lid);
	if (ret != 0) {
		ret = errno;
		pthread_mutex_lock(&newthread->pt_lock);
		/* Will unlock and free name. */
		pthread__reap(newthread);
		return ret;
	}

	if ((nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0) {
		if (p != NULL) {
			(void)pthread_setschedparam(newthread, p->ptap_policy,
			    &p->ptap_sp);
		}
		/* Scheduling is set; release the LWP unless the caller
		 * also wanted it suspended. */
		if ((newthread->pt_flags & PT_FLAG_SUSPENDED) == 0) {
			(void)_lwp_continue(newthread->pt_lid);
		}
	}

	*thread = newthread;

	return 0;
}
522 
523 
/*
 * Entry point for every new LWP: name the LWP, obtain its lwpctl
 * area, run the user's start function, and pthread_exit() with its
 * return value.  "cookie" is the thread's own pthread_t.
 */
__dead static void
pthread__create_tramp(void *cookie)
{
	pthread_t self;
	void *retval;

	self = cookie;

	/*
	 * Throw away some stack in a feeble attempt to reduce cache
	 * thrash.  May help for SMT processors.  XXX We should not
	 * be allocating stacks on fixed 2MB boundaries.  Needs a
	 * thread register or decent thread local storage.
	 *
	 * Note that we may race with the kernel in _lwp_create(),
	 * and so pt_lid can be unset at this point, but we don't
	 * care.
	 */
	(void)alloca(((unsigned)self->pt_lid & 7) << 8);

	if (self->pt_name != NULL) {
		/* Re-check under pt_lock: pthread_setname_np() can race. */
		pthread_mutex_lock(&self->pt_lock);
		if (self->pt_name != NULL)
			(void)_lwp_setname(0, self->pt_name);
		pthread_mutex_unlock(&self->pt_lock);
	}

	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
		err(1, "_lwp_ctl");
	}

	retval = (*self->pt_func)(self->pt_arg);

	pthread_exit(retval);

	/*NOTREACHED*/
	pthread__abort();
}
562 
563 int
564 pthread_suspend_np(pthread_t thread)
565 {
566 	pthread_t self;
567 
568 	self = pthread__self();
569 	if (self == thread) {
570 		return EDEADLK;
571 	}
572 	if (pthread__find(thread) != 0)
573 		return ESRCH;
574 	if (_lwp_suspend(thread->pt_lid) == 0)
575 		return 0;
576 	return errno;
577 }
578 
579 int
580 pthread_resume_np(pthread_t thread)
581 {
582 
583 	if (pthread__find(thread) != 0)
584 		return ESRCH;
585 	if (_lwp_continue(thread->pt_lid) == 0)
586 		return 0;
587 	return errno;
588 }
589 
/*
 * Terminate the calling thread: run cancellation cleanup handlers and
 * TSD destructors, publish "retval" for joiners, then exit the LWP.
 * Never returns.
 */
void
pthread_exit(void *retval)
{
	pthread_t self;
	struct pt_clean_t *cleanup;
	char *name;

	self = pthread__self();

	/* Disable cancellability. */
	pthread_mutex_lock(&self->pt_lock);
	self->pt_flags |= PT_FLAG_CS_DISABLED;
	self->pt_cancel = 0;

	/* Call any cancellation cleanup handlers */
	if (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
		/* Drop our lock: handlers run arbitrary user code. */
		pthread_mutex_unlock(&self->pt_lock);
		while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
			cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
			PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
			(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
		}
		pthread_mutex_lock(&self->pt_lock);
	}

	/* Perform cleanup of thread-specific data */
	pthread__destroy_tsd(self);

	/* Signal our exit. */
	self->pt_exitval = retval;
	if (self->pt_flags & PT_FLAG_DETACHED) {
		/*
		 * Detached: no one will join us, so free our own name
		 * and queue ourselves directly for reuse.
		 */
		self->pt_state = PT_STATE_DEAD;
		name = self->pt_name;
		self->pt_name = NULL;
		pthread_mutex_unlock(&self->pt_lock);
		if (name != NULL)
			free(name);
		pthread_mutex_lock(&pthread__deadqueue_lock);
		PTQ_INSERT_TAIL(&pthread__deadqueue, self, pt_deadq);
		pthread_mutex_unlock(&pthread__deadqueue_lock);
		_lwp_exit();
	} else {
		/* Joinable: become a zombie and wake any joiners. */
		self->pt_state = PT_STATE_ZOMBIE;
		pthread_cond_broadcast(&self->pt_joiners);
		pthread_mutex_unlock(&self->pt_lock);
		/* Note: name will be freed by the joiner. */
		_lwp_exit();
	}

	/*NOTREACHED*/
	pthread__abort();
	exit(1);
}
643 
644 
/*
 * Wait for "thread" to terminate and optionally fetch its exit value.
 * pt_droplock is pointed at the target's lock so that cancellation
 * during the wait can release it (see pthread__cancelled()).
 */
int
pthread_join(pthread_t thread, void **valptr)
{
	pthread_t self;
	int error;

	self = pthread__self();

	if (pthread__find(thread) != 0)
		return ESRCH;

	if (thread->pt_magic != PT_MAGIC)
		return EINVAL;

	if (thread == self)
		return EDEADLK;

	self->pt_droplock = &thread->pt_lock;
	pthread_mutex_lock(&thread->pt_lock);
	for (;;) {
		if (thread->pt_state == PT_STATE_ZOMBIE)
			break;
		if (thread->pt_state == PT_STATE_DEAD) {
			/* Another joiner got there first. */
			pthread_mutex_unlock(&thread->pt_lock);
			self->pt_droplock = NULL;
			return ESRCH;
		}
		if ((thread->pt_flags & PT_FLAG_DETACHED) != 0) {
			/* Detached threads are not joinable. */
			pthread_mutex_unlock(&thread->pt_lock);
			self->pt_droplock = NULL;
			return EINVAL;
		}
		error = pthread_cond_wait(&thread->pt_joiners,
		    &thread->pt_lock);
		if (error != 0) {
			pthread__errorfunc(__FILE__, __LINE__,
			    __func__, "unexpected return from cond_wait()");
		}

	}
	/* pthread_join() is a cancellation point. */
	pthread__testcancel(self);
	if (valptr != NULL)
		*valptr = thread->pt_exitval;
	/* pthread__reap() will drop the lock. */
	pthread__reap(thread);
	self->pt_droplock = NULL;

	return 0;
}
694 
695 static void
696 pthread__reap(pthread_t thread)
697 {
698 	char *name;
699 
700 	name = thread->pt_name;
701 	thread->pt_name = NULL;
702 	thread->pt_state = PT_STATE_DEAD;
703 	pthread_mutex_unlock(&thread->pt_lock);
704 
705 	pthread_mutex_lock(&pthread__deadqueue_lock);
706 	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
707 	pthread_mutex_unlock(&pthread__deadqueue_lock);
708 
709 	if (name != NULL)
710 		free(name);
711 }
712 
713 int
714 pthread_equal(pthread_t t1, pthread_t t2)
715 {
716 
717 	/* Nothing special here. */
718 	return (t1 == t2);
719 }
720 
721 
722 int
723 pthread_detach(pthread_t thread)
724 {
725 
726 	if (pthread__find(thread) != 0)
727 		return ESRCH;
728 
729 	if (thread->pt_magic != PT_MAGIC)
730 		return EINVAL;
731 
732 	pthread_mutex_lock(&thread->pt_lock);
733 	thread->pt_flags |= PT_FLAG_DETACHED;
734 	if (thread->pt_state == PT_STATE_ZOMBIE) {
735 		/* pthread__reap() will drop the lock. */
736 		pthread__reap(thread);
737 	} else {
738 		/*
739 		 * Not valid for threads to be waiting in
740 		 * pthread_join() (there are intractable
741 		 * sync issues from the application
742 		 * perspective), but give those threads
743 		 * a chance anyway.
744 		 */
745 		pthread_cond_broadcast(&thread->pt_joiners);
746 		pthread_mutex_unlock(&thread->pt_lock);
747 	}
748 
749 	return 0;
750 }
751 
752 
753 int
754 pthread_getname_np(pthread_t thread, char *name, size_t len)
755 {
756 
757 	if (pthread__find(thread) != 0)
758 		return ESRCH;
759 
760 	if (thread->pt_magic != PT_MAGIC)
761 		return EINVAL;
762 
763 	pthread_mutex_lock(&thread->pt_lock);
764 	if (thread->pt_name == NULL)
765 		name[0] = '\0';
766 	else
767 		strlcpy(name, thread->pt_name, len);
768 	pthread_mutex_unlock(&thread->pt_lock);
769 
770 	return 0;
771 }
772 
773 
774 int
775 pthread_setname_np(pthread_t thread, const char *name, void *arg)
776 {
777 	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
778 	int namelen;
779 
780 	if (pthread__find(thread) != 0)
781 		return ESRCH;
782 
783 	if (thread->pt_magic != PT_MAGIC)
784 		return EINVAL;
785 
786 	namelen = snprintf(newname, sizeof(newname), name, arg);
787 	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
788 		return EINVAL;
789 
790 	cp = strdup(newname);
791 	if (cp == NULL)
792 		return ENOMEM;
793 
794 	pthread_mutex_lock(&thread->pt_lock);
795 	oldname = thread->pt_name;
796 	thread->pt_name = cp;
797 	(void)_lwp_setname(thread->pt_lid, cp);
798 	pthread_mutex_unlock(&thread->pt_lock);
799 
800 	if (oldname != NULL)
801 		free(oldname);
802 
803 	return 0;
804 }
805 
806 
807 
808 /*
809  * XXX There should be a way for applications to use the efficent
810  *  inline version, but there are opacity/namespace issues.
811  */
812 pthread_t
813 pthread_self(void)
814 {
815 
816 	return pthread__self();
817 }
818 
819 
820 int
821 pthread_cancel(pthread_t thread)
822 {
823 
824 	if (pthread__find(thread) != 0)
825 		return ESRCH;
826 	pthread_mutex_lock(&thread->pt_lock);
827 	thread->pt_flags |= PT_FLAG_CS_PENDING;
828 	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
829 		thread->pt_cancel = 1;
830 		pthread_mutex_unlock(&thread->pt_lock);
831 		_lwp_wakeup(thread->pt_lid);
832 	} else
833 		pthread_mutex_unlock(&thread->pt_lock);
834 
835 	return 0;
836 }
837 
838 
/*
 * Enable or disable cancellation for the calling thread, returning the
 * previous state via "oldstate".  Re-enabling with a cancel pending
 * may cancel immediately if the type is asynchronous.
 */
int
pthread_setcancelstate(int state, int *oldstate)
{
	pthread_t self;
	int retval;

	self = pthread__self();
	retval = 0;

	pthread_mutex_lock(&self->pt_lock);

	if (oldstate != NULL) {
		if (self->pt_flags & PT_FLAG_CS_DISABLED)
			*oldstate = PTHREAD_CANCEL_DISABLE;
		else
			*oldstate = PTHREAD_CANCEL_ENABLE;
	}

	if (state == PTHREAD_CANCEL_DISABLE) {
		self->pt_flags |= PT_FLAG_CS_DISABLED;
		/*
		 * An already-delivered cancel is demoted back to a
		 * pending request while cancellation is disabled.
		 */
		if (self->pt_cancel) {
			self->pt_flags |= PT_FLAG_CS_PENDING;
			self->pt_cancel = 0;
		}
	} else if (state == PTHREAD_CANCEL_ENABLE) {
		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
		/*
		 * If a cancellation was requested while cancellation
		 * was disabled, note that fact for future
		 * cancellation tests.
		 */
		if (self->pt_flags & PT_FLAG_CS_PENDING) {
			self->pt_cancel = 1;
			/* This is not a deferred cancellation point. */
			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
				pthread_mutex_unlock(&self->pt_lock);
				pthread__cancelled();
			}
		}
	} else
		retval = EINVAL;

	pthread_mutex_unlock(&self->pt_lock);

	return retval;
}
885 
886 
887 int
888 pthread_setcanceltype(int type, int *oldtype)
889 {
890 	pthread_t self;
891 	int retval;
892 
893 	self = pthread__self();
894 	retval = 0;
895 
896 	pthread_mutex_lock(&self->pt_lock);
897 
898 	if (oldtype != NULL) {
899 		if (self->pt_flags & PT_FLAG_CS_ASYNC)
900 			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
901 		else
902 			*oldtype = PTHREAD_CANCEL_DEFERRED;
903 	}
904 
905 	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
906 		self->pt_flags |= PT_FLAG_CS_ASYNC;
907 		if (self->pt_cancel) {
908 			pthread_mutex_unlock(&self->pt_lock);
909 			pthread__cancelled();
910 		}
911 	} else if (type == PTHREAD_CANCEL_DEFERRED)
912 		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
913 	else
914 		retval = EINVAL;
915 
916 	pthread_mutex_unlock(&self->pt_lock);
917 
918 	return retval;
919 }
920 
921 
922 void
923 pthread_testcancel(void)
924 {
925 	pthread_t self;
926 
927 	self = pthread__self();
928 	if (self->pt_cancel)
929 		pthread__cancelled();
930 }
931 
932 
933 /*
934  * POSIX requires that certain functions return an error rather than
935  * invoking undefined behavior even when handed completely bogus
936  * pthread_t values, e.g. stack garbage or (pthread_t)666. This
937  * utility routine searches the list of threads for the pthread_t
938  * value without dereferencing it.
939  */
940 int
941 pthread__find(pthread_t id)
942 {
943 	pthread_t target;
944 
945 	pthread_rwlock_rdlock(&pthread__alltree_lock);
946 	/* LINTED */
947 	target = RB_FIND(__pthread__alltree, &pthread__alltree, id);
948 	pthread_rwlock_unlock(&pthread__alltree_lock);
949 
950 	if (target == NULL || target->pt_state == PT_STATE_DEAD)
951 		return ESRCH;
952 
953 	return 0;
954 }
955 
956 
957 void
958 pthread__testcancel(pthread_t self)
959 {
960 
961 	if (self->pt_cancel)
962 		pthread__cancelled();
963 }
964 
965 
966 void
967 pthread__cancelled(void)
968 {
969 	pthread_mutex_t *droplock;
970 	pthread_t self;
971 
972 	self = pthread__self();
973 	droplock = self->pt_droplock;
974 	self->pt_droplock = NULL;
975 
976 	if (droplock != NULL && pthread_mutex_held_np(droplock))
977 		pthread_mutex_unlock(droplock);
978 
979 	pthread_exit(PTHREAD_CANCELED);
980 }
981 
982 
983 void
984 pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
985 {
986 	pthread_t self;
987 	struct pt_clean_t *entry;
988 
989 	self = pthread__self();
990 	entry = store;
991 	entry->ptc_cleanup = cleanup;
992 	entry->ptc_arg = arg;
993 	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
994 }
995 
996 
997 void
998 pthread__cleanup_pop(int ex, void *store)
999 {
1000 	pthread_t self;
1001 	struct pt_clean_t *entry;
1002 
1003 	self = pthread__self();
1004 	entry = store;
1005 
1006 	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
1007 	if (ex)
1008 		(*entry->ptc_cleanup)(entry->ptc_arg);
1009 }
1010 
1011 
1012 int *
1013 pthread__errno(void)
1014 {
1015 	pthread_t self;
1016 
1017 	self = pthread__self();
1018 
1019 	return &(self->pt_errno);
1020 }
1021 
1022 ssize_t	_sys_write(int, const void *, size_t);
1023 
/*
 * Report a failed internal assertion on stderr, raise SIGABRT, and
 * exit.  Never returns.
 */
void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, sizeof(buf),
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	/*
	 * snprintf() returns the untruncated length (or a negative
	 * value on error); clamp it so _sys_write() never reads past
	 * the end of buf.
	 */
	if (len < 0)
		len = 0;
	else if ((size_t)len >= sizeof(buf))
		len = (int)(sizeof(buf) - 1);

	_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)kill(getpid(), SIGABRT);

	_exit(1);
}
1047 
1048 
1049 void
1050 pthread__errorfunc(const char *file, int line, const char *function,
1051 		   const char *msg)
1052 {
1053 	char buf[1024];
1054 	size_t len;
1055 
1056 	if (pthread__diagassert == 0)
1057 		return;
1058 
1059 	/*
1060 	 * snprintf should not acquire any locks, or we could
1061 	 * end up deadlocked if the assert caller held locks.
1062 	 */
1063 	len = snprintf(buf, 1024,
1064 	    "%s: Error detected by libpthread: %s.\n"
1065 	    "Detected by file \"%s\", line %d%s%s%s.\n"
1066 	    "See pthread(3) for information.\n",
1067 	    getprogname(), msg, file, line,
1068 	    function ? ", function \"" : "",
1069 	    function ? function : "",
1070 	    function ? "\"" : "");
1071 
1072 	if (pthread__diagassert & DIAGASSERT_STDERR)
1073 		_sys_write(STDERR_FILENO, buf, len);
1074 
1075 	if (pthread__diagassert & DIAGASSERT_SYSLOG)
1076 		syslog(LOG_DEBUG | LOG_USER, "%s", buf);
1077 
1078 	if (pthread__diagassert & DIAGASSERT_ABORT) {
1079 		(void)kill(getpid(), SIGABRT);
1080 		_exit(1);
1081 	}
1082 }
1083 
1084 /*
1085  * Thread park/unpark operations.  The kernel operations are
1086  * modelled after a brief description from "Multithreading in
1087  * the Solaris Operating Environment":
1088  *
1089  * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
1090  */
1091 
1092 #define	OOPS(msg)			\
1093     pthread__errorfunc(__FILE__, __LINE__, __func__, msg)
1094 
/*
 * Put the calling thread (already queued on "queue" by the caller) to
 * sleep until unparked, interrupted, or "abstime" expires.  "lock" is
 * held on entry, released while blocked, and NOT held on return.
 * Returns 0, EINTR (when "cancelpt" and cancelled) or ETIMEDOUT.
 */
int
pthread__park(pthread_t self, pthread_mutex_t *lock,
	      pthread_queue_t *queue, const struct timespec *abstime,
	      int cancelpt, const void *hint)
{
	int rv, error;
	void *obj;

	/*
	 * For non-interlocked release of mutexes we need a store
	 * barrier before incrementing pt_blocking away from zero.
	 * This is provided by pthread_mutex_unlock().
	 */
	self->pt_willpark = 1;
	pthread_mutex_unlock(lock);
	self->pt_willpark = 0;
	self->pt_blocking++;

	/*
	 * Wait until we are awoken by a pending unpark operation,
	 * a signal, an unpark posted after we have gone asleep,
	 * or an expired timeout.
	 *
	 * It is fine to test the value of pt_sleepobj without
	 * holding any locks, because:
	 *
	 * o Only the blocking thread (this thread) ever sets them
	 *   to a non-NULL value.
	 *
	 * o Other threads may set them NULL, but if they do so they
	 *   must also make this thread return from _lwp_park.
	 *
	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
	 *   calls and all make use of spinlocks in the kernel.  So
	 *   these system calls act as full memory barriers, and will
	 *   ensure that the calling CPU's store buffers are drained.
	 *   In combination with the spinlock release before unpark,
	 *   this means that modification of pt_sleepobj/onq by another
	 *   thread will become globally visible before that thread
	 *   schedules an unpark operation on this thread.
	 *
	 * Note: the test in the while() statement dodges the park op if
	 * we have already been awoken, unless there is another thread to
	 * awaken.  This saves a syscall - if we were already awakened,
	 * the next call to _lwp_park() would need to return early in order
	 * to eat the previous wakeup.
	 */
	rv = 0;
	do {
		/*
		 * If we deferred unparking a thread, arrange to
		 * have _lwp_park() restart it before blocking.
		 */
		error = _lwp_park(abstime, self->pt_unpark, hint, hint);
		self->pt_unpark = 0;
		if (error != 0) {
			switch (rv = errno) {
			case EINTR:
			case EALREADY:
				/* Spurious or already-posted wakeup:
				 * not an error, re-evaluate the loop. */
				rv = 0;
				break;
			case ETIMEDOUT:
				/* Propagated to the caller as-is. */
				break;
			default:
				OOPS("_lwp_park failed");
				break;
			}
		}
		/* Check for cancellation. */
		if (cancelpt && self->pt_cancel)
			rv = EINTR;
	} while (self->pt_sleepobj != NULL && rv == 0);

	/*
	 * If we have been awoken early but are still on the queue,
	 * then remove ourself.  Again, it's safe to do the test
	 * without holding any locks.
	 */
	if (__predict_false(self->pt_sleepobj != NULL)) {
		pthread_mutex_lock(lock);
		if ((obj = self->pt_sleepobj) != NULL) {
			PTQ_REMOVE(queue, self, pt_sleep);
			self->pt_sleepobj = NULL;
			/* Give the wait object a chance to clean up. */
			if (obj != NULL && self->pt_early != NULL)
				(*self->pt_early)(obj);
		}
		pthread_mutex_unlock(lock);
	}
	self->pt_early = NULL;
	self->pt_blocking--;
	membar_sync();

	return rv;
}
1189 
/*
 * Wake the first thread sleeping on "queue".  The target's LWP id is
 * batched into self->pt_waiters and the actual wakeup is handed off to
 * pthread__mutex_deferwake(); if the batch is already full
 * (pthread__unpark_max entries) it is flushed first with
 * _lwp_unpark_all().
 *
 * Clearing target->pt_sleepobj before issuing the wakeup is what lets
 * pthread__park() test pt_sleepobj without holding locks -- see the
 * memory-visibility comment there.
 *
 * NOTE(review): PTQ_FIRST() is dereferenced without a NULL check, so
 * callers must guarantee the queue is non-empty; presumably they also
 * hold "interlock" -- verify against call sites.
 */
void
pthread__unpark(pthread_queue_t *queue, pthread_t self,
		pthread_mutex_t *interlock)
{
	pthread_t target;
	u_int max;
	size_t nwaiters;

	max = pthread__unpark_max;
	nwaiters = self->pt_nwaiters;
	target = PTQ_FIRST(queue);
	if (nwaiters == max) {
		/* Overflow.  Flush the pending batch to make room. */
		(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
		    __UNVOLATILE(&interlock->ptm_waiters));
		nwaiters = 0;
	}
	target->pt_sleepobj = NULL;
	self->pt_waiters[nwaiters++] = target->pt_lid;
	PTQ_REMOVE(queue, target, pt_sleep);
	self->pt_nwaiters = nwaiters;
	pthread__mutex_deferwake(self, interlock);
}
1213 
/*
 * Wake every thread sleeping on "queue".  As in pthread__unpark(), LWP
 * ids are accumulated in self->pt_waiters and handed off to
 * pthread__mutex_deferwake(); whenever the batch fills up
 * (pthread__unpark_max entries) it is flushed immediately with
 * _lwp_unpark_all().
 *
 * Each target's pt_sleepobj is cleared before its wakeup is scheduled,
 * which is what lets pthread__park() test pt_sleepobj locklessly --
 * see the memory-visibility comment there.  The queue is emptied with
 * PTQ_INIT() rather than per-entry removal since every waiter leaves.
 *
 * NOTE(review): presumably called with "interlock" held -- verify
 * against call sites.
 */
void
pthread__unpark_all(pthread_queue_t *queue, pthread_t self,
		    pthread_mutex_t *interlock)
{
	pthread_t target;
	u_int max;
	size_t nwaiters;

	max = pthread__unpark_max;
	nwaiters = self->pt_nwaiters;
	PTQ_FOREACH(target, queue, pt_sleep) {
		if (nwaiters == max) {
			/* Overflow.  Flush the pending batch. */
			(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
			    __UNVOLATILE(&interlock->ptm_waiters));
			nwaiters = 0;
		}
		target->pt_sleepobj = NULL;
		self->pt_waiters[nwaiters++] = target->pt_lid;
	}
	self->pt_nwaiters = nwaiters;
	PTQ_INIT(queue);
	pthread__mutex_deferwake(self, interlock);
}
1238 
1239 #undef	OOPS
1240 
1241 static void
1242 pthread__initmainstack(void)
1243 {
1244 	struct rlimit slimit;
1245 	const AuxInfo *aux;
1246 	size_t size;
1247 
1248 	_DIAGASSERT(_dlauxinfo() != NULL);
1249 
1250 	if (getrlimit(RLIMIT_STACK, &slimit) == -1)
1251 		err(1, "Couldn't get stack resource consumption limits");
1252 	size = slimit.rlim_cur;
1253 	pthread__main.pt_stack.ss_size = size;
1254 
1255 	for (aux = _dlauxinfo(); aux->a_type != AT_NULL; ++aux) {
1256 		if (aux->a_type == AT_STACKBASE) {
1257 			pthread__main.pt_stack.ss_sp = (void *)aux->a_v;
1258 #ifdef __MACHINE_STACK_GROWS_UP
1259 			pthread__main.pt_stack.ss_sp = (void *)aux->a_v;
1260 #else
1261 			pthread__main.pt_stack.ss_sp = (char *)aux->a_v - size;
1262 #endif
1263 			break;
1264 		}
1265 	}
1266 }
1267 
1268 /*
1269  * Set up the slightly special stack for the "initial" thread, which
1270  * runs on the normal system stack, and thus gets slightly different
1271  * treatment.
1272  */
1273 static void
1274 pthread__initmain(pthread_t *newt)
1275 {
1276 	char *value;
1277 
1278 	pthread__initmainstack();
1279 
1280 	value = pthread__getenv("PTHREAD_STACKSIZE");
1281 	if (value != NULL) {
1282 		pthread__stacksize = atoi(value) * 1024;
1283 		if (pthread__stacksize > pthread__main.pt_stack.ss_size)
1284 			pthread__stacksize = pthread__main.pt_stack.ss_size;
1285 	}
1286 	if (pthread__stacksize == 0)
1287 		pthread__stacksize = pthread__main.pt_stack.ss_size;
1288 	pthread__stacksize += pthread__pagesize - 1;
1289 	pthread__stacksize &= ~(pthread__pagesize - 1);
1290 	if (pthread__stacksize < 4 * pthread__pagesize)
1291 		errx(1, "Stacksize limit is too low, minimum %zd kbyte.",
1292 		    4 * pthread__pagesize / 1024);
1293 
1294 	*newt = &pthread__main;
1295 #ifdef __HAVE___LWP_GETTCB_FAST
1296 	pthread__main.pt_tls = __lwp_gettcb_fast();
1297 #else
1298 	pthread__main.pt_tls = _lwp_getprivate();
1299 #endif
1300 	pthread__main.pt_tls->tcb_pthread = &pthread__main;
1301 }
1302 
1303 #ifndef lint
1304 static int
1305 pthread__cmp(struct __pthread_st *a, struct __pthread_st *b)
1306 {
1307 
1308 	if ((uintptr_t)a < (uintptr_t)b)
1309 		return (-1);
1310 	else if (a == b)
1311 		return 0;
1312 	else
1313 		return 1;
1314 }
1315 RB_GENERATE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
1316 #endif
1317 
1318 /* Because getenv() wants to use locks. */
char *
pthread__getenv(const char *name)
{
	extern char **environ;
	char **ep;
	size_t namelen;

	/* Scan environ directly; getenv() would take libc locks. */
	namelen = strlen(name);
	for (ep = environ; *ep != NULL; ep++) {
		char *entry = *ep;

		/* Match "name=" at the start of the entry. */
		if (strncmp(entry, name, namelen) == 0 &&
		    entry[namelen] == '=')
			return entry + namelen + 1;
	}

	return NULL;
}
1335 
1336 pthread_mutex_t *
1337 pthread__hashlock(volatile const void *p)
1338 {
1339 	uintptr_t v;
1340 
1341 	v = (uintptr_t)p;
1342 	return &hashlocks[((v >> 9) ^ (v >> 3)) & (NHASHLOCK - 1)].mutex;
1343 }
1344 
1345 int
1346 pthread__checkpri(int pri)
1347 {
1348 	static int havepri;
1349 	static long min, max;
1350 
1351 	if (!havepri) {
1352 		min = sysconf(_SC_SCHED_PRI_MIN);
1353 		max = sysconf(_SC_SCHED_PRI_MAX);
1354 		havepri = 1;
1355 	}
1356 	return (pri < min || pri > max) ? EINVAL : 0;
1357 }
1358