xref: /netbsd-src/lib/libpthread/pthread.c (revision ba65fde2d7fefa7d39838fa5fa855e62bd606b5e)
1 /*	$NetBSD: pthread.c,v 1.142 2013/01/01 18:42:39 dsl Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Nathan J. Williams and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: pthread.c,v 1.142 2013/01/01 18:42:39 dsl Exp $");
34 
35 #define	__EXPOSE_STACK	1
36 
37 #include <sys/param.h>
38 #include <sys/exec_elf.h>
39 #include <sys/mman.h>
40 #include <sys/lwp.h>
41 #include <sys/lwpctl.h>
42 #include <sys/resource.h>
43 #include <sys/tls.h>
44 
45 #include <assert.h>
46 #include <dlfcn.h>
47 #include <err.h>
48 #include <errno.h>
49 #include <lwp.h>
50 #include <signal.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <stddef.h>
54 #include <string.h>
55 #include <syslog.h>
56 #include <ucontext.h>
57 #include <unistd.h>
58 #include <sched.h>
59 
60 #include "pthread.h"
61 #include "pthread_int.h"
62 
63 pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
64 static rb_tree_t	pthread__alltree;
65 
66 static signed int	pthread__cmp(void *, const void *, const void *);
67 
68 static const rb_tree_ops_t pthread__alltree_ops = {
69 	.rbto_compare_nodes = pthread__cmp,
70 	.rbto_compare_key = pthread__cmp,
71 	.rbto_node_offset = offsetof(struct __pthread_st, pt_alltree),
72 	.rbto_context = NULL
73 };
74 
75 static void	pthread__create_tramp(void *);
76 static void	pthread__initthread(pthread_t);
77 static void	pthread__scrubthread(pthread_t, char *, int);
78 static void	pthread__initmain(pthread_t *);
79 static void	pthread__fork_callback(void);
80 static void	pthread__reap(pthread_t);
81 static void	pthread__child_callback(void);
82 static void	pthread__start(void);
83 
84 void	pthread__init(void);
85 
86 int pthread__started;
87 pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
88 pthread_queue_t pthread__deadqueue;
89 pthread_queue_t pthread__allqueue;
90 
91 static pthread_attr_t pthread_default_attr;
92 static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };
93 
94 enum {
95 	DIAGASSERT_ABORT =	1<<0,
96 	DIAGASSERT_STDERR =	1<<1,
97 	DIAGASSERT_SYSLOG =	1<<2
98 };
99 
100 static int pthread__diagassert;
101 
102 int pthread__concurrency;
103 int pthread__nspins;
104 int pthread__unpark_max = PTHREAD__UNPARK_MAX;
105 int pthread__dbg;	/* set by libpthread_dbg if active */
106 
107 /*
108  * We have to initialize the pthread_stack* variables here because
109  * mutexes are used before pthread_init() and thus pthread__initmain()
110  * are called.  Since mutexes only save the stack pointer and not a
111  * pointer to the thread data, it is safe to change the mapping from
112  * stack pointer to thread data afterwards.
113  */
114 size_t	pthread__stacksize;
115 size_t	pthread__pagesize;
116 static struct __pthread_st pthread__main;
117 
118 int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);
119 
120 __strong_alias(__libc_thr_self,pthread_self)
121 __strong_alias(__libc_thr_create,pthread_create)
122 __strong_alias(__libc_thr_exit,pthread_exit)
123 __strong_alias(__libc_thr_errno,pthread__errno)
124 __strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
125 __strong_alias(__libc_thr_equal,pthread_equal)
126 __strong_alias(__libc_thr_init,pthread__init)
127 
128 /*
129  * Static library kludge.  Place a reference to a symbol any library
130  * file which does not already have a reference here.
131  */
132 extern int pthread__cancel_stub_binder;
133 
134 void *pthread__static_lib_binder[] = {
135 	&pthread__cancel_stub_binder,
136 	pthread_cond_init,
137 	pthread_mutex_init,
138 	pthread_rwlock_init,
139 	pthread_barrier_init,
140 	pthread_key_create,
141 	pthread_setspecific,
142 };
143 
144 #define	NHASHLOCK	64
145 
146 static union hashlock {
147 	pthread_mutex_t	mutex;
148 	char		pad[64];
149 } hashlocks[NHASHLOCK] __aligned(64);
150 
151 /*
152  * This needs to be started by the library loading code, before main()
153  * gets to run, for various things that use the state of the initial thread
154  * to work properly (thread-specific data is an application-visible example;
155  * spinlock counts for mutexes is an internal example).
156  */
157 void
158 pthread__init(void)
159 {
160 	pthread_t first;
161 	char *p;
162 	int i;
163 	extern int __isthreaded;
164 
165 	pthread__pagesize = (size_t)sysconf(_SC_PAGESIZE);
166 	pthread__concurrency = (int)sysconf(_SC_NPROCESSORS_CONF);
167 
168 	/* Initialize locks first; they're needed elsewhere. */
169 	pthread__lockprim_init();
170 	for (i = 0; i < NHASHLOCK; i++) {
171 		pthread_mutex_init(&hashlocks[i].mutex, NULL);
172 	}
173 
174 	/* Fetch parameters. */
175 	i = (int)_lwp_unpark_all(NULL, 0, NULL);
176 	if (i == -1)
177 		err(1, "_lwp_unpark_all");
178 	if (i < pthread__unpark_max)
179 		pthread__unpark_max = i;
180 
181 	/* Basic data structure setup */
182 	pthread_attr_init(&pthread_default_attr);
183 	PTQ_INIT(&pthread__allqueue);
184 	PTQ_INIT(&pthread__deadqueue);
185 
186 	rb_tree_init(&pthread__alltree, &pthread__alltree_ops);
187 
188 	/* Create the thread structure corresponding to main() */
189 	pthread__initmain(&first);
190 	pthread__initthread(first);
191 	pthread__scrubthread(first, NULL, 0);
192 
193 	first->pt_lid = _lwp_self();
194 	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
195 	(void)rb_tree_insert_node(&pthread__alltree, first);
196 
197 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &first->pt_lwpctl) != 0) {
198 		err(1, "_lwp_ctl");
199 	}
200 
201 	/* Start subsystems */
202 	PTHREAD_MD_INIT
203 
204 	for (p = pthread__getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
205 		switch (*p) {
206 		case 'a':
207 			pthread__diagassert |= DIAGASSERT_ABORT;
208 			break;
209 		case 'A':
210 			pthread__diagassert &= ~DIAGASSERT_ABORT;
211 			break;
212 		case 'e':
213 			pthread__diagassert |= DIAGASSERT_STDERR;
214 			break;
215 		case 'E':
216 			pthread__diagassert &= ~DIAGASSERT_STDERR;
217 			break;
218 		case 'l':
219 			pthread__diagassert |= DIAGASSERT_SYSLOG;
220 			break;
221 		case 'L':
222 			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
223 			break;
224 		}
225 	}
226 
227 	/* Tell libc that we're here and it should role-play accordingly. */
228 	pthread_atfork(NULL, NULL, pthread__fork_callback);
229 	__isthreaded = 1;
230 }
231 
232 static void
233 pthread__fork_callback(void)
234 {
235 	struct __pthread_st *self = pthread__self();
236 
237 	/* lwpctl state is not copied across fork. */
238 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
239 		err(1, "_lwp_ctl");
240 	}
241 	self->pt_lid = _lwp_self();
242 }
243 
244 static void
245 pthread__child_callback(void)
246 {
247 
248 	/*
249 	 * Clean up data structures that a forked child process might
250 	 * trip over. Note that if threads have been created (causing
251 	 * this handler to be registered) the standards say that the
252 	 * child will trigger undefined behavior if it makes any
253 	 * pthread_* calls (or any other calls that aren't
254 	 * async-signal-safe), so we don't really have to clean up
255 	 * much. Anything that permits some pthread_* calls to work is
256 	 * merely being polite.
257 	 */
258 	pthread__started = 0;
259 }
260 
261 static void
262 pthread__start(void)
263 {
264 
265 	/*
266 	 * Per-process timers are cleared by fork(); despite the
267 	 * various restrictions on fork() and threads, it's legal to
268 	 * fork() before creating any threads.
269 	 */
270 	pthread_atfork(NULL, NULL, pthread__child_callback);
271 }
272 
273 
274 /* General-purpose thread data structure sanitization. */
275 /* ARGSUSED */
276 static void
277 pthread__initthread(pthread_t t)
278 {
279 
280 	t->pt_self = t;
281 	t->pt_magic = PT_MAGIC;
282 	t->pt_willpark = 0;
283 	t->pt_unpark = 0;
284 	t->pt_nwaiters = 0;
285 	t->pt_sleepobj = NULL;
286 	t->pt_signalled = 0;
287 	t->pt_havespecific = 0;
288 	t->pt_early = NULL;
289 	t->pt_lwpctl = &pthread__dummy_lwpctl;
290 	t->pt_blocking = 0;
291 	t->pt_droplock = NULL;
292 
293 	memcpy(&t->pt_lockops, pthread__lock_ops, sizeof(t->pt_lockops));
294 	pthread_mutex_init(&t->pt_lock, NULL);
295 	PTQ_INIT(&t->pt_cleanup_stack);
296 	pthread_cond_init(&t->pt_joiners, NULL);
297 	memset(&t->pt_specific, 0, sizeof(t->pt_specific));
298 }
299 
300 static void
301 pthread__scrubthread(pthread_t t, char *name, int flags)
302 {
303 
304 	t->pt_state = PT_STATE_RUNNING;
305 	t->pt_exitval = NULL;
306 	t->pt_flags = flags;
307 	t->pt_cancel = 0;
308 	t->pt_errno = 0;
309 	t->pt_name = name;
310 	t->pt_lid = 0;
311 }
312 
313 static int
314 pthread__getstack(pthread_t newthread, const pthread_attr_t *attr)
315 {
316 	void *stackbase, *stackbase2, *redzone;
317 	size_t stacksize, guardsize;
318 	bool allocated;
319 
320 	if (attr != NULL) {
321 		pthread_attr_getstack(attr, &stackbase, &stacksize);
322 	} else {
323 		stackbase = NULL;
324 		stacksize = 0;
325 	}
326 	if (stacksize == 0)
327 		stacksize = pthread__stacksize;
328 
329 	if (newthread->pt_stack_allocated) {
330 		if (stackbase == NULL &&
331 		    newthread->pt_stack.ss_size == stacksize)
332 			return 0;
333 		stackbase2 = newthread->pt_stack.ss_sp;
334 #ifndef __MACHINE_STACK_GROWS_UP
335 		stackbase2 = (char *)stackbase2 - newthread->pt_guardsize;
336 #endif
337 		munmap(stackbase2,
338 		    newthread->pt_stack.ss_size + newthread->pt_guardsize);
339 		newthread->pt_stack.ss_sp = NULL;
340 		newthread->pt_stack.ss_size = 0;
341 		newthread->pt_guardsize = 0;
342 		newthread->pt_stack_allocated = false;
343 	}
344 
345 	newthread->pt_stack_allocated = false;
346 
347 	if (stackbase == NULL) {
348 		stacksize = ((stacksize - 1) | (pthread__pagesize - 1)) + 1;
349 		guardsize = pthread__pagesize;
350 		stackbase = mmap(NULL, stacksize + guardsize,
351 		    PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, (off_t)0);
352 		if (stackbase == MAP_FAILED)
353 			return ENOMEM;
354 		allocated = true;
355 	} else {
356 		guardsize = 0;
357 		allocated = false;
358 	}
359 #ifdef __MACHINE_STACK_GROWS_UP
360 	redzone = (char *)stackbase + stacksize;
361 	stackbase2 = (char *)stackbase;
362 #else
363 	redzone = (char *)stackbase;
364 	stackbase2 = (char *)stackbase + guardsize;
365 #endif
366 	if (allocated && guardsize &&
367 	    mprotect(redzone, guardsize, PROT_NONE) == -1) {
368 		munmap(stackbase, stacksize + guardsize);
369 		return EPERM;
370 	}
371 	newthread->pt_stack.ss_size = stacksize;
372 	newthread->pt_stack.ss_sp = stackbase2;
373 	newthread->pt_guardsize = guardsize;
374 	newthread->pt_stack_allocated = allocated;
375 	return 0;
376 }
377 
378 int
379 pthread_create(pthread_t *thread, const pthread_attr_t *attr,
380 	    void *(*startfunc)(void *), void *arg)
381 {
382 	pthread_t newthread;
383 	pthread_attr_t nattr;
384 	struct pthread_attr_private *p;
385 	char * volatile name;
386 	unsigned long flag;
387 	void *private_area;
388 	int ret;
389 
390 	/*
391 	 * It's okay to check this without a lock because there can
392 	 * only be one thread before it becomes true.
393 	 */
394 	if (pthread__started == 0) {
395 		pthread__start();
396 		pthread__started = 1;
397 	}
398 
399 	if (attr == NULL)
400 		nattr = pthread_default_attr;
401 	else if (attr->pta_magic == PT_ATTR_MAGIC)
402 		nattr = *attr;
403 	else
404 		return EINVAL;
405 
406 	/* Fetch misc. attributes from the attr structure. */
407 	name = NULL;
408 	if ((p = nattr.pta_private) != NULL)
409 		if (p->ptap_name[0] != '\0')
410 			if ((name = strdup(p->ptap_name)) == NULL)
411 				return ENOMEM;
412 
413 	newthread = NULL;
414 
415 	/*
416 	 * Try to reclaim a dead thread.
417 	 */
418 	if (!PTQ_EMPTY(&pthread__deadqueue)) {
419 		pthread_mutex_lock(&pthread__deadqueue_lock);
420 		PTQ_FOREACH(newthread, &pthread__deadqueue, pt_deadq) {
421 			/* Still running? */
422 			if (newthread->pt_lwpctl->lc_curcpu ==
423 			    LWPCTL_CPU_EXITED ||
424 			    (_lwp_kill(newthread->pt_lid, 0) == -1 &&
425 			    errno == ESRCH))
426 				break;
427 		}
428 		if (newthread)
429 			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
430 		pthread_mutex_unlock(&pthread__deadqueue_lock);
431 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
432 		if (newthread && newthread->pt_tls) {
433 			_rtld_tls_free(newthread->pt_tls);
434 			newthread->pt_tls = NULL;
435 		}
436 #endif
437 	}
438 
439 	/*
440 	 * If necessary set up a stack, allocate space for a pthread_st,
441 	 * and initialize it.
442 	 */
443 	if (newthread == NULL) {
444 		newthread = malloc(sizeof(*newthread));
445 		if (newthread == NULL) {
446 			free(name);
447 			return ENOMEM;
448 		}
449 		newthread->pt_stack_allocated = false;
450 
451 		if (pthread__getstack(newthread, attr)) {
452 			free(newthread);
453 			free(name);
454 			return ENOMEM;
455 		}
456 
457 		/* This is used only when creating the thread. */
458 		_INITCONTEXT_U(&newthread->pt_uc);
459 		newthread->pt_uc.uc_stack = newthread->pt_stack;
460 		newthread->pt_uc.uc_link = NULL;
461 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
462 		newthread->pt_tls = NULL;
463 #endif
464 
465 		/* Add to list of all threads. */
466 		pthread_rwlock_wrlock(&pthread__alltree_lock);
467 		PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
468 		(void)rb_tree_insert_node(&pthread__alltree, newthread);
469 		pthread_rwlock_unlock(&pthread__alltree_lock);
470 
471 		/* Will be reset by the thread upon exit. */
472 		pthread__initthread(newthread);
473 	} else {
474 		if (pthread__getstack(newthread, attr)) {
475 			pthread_mutex_lock(&pthread__deadqueue_lock);
476 			PTQ_INSERT_TAIL(&pthread__deadqueue, newthread, pt_deadq);
477 			pthread_mutex_unlock(&pthread__deadqueue_lock);
478 			return ENOMEM;
479 		}
480 		_INITCONTEXT_U(&newthread->pt_uc);
481 		newthread->pt_uc.uc_stack = newthread->pt_stack;
482 		newthread->pt_uc.uc_link = NULL;
483 	}
484 
485 	/*
486 	 * Create the new LWP.
487 	 */
488 	pthread__scrubthread(newthread, name, nattr.pta_flags);
489 	newthread->pt_func = startfunc;
490 	newthread->pt_arg = arg;
491 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
492 	private_area = newthread->pt_tls = _rtld_tls_allocate();
493 	newthread->pt_tls->tcb_pthread = newthread;
494 #else
495 	private_area = newthread;
496 #endif
497 
498 	_lwp_makecontext(&newthread->pt_uc, pthread__create_tramp,
499 	    newthread, private_area, newthread->pt_stack.ss_sp,
500 	    newthread->pt_stack.ss_size);
501 
502 	flag = LWP_DETACHED;
503 	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0 ||
504 	    (nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0)
505 		flag |= LWP_SUSPENDED;
506 	ret = _lwp_create(&newthread->pt_uc, flag, &newthread->pt_lid);
507 	if (ret != 0) {
508 		ret = errno;
509 		pthread_mutex_lock(&newthread->pt_lock);
510 		/* Will unlock and free name. */
511 		pthread__reap(newthread);
512 		return ret;
513 	}
514 
515 	if ((nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0) {
516 		if (p != NULL) {
517 			(void)pthread_setschedparam(newthread, p->ptap_policy,
518 			    &p->ptap_sp);
519 		}
520 		if ((newthread->pt_flags & PT_FLAG_SUSPENDED) == 0) {
521 			(void)_lwp_continue(newthread->pt_lid);
522 		}
523 	}
524 
525 	*thread = newthread;
526 
527 	return 0;
528 }
529 
530 
531 __dead static void
532 pthread__create_tramp(void *cookie)
533 {
534 	pthread_t self;
535 	void *retval;
536 
537 	self = cookie;
538 
539 	/*
540 	 * Throw away some stack in a feeble attempt to reduce cache
541 	 * thrash.  May help for SMT processors.  XXX We should not
542 	 * be allocating stacks on fixed 2MB boundaries.  Needs a
543 	 * thread register or decent thread local storage.
544 	 *
545 	 * Note that we may race with the kernel in _lwp_create(),
546 	 * and so pt_lid can be unset at this point, but we don't
547 	 * care.
548 	 */
549 	(void)alloca(((unsigned)self->pt_lid & 7) << 8);
550 
551 	if (self->pt_name != NULL) {
552 		pthread_mutex_lock(&self->pt_lock);
553 		if (self->pt_name != NULL)
554 			(void)_lwp_setname(0, self->pt_name);
555 		pthread_mutex_unlock(&self->pt_lock);
556 	}
557 
558 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
559 		err(1, "_lwp_ctl");
560 	}
561 
562 	retval = (*self->pt_func)(self->pt_arg);
563 
564 	pthread_exit(retval);
565 
566 	/*NOTREACHED*/
567 	pthread__abort();
568 }
569 
570 int
571 pthread_suspend_np(pthread_t thread)
572 {
573 	pthread_t self;
574 
575 	self = pthread__self();
576 	if (self == thread) {
577 		return EDEADLK;
578 	}
579 	if (pthread__find(thread) != 0)
580 		return ESRCH;
581 	if (_lwp_suspend(thread->pt_lid) == 0)
582 		return 0;
583 	return errno;
584 }
585 
586 int
587 pthread_resume_np(pthread_t thread)
588 {
589 
590 	if (pthread__find(thread) != 0)
591 		return ESRCH;
592 	if (_lwp_continue(thread->pt_lid) == 0)
593 		return 0;
594 	return errno;
595 }
596 
597 void
598 pthread_exit(void *retval)
599 {
600 	pthread_t self;
601 	struct pt_clean_t *cleanup;
602 	char *name;
603 
604 	self = pthread__self();
605 
606 	/* Disable cancellability. */
607 	pthread_mutex_lock(&self->pt_lock);
608 	self->pt_flags |= PT_FLAG_CS_DISABLED;
609 	self->pt_cancel = 0;
610 
611 	/* Call any cancellation cleanup handlers */
612 	if (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
613 		pthread_mutex_unlock(&self->pt_lock);
614 		while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
615 			cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
616 			PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
617 			(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
618 		}
619 		pthread_mutex_lock(&self->pt_lock);
620 	}
621 
622 	/* Perform cleanup of thread-specific data */
623 	pthread__destroy_tsd(self);
624 
625 	/* Signal our exit. */
626 	self->pt_exitval = retval;
627 	if (self->pt_flags & PT_FLAG_DETACHED) {
628 		self->pt_state = PT_STATE_DEAD;
629 		name = self->pt_name;
630 		self->pt_name = NULL;
631 		pthread_mutex_unlock(&self->pt_lock);
632 		if (name != NULL)
633 			free(name);
634 		pthread_mutex_lock(&pthread__deadqueue_lock);
635 		PTQ_INSERT_TAIL(&pthread__deadqueue, self, pt_deadq);
636 		pthread_mutex_unlock(&pthread__deadqueue_lock);
637 		_lwp_exit();
638 	} else {
639 		self->pt_state = PT_STATE_ZOMBIE;
640 		pthread_cond_broadcast(&self->pt_joiners);
641 		pthread_mutex_unlock(&self->pt_lock);
642 		/* Note: name will be freed by the joiner. */
643 		_lwp_exit();
644 	}
645 
646 	/*NOTREACHED*/
647 	pthread__abort();
648 	exit(1);
649 }
650 
651 
652 int
653 pthread_join(pthread_t thread, void **valptr)
654 {
655 	pthread_t self;
656 	int error;
657 
658 	self = pthread__self();
659 
660 	if (pthread__find(thread) != 0)
661 		return ESRCH;
662 
663 	if (thread->pt_magic != PT_MAGIC)
664 		return EINVAL;
665 
666 	if (thread == self)
667 		return EDEADLK;
668 
669 	self->pt_droplock = &thread->pt_lock;
670 	pthread_mutex_lock(&thread->pt_lock);
671 	for (;;) {
672 		if (thread->pt_state == PT_STATE_ZOMBIE)
673 			break;
674 		if (thread->pt_state == PT_STATE_DEAD) {
675 			pthread_mutex_unlock(&thread->pt_lock);
676 			self->pt_droplock = NULL;
677 			return ESRCH;
678 		}
679 		if ((thread->pt_flags & PT_FLAG_DETACHED) != 0) {
680 			pthread_mutex_unlock(&thread->pt_lock);
681 			self->pt_droplock = NULL;
682 			return EINVAL;
683 		}
684 		error = pthread_cond_wait(&thread->pt_joiners,
685 		    &thread->pt_lock);
686 		if (error != 0) {
687 			pthread__errorfunc(__FILE__, __LINE__,
688 			    __func__, "unexpected return from cond_wait()");
689 		}
690 
691 	}
692 	pthread__testcancel(self);
693 	if (valptr != NULL)
694 		*valptr = thread->pt_exitval;
695 	/* pthread__reap() will drop the lock. */
696 	pthread__reap(thread);
697 	self->pt_droplock = NULL;
698 
699 	return 0;
700 }
701 
702 static void
703 pthread__reap(pthread_t thread)
704 {
705 	char *name;
706 
707 	name = thread->pt_name;
708 	thread->pt_name = NULL;
709 	thread->pt_state = PT_STATE_DEAD;
710 	pthread_mutex_unlock(&thread->pt_lock);
711 
712 	pthread_mutex_lock(&pthread__deadqueue_lock);
713 	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
714 	pthread_mutex_unlock(&pthread__deadqueue_lock);
715 
716 	if (name != NULL)
717 		free(name);
718 }
719 
/*
 * Compare two thread handles.  Returns non-zero when t1 and t2 are the
 * same handle and zero otherwise; handles are compared by value.
 */
int
pthread_equal(pthread_t t1, pthread_t t2)
{
	int same = (t1 == t2);

	return same;
}
727 
728 
729 int
730 pthread_detach(pthread_t thread)
731 {
732 
733 	if (pthread__find(thread) != 0)
734 		return ESRCH;
735 
736 	if (thread->pt_magic != PT_MAGIC)
737 		return EINVAL;
738 
739 	pthread_mutex_lock(&thread->pt_lock);
740 	thread->pt_flags |= PT_FLAG_DETACHED;
741 	if (thread->pt_state == PT_STATE_ZOMBIE) {
742 		/* pthread__reap() will drop the lock. */
743 		pthread__reap(thread);
744 	} else {
745 		/*
746 		 * Not valid for threads to be waiting in
747 		 * pthread_join() (there are intractable
748 		 * sync issues from the application
749 		 * perspective), but give those threads
750 		 * a chance anyway.
751 		 */
752 		pthread_cond_broadcast(&thread->pt_joiners);
753 		pthread_mutex_unlock(&thread->pt_lock);
754 	}
755 
756 	return 0;
757 }
758 
759 
760 int
761 pthread_getname_np(pthread_t thread, char *name, size_t len)
762 {
763 
764 	if (pthread__find(thread) != 0)
765 		return ESRCH;
766 
767 	if (thread->pt_magic != PT_MAGIC)
768 		return EINVAL;
769 
770 	pthread_mutex_lock(&thread->pt_lock);
771 	if (thread->pt_name == NULL)
772 		name[0] = '\0';
773 	else
774 		strlcpy(name, thread->pt_name, len);
775 	pthread_mutex_unlock(&thread->pt_lock);
776 
777 	return 0;
778 }
779 
780 
781 int
782 pthread_setname_np(pthread_t thread, const char *name, void *arg)
783 {
784 	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
785 	int namelen;
786 
787 	if (pthread__find(thread) != 0)
788 		return ESRCH;
789 
790 	if (thread->pt_magic != PT_MAGIC)
791 		return EINVAL;
792 
793 	namelen = snprintf(newname, sizeof(newname), name, arg);
794 	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
795 		return EINVAL;
796 
797 	cp = strdup(newname);
798 	if (cp == NULL)
799 		return ENOMEM;
800 
801 	pthread_mutex_lock(&thread->pt_lock);
802 	oldname = thread->pt_name;
803 	thread->pt_name = cp;
804 	(void)_lwp_setname(thread->pt_lid, cp);
805 	pthread_mutex_unlock(&thread->pt_lock);
806 
807 	if (oldname != NULL)
808 		free(oldname);
809 
810 	return 0;
811 }
812 
813 
814 
815 /*
816  * XXX There should be a way for applications to use the efficent
817  *  inline version, but there are opacity/namespace issues.
818  */
819 pthread_t
820 pthread_self(void)
821 {
822 
823 	return pthread__self();
824 }
825 
826 
827 int
828 pthread_cancel(pthread_t thread)
829 {
830 
831 	if (pthread__find(thread) != 0)
832 		return ESRCH;
833 	pthread_mutex_lock(&thread->pt_lock);
834 	thread->pt_flags |= PT_FLAG_CS_PENDING;
835 	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
836 		thread->pt_cancel = 1;
837 		pthread_mutex_unlock(&thread->pt_lock);
838 		_lwp_wakeup(thread->pt_lid);
839 	} else
840 		pthread_mutex_unlock(&thread->pt_lock);
841 
842 	return 0;
843 }
844 
845 
846 int
847 pthread_setcancelstate(int state, int *oldstate)
848 {
849 	pthread_t self;
850 	int retval;
851 
852 	self = pthread__self();
853 	retval = 0;
854 
855 	pthread_mutex_lock(&self->pt_lock);
856 
857 	if (oldstate != NULL) {
858 		if (self->pt_flags & PT_FLAG_CS_DISABLED)
859 			*oldstate = PTHREAD_CANCEL_DISABLE;
860 		else
861 			*oldstate = PTHREAD_CANCEL_ENABLE;
862 	}
863 
864 	if (state == PTHREAD_CANCEL_DISABLE) {
865 		self->pt_flags |= PT_FLAG_CS_DISABLED;
866 		if (self->pt_cancel) {
867 			self->pt_flags |= PT_FLAG_CS_PENDING;
868 			self->pt_cancel = 0;
869 		}
870 	} else if (state == PTHREAD_CANCEL_ENABLE) {
871 		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
872 		/*
873 		 * If a cancellation was requested while cancellation
874 		 * was disabled, note that fact for future
875 		 * cancellation tests.
876 		 */
877 		if (self->pt_flags & PT_FLAG_CS_PENDING) {
878 			self->pt_cancel = 1;
879 			/* This is not a deferred cancellation point. */
880 			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
881 				pthread_mutex_unlock(&self->pt_lock);
882 				pthread__cancelled();
883 			}
884 		}
885 	} else
886 		retval = EINVAL;
887 
888 	pthread_mutex_unlock(&self->pt_lock);
889 
890 	return retval;
891 }
892 
893 
894 int
895 pthread_setcanceltype(int type, int *oldtype)
896 {
897 	pthread_t self;
898 	int retval;
899 
900 	self = pthread__self();
901 	retval = 0;
902 
903 	pthread_mutex_lock(&self->pt_lock);
904 
905 	if (oldtype != NULL) {
906 		if (self->pt_flags & PT_FLAG_CS_ASYNC)
907 			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
908 		else
909 			*oldtype = PTHREAD_CANCEL_DEFERRED;
910 	}
911 
912 	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
913 		self->pt_flags |= PT_FLAG_CS_ASYNC;
914 		if (self->pt_cancel) {
915 			pthread_mutex_unlock(&self->pt_lock);
916 			pthread__cancelled();
917 		}
918 	} else if (type == PTHREAD_CANCEL_DEFERRED)
919 		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
920 	else
921 		retval = EINVAL;
922 
923 	pthread_mutex_unlock(&self->pt_lock);
924 
925 	return retval;
926 }
927 
928 
929 void
930 pthread_testcancel(void)
931 {
932 	pthread_t self;
933 
934 	self = pthread__self();
935 	if (self->pt_cancel)
936 		pthread__cancelled();
937 }
938 
939 
940 /*
941  * POSIX requires that certain functions return an error rather than
942  * invoking undefined behavior even when handed completely bogus
943  * pthread_t values, e.g. stack garbage.
944  */
945 int
946 pthread__find(pthread_t id)
947 {
948 	pthread_t target;
949 	int error;
950 
951 	pthread_rwlock_rdlock(&pthread__alltree_lock);
952 	target = rb_tree_find_node(&pthread__alltree, id);
953 	error = (target && target->pt_state != PT_STATE_DEAD) ? 0 : ESRCH;
954 	pthread_rwlock_unlock(&pthread__alltree_lock);
955 
956 	return error;
957 }
958 
959 
960 void
961 pthread__testcancel(pthread_t self)
962 {
963 
964 	if (self->pt_cancel)
965 		pthread__cancelled();
966 }
967 
968 
969 void
970 pthread__cancelled(void)
971 {
972 	pthread_mutex_t *droplock;
973 	pthread_t self;
974 
975 	self = pthread__self();
976 	droplock = self->pt_droplock;
977 	self->pt_droplock = NULL;
978 
979 	if (droplock != NULL && pthread_mutex_held_np(droplock))
980 		pthread_mutex_unlock(droplock);
981 
982 	pthread_exit(PTHREAD_CANCELED);
983 }
984 
985 
986 void
987 pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
988 {
989 	pthread_t self;
990 	struct pt_clean_t *entry;
991 
992 	self = pthread__self();
993 	entry = store;
994 	entry->ptc_cleanup = cleanup;
995 	entry->ptc_arg = arg;
996 	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
997 }
998 
999 
1000 void
1001 pthread__cleanup_pop(int ex, void *store)
1002 {
1003 	pthread_t self;
1004 	struct pt_clean_t *entry;
1005 
1006 	self = pthread__self();
1007 	entry = store;
1008 
1009 	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
1010 	if (ex)
1011 		(*entry->ptc_cleanup)(entry->ptc_arg);
1012 }
1013 
1014 
1015 int *
1016 pthread__errno(void)
1017 {
1018 	pthread_t self;
1019 
1020 	self = pthread__self();
1021 
1022 	return &(self->pt_errno);
1023 }
1024 
ssize_t	_sys_write(int, const void *, size_t);

/*
 * Report a failed internal assertion and terminate the process.
 *
 * file, line	source location of the assertion
 * function	enclosing function name, or NULL to omit it
 * expr		text of the expression that failed
 *
 * The message is written to standard error, then SIGABRT is sent to
 * the process; _exit(1) is the fallback should that return.
 */
void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, sizeof(buf),
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	/*
	 * snprintf() returns the length the full message would have had,
	 * which can exceed the buffer, or a negative value on error.
	 * Never write more than what is actually in buf.
	 */
	if (len >= (int)sizeof(buf))
		len = (int)sizeof(buf) - 1;
	if (len > 0)
		_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)kill(getpid(), SIGABRT);

	_exit(1);
}
1050 
1051 
1052 void
1053 pthread__errorfunc(const char *file, int line, const char *function,
1054 		   const char *msg)
1055 {
1056 	char buf[1024];
1057 	size_t len;
1058 
1059 	if (pthread__diagassert == 0)
1060 		return;
1061 
1062 	/*
1063 	 * snprintf should not acquire any locks, or we could
1064 	 * end up deadlocked if the assert caller held locks.
1065 	 */
1066 	len = snprintf(buf, 1024,
1067 	    "%s: Error detected by libpthread: %s.\n"
1068 	    "Detected by file \"%s\", line %d%s%s%s.\n"
1069 	    "See pthread(3) for information.\n",
1070 	    getprogname(), msg, file, line,
1071 	    function ? ", function \"" : "",
1072 	    function ? function : "",
1073 	    function ? "\"" : "");
1074 
1075 	if (pthread__diagassert & DIAGASSERT_STDERR)
1076 		_sys_write(STDERR_FILENO, buf, len);
1077 
1078 	if (pthread__diagassert & DIAGASSERT_SYSLOG)
1079 		syslog(LOG_DEBUG | LOG_USER, "%s", buf);
1080 
1081 	if (pthread__diagassert & DIAGASSERT_ABORT) {
1082 		(void)kill(getpid(), SIGABRT);
1083 		_exit(1);
1084 	}
1085 }
1086 
/*
 * Thread park/unpark operations.  The kernel operations are modelled
 * on a brief description in "Multithreading in the Solaris Operating
 * Environment":
 *
 * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
 */

/* Report `msg' through pthread__errorfunc() with the caller's location. */
#define	OOPS(msg)			\
    pthread__errorfunc(__FILE__, __LINE__, __func__, (msg))
1097 
1098 int
1099 pthread__park(pthread_t self, pthread_mutex_t *lock,
1100 	      pthread_queue_t *queue, const struct timespec *abstime,
1101 	      int cancelpt, const void *hint)
1102 {
1103 	int rv, error;
1104 	void *obj;
1105 
1106 	/*
1107 	 * For non-interlocked release of mutexes we need a store
1108 	 * barrier before incrementing pt_blocking away from zero.
1109 	 * This is provided by pthread_mutex_unlock().
1110 	 */
1111 	self->pt_willpark = 1;
1112 	pthread_mutex_unlock(lock);
1113 	self->pt_willpark = 0;
1114 	self->pt_blocking++;
1115 
1116 	/*
1117 	 * Wait until we are awoken by a pending unpark operation,
1118 	 * a signal, an unpark posted after we have gone asleep,
1119 	 * or an expired timeout.
1120 	 *
1121 	 * It is fine to test the value of pt_sleepobj without
1122 	 * holding any locks, because:
1123 	 *
1124 	 * o Only the blocking thread (this thread) ever sets them
1125 	 *   to a non-NULL value.
1126 	 *
1127 	 * o Other threads may set them NULL, but if they do so they
1128 	 *   must also make this thread return from _lwp_park.
1129 	 *
1130 	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
1131 	 *   calls and all make use of spinlocks in the kernel.  So
1132 	 *   these system calls act as full memory barriers, and will
1133 	 *   ensure that the calling CPU's store buffers are drained.
1134 	 *   In combination with the spinlock release before unpark,
1135 	 *   this means that modification of pt_sleepobj/onq by another
1136 	 *   thread will become globally visible before that thread
1137 	 *   schedules an unpark operation on this thread.
1138 	 *
1139 	 * Note: the test in the while() statement dodges the park op if
1140 	 * we have already been awoken, unless there is another thread to
1141 	 * awaken.  This saves a syscall - if we were already awakened,
1142 	 * the next call to _lwp_park() would need to return early in order
1143 	 * to eat the previous wakeup.
1144 	 */
1145 	rv = 0;
1146 	do {
1147 		/*
1148 		 * If we deferred unparking a thread, arrange to
1149 		 * have _lwp_park() restart it before blocking.
1150 		 */
1151 		error = _lwp_park(abstime, self->pt_unpark, hint, hint);
1152 		self->pt_unpark = 0;
1153 		if (error != 0) {
1154 			switch (rv = errno) {
1155 			case EINTR:
1156 			case EALREADY:
1157 				rv = 0;
1158 				break;
1159 			case ETIMEDOUT:
1160 				break;
1161 			default:
1162 				OOPS("_lwp_park failed");
1163 				break;
1164 			}
1165 		}
1166 		/* Check for cancellation. */
1167 		if (cancelpt && self->pt_cancel)
1168 			rv = EINTR;
1169 	} while (self->pt_sleepobj != NULL && rv == 0);
1170 
1171 	/*
1172 	 * If we have been awoken early but are still on the queue,
1173 	 * then remove ourself.  Again, it's safe to do the test
1174 	 * without holding any locks.
1175 	 */
1176 	if (__predict_false(self->pt_sleepobj != NULL)) {
1177 		pthread_mutex_lock(lock);
1178 		if ((obj = self->pt_sleepobj) != NULL) {
1179 			PTQ_REMOVE(queue, self, pt_sleep);
1180 			self->pt_sleepobj = NULL;
1181 			if (obj != NULL && self->pt_early != NULL)
1182 				(*self->pt_early)(obj);
1183 		}
1184 		pthread_mutex_unlock(lock);
1185 	}
1186 	self->pt_early = NULL;
1187 	self->pt_blocking--;
1188 	membar_sync();
1189 
1190 	return rv;
1191 }
1192 
1193 void
1194 pthread__unpark(pthread_queue_t *queue, pthread_t self,
1195 		pthread_mutex_t *interlock)
1196 {
1197 	pthread_t target;
1198 	u_int max;
1199 	size_t nwaiters;
1200 
1201 	max = pthread__unpark_max;
1202 	nwaiters = self->pt_nwaiters;
1203 	target = PTQ_FIRST(queue);
1204 	if (nwaiters == max) {
1205 		/* Overflow. */
1206 		(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1207 		    __UNVOLATILE(&interlock->ptm_waiters));
1208 		nwaiters = 0;
1209 	}
1210 	target->pt_sleepobj = NULL;
1211 	self->pt_waiters[nwaiters++] = target->pt_lid;
1212 	PTQ_REMOVE(queue, target, pt_sleep);
1213 	self->pt_nwaiters = nwaiters;
1214 	pthread__mutex_deferwake(self, interlock);
1215 }
1216 
1217 void
1218 pthread__unpark_all(pthread_queue_t *queue, pthread_t self,
1219 		    pthread_mutex_t *interlock)
1220 {
1221 	pthread_t target;
1222 	u_int max;
1223 	size_t nwaiters;
1224 
1225 	max = pthread__unpark_max;
1226 	nwaiters = self->pt_nwaiters;
1227 	PTQ_FOREACH(target, queue, pt_sleep) {
1228 		if (nwaiters == max) {
1229 			/* Overflow. */
1230 			(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1231 			    __UNVOLATILE(&interlock->ptm_waiters));
1232 			nwaiters = 0;
1233 		}
1234 		target->pt_sleepobj = NULL;
1235 		self->pt_waiters[nwaiters++] = target->pt_lid;
1236 	}
1237 	self->pt_nwaiters = nwaiters;
1238 	PTQ_INIT(queue);
1239 	pthread__mutex_deferwake(self, interlock);
1240 }
1241 
1242 #undef	OOPS
1243 
1244 static void
1245 pthread__initmainstack(void)
1246 {
1247 	struct rlimit slimit;
1248 	const AuxInfo *aux;
1249 	size_t size;
1250 
1251 	_DIAGASSERT(_dlauxinfo() != NULL);
1252 
1253 	if (getrlimit(RLIMIT_STACK, &slimit) == -1)
1254 		err(1, "Couldn't get stack resource consumption limits");
1255 	size = slimit.rlim_cur;
1256 	pthread__main.pt_stack.ss_size = size;
1257 
1258 	for (aux = _dlauxinfo(); aux->a_type != AT_NULL; ++aux) {
1259 		if (aux->a_type == AT_STACKBASE) {
1260 			pthread__main.pt_stack.ss_sp = (void *)aux->a_v;
1261 #ifdef __MACHINE_STACK_GROWS_UP
1262 			pthread__main.pt_stack.ss_sp = (void *)aux->a_v;
1263 #else
1264 			pthread__main.pt_stack.ss_sp = (char *)aux->a_v - size;
1265 #endif
1266 			break;
1267 		}
1268 	}
1269 }
1270 
1271 /*
1272  * Set up the slightly special stack for the "initial" thread, which
1273  * runs on the normal system stack, and thus gets slightly different
1274  * treatment.
1275  */
1276 static void
1277 pthread__initmain(pthread_t *newt)
1278 {
1279 	char *value;
1280 
1281 	pthread__initmainstack();
1282 
1283 	value = pthread__getenv("PTHREAD_STACKSIZE");
1284 	if (value != NULL) {
1285 		pthread__stacksize = atoi(value) * 1024;
1286 		if (pthread__stacksize > pthread__main.pt_stack.ss_size)
1287 			pthread__stacksize = pthread__main.pt_stack.ss_size;
1288 	}
1289 	if (pthread__stacksize == 0)
1290 		pthread__stacksize = pthread__main.pt_stack.ss_size;
1291 	pthread__stacksize += pthread__pagesize - 1;
1292 	pthread__stacksize &= ~(pthread__pagesize - 1);
1293 	if (pthread__stacksize < 4 * pthread__pagesize)
1294 		errx(1, "Stacksize limit is too low, minimum %zd kbyte.",
1295 		    4 * pthread__pagesize / 1024);
1296 
1297 	*newt = &pthread__main;
1298 #ifdef __HAVE___LWP_GETTCB_FAST
1299 	pthread__main.pt_tls = __lwp_gettcb_fast();
1300 #else
1301 	pthread__main.pt_tls = _lwp_getprivate();
1302 #endif
1303 	pthread__main.pt_tls->tcb_pthread = &pthread__main;
1304 }
1305 
/*
 * Three-way comparison of two keys by their numeric pointer value.
 * Returns -1, 0 or 1 when n1 is below, equal to or above n2.
 * The context argument is not used.
 */
static signed int
/*ARGSUSED*/
pthread__cmp(void *ctx, const void *n1, const void *n2)
{
	const uintptr_t lhs = (const uintptr_t)n1;
	const uintptr_t rhs = (const uintptr_t)n2;

	/* Each comparison yields 0 or 1, so the difference is -1/0/1. */
	return (lhs > rhs) - (lhs < rhs);
}
1319 
/*
 * Lock-free stand-in for getenv(), needed because getenv() wants to
 * use locks.  Scans environ for the first "name=value" entry whose
 * name matches exactly and returns a pointer to the value part inside
 * that entry, or NULL if there is no such entry.
 */
char *
pthread__getenv(const char *name)
{
	extern char **environ;
	const size_t namelen = strlen(name);
	char **envp;

	for (envp = environ; *envp != NULL; envp++) {
		const char *entry = *envp;

		if (strncmp(name, entry, namelen) != 0)
			continue;
		/* Reject entries where `name' is only a prefix. */
		if (entry[namelen] != '=')
			continue;
		return *envp + namelen + 1;
	}

	return NULL;
}
1337 
1338 pthread_mutex_t *
1339 pthread__hashlock(volatile const void *p)
1340 {
1341 	uintptr_t v;
1342 
1343 	v = (uintptr_t)p;
1344 	return &hashlocks[((v >> 9) ^ (v >> 3)) & (NHASHLOCK - 1)].mutex;
1345 }
1346 
1347 int
1348 pthread__checkpri(int pri)
1349 {
1350 	static int havepri;
1351 	static long min, max;
1352 
1353 	if (!havepri) {
1354 		min = sysconf(_SC_SCHED_PRI_MIN);
1355 		max = sysconf(_SC_SCHED_PRI_MAX);
1356 		havepri = 1;
1357 	}
1358 	return (pri < min || pri > max) ? EINVAL : 0;
1359 }
1360