xref: /netbsd-src/lib/libpthread/pthread.c (revision 88fcb00c0357f2d7c1774f86a352637bfda96184)
1 /*	$NetBSD: pthread.c,v 1.123 2011/03/30 00:03:26 joerg Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Nathan J. Williams and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: pthread.c,v 1.123 2011/03/30 00:03:26 joerg Exp $");
34 
35 #define	__EXPOSE_STACK	1
36 
37 #include <sys/param.h>
38 #include <sys/mman.h>
39 #include <sys/sysctl.h>
40 #include <sys/lwpctl.h>
41 #include <sys/tls.h>
42 
43 #include <err.h>
44 #include <errno.h>
45 #include <lwp.h>
46 #include <signal.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <syslog.h>
51 #include <ucontext.h>
52 #include <unistd.h>
53 #include <sched.h>
54 
55 #include "pthread.h"
56 #include "pthread_int.h"
57 
/*
 * Red-black tree of thread structures.  pthread__find() looks a pthread_t
 * up in it under the read lock; pthread_create() inserts under the write
 * lock.
 */
pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
RB_HEAD(__pthread__alltree, __pthread_st) pthread__alltree;

#ifndef lint
static int	pthread__cmp(struct __pthread_st *, struct __pthread_st *);
RB_PROTOTYPE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
#endif

/* Forward declarations of file-local helpers. */
static void	pthread__create_tramp(void *);
static void	pthread__initthread(pthread_t);
static void	pthread__scrubthread(pthread_t, char *, int);
static int	pthread__stackid_setup(void *, size_t, pthread_t *);
static int	pthread__stackalloc(pthread_t *);
static void	pthread__initmain(pthread_t *);
static void	pthread__fork_callback(void);
static void	pthread__reap(pthread_t);
static void	pthread__child_callback(void);
static void	pthread__start(void);

/* Library entry point; aliased to __libc_thr_init in this file. */
void	pthread__init(void);

/* Set to 1 by the first pthread_create(); cleared in a forked child. */
int pthread__started;
/*
 * Threads in PT_STATE_DEAD wait on pthread__deadqueue until
 * pthread_create() recycles them; the queue is protected by
 * pthread__deadqueue_lock.
 */
pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_queue_t pthread__deadqueue;
/* Every thread structure ever set up (the main thread is inserted first). */
pthread_queue_t pthread__allqueue;

/* Attributes used by pthread_create() when the caller passes attr == NULL. */
static pthread_attr_t pthread_default_attr;
/* Placeholder lwpctl block that pt_lwpctl points at until _lwp_ctl() runs. */
static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };
/* Thread structure of the initial (main) thread, recorded by pthread__init(). */
static pthread_t pthread__first;

/* Bits of pthread__diagassert, selected via the PTHREAD_DIAGASSERT variable. */
enum {
	DIAGASSERT_ABORT =	1<<0,	/* abort the process on a detected error */
	DIAGASSERT_STDERR =	1<<1,	/* report the error on stderr */
	DIAGASSERT_SYSLOG =	1<<2	/* report the error via syslog */
};

/* Current diagnostic mode; 0 makes pthread__errorfunc() a no-op. */
static int pthread__diagassert;

/* Filled from the hw.ncpu sysctl in pthread__init(). */
int pthread__concurrency;
int pthread__nspins;
/* Upper bound on LWP ids batched per _lwp_unpark_all() call. */
int pthread__unpark_max = PTHREAD__UNPARK_MAX;
99 
100 /*
101  * We have to initialize the pthread_stack* variables here because
102  * mutexes are used before pthread_init() and thus pthread__initmain()
103  * are called.  Since mutexes only save the stack pointer and not a
104  * pointer to the thread data, it is safe to change the mapping from
105  * stack pointer to thread data afterwards.
106  */
/* log2 of the default per-thread stack size (1 << 18 bytes). */
#define	_STACKSIZE_LG 18
int	pthread__stacksize_lg = _STACKSIZE_LG;
size_t	pthread__stacksize = 1 << _STACKSIZE_LG;
/* Low bits of an address within a stack; zero for an aligned stack base. */
vaddr_t	pthread__stackmask = (1 << _STACKSIZE_LG) - 1;
/* Complement of pthread__stackmask. */
vaddr_t pthread__threadmask = (vaddr_t)~((1 << _STACKSIZE_LG) - 1);
/* NOTE(review): presumably filled in by pthread__initmain() -- confirm. */
vaddr_t	pthread__mainbase = 0;
vaddr_t	pthread__mainstruct = 0;
#undef	_STACKSIZE_LG
115 
/* Prototype for a symbol defined outside this file. */
int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);

/*
 * Export the pthread entry points under the __libc_thr_* names as
 * strong aliases.
 */
__strong_alias(__libc_thr_self,pthread_self)
__strong_alias(__libc_thr_create,pthread_create)
__strong_alias(__libc_thr_exit,pthread_exit)
__strong_alias(__libc_thr_errno,pthread__errno)
__strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
__strong_alias(__libc_thr_equal,pthread_equal)
__strong_alias(__libc_thr_init,pthread__init)
125 
/*
 * Static library kludge.  For every library object file that is not
 * already referenced from this file, place a reference to one of its
 * symbols here.
 */
extern int pthread__cancel_stub_binder;

void *pthread__static_lib_binder[] = {
	&pthread__cancel_stub_binder,
	pthread_cond_init,
	pthread_mutex_init,
	pthread_rwlock_init,
	pthread_barrier_init,
	pthread_key_create,
	pthread_setspecific,
};

/* Number of entries in the hashlocks[] table below. */
#define	NHASHLOCK	64

/*
 * Table of mutexes, each padded to 64 bytes and the array aligned to 64;
 * the mutexes are initialized in pthread__init().
 */
static union hashlock {
	pthread_mutex_t	mutex;
	char		pad[64];
} hashlocks[NHASHLOCK] __aligned(64);
148 
149 /*
150  * This needs to be started by the library loading code, before main()
151  * gets to run, for various things that use the state of the initial thread
152  * to work properly (thread-specific data is an application-visible example;
153  * spinlock counts for mutexes is an internal example).
154  */
155 void
156 pthread__init(void)
157 {
158 	pthread_t first;
159 	char *p;
160 	int i, mib[2];
161 	size_t len;
162 	extern int __isthreaded;
163 
164 	mib[0] = CTL_HW;
165 	mib[1] = HW_NCPU;
166 
167 	len = sizeof(pthread__concurrency);
168 	if (sysctl(mib, 2, &pthread__concurrency, &len, NULL, 0) == -1)
169 		err(1, "sysctl(hw.ncpu");
170 
171 	mib[0] = CTL_KERN;
172 	mib[1] = KERN_OSREV;
173 
174 	/* Initialize locks first; they're needed elsewhere. */
175 	pthread__lockprim_init();
176 	for (i = 0; i < NHASHLOCK; i++) {
177 		pthread_mutex_init(&hashlocks[i].mutex, NULL);
178 	}
179 
180 	/* Fetch parameters. */
181 	i = (int)_lwp_unpark_all(NULL, 0, NULL);
182 	if (i == -1)
183 		err(1, "_lwp_unpark_all");
184 	if (i < pthread__unpark_max)
185 		pthread__unpark_max = i;
186 
187 	/* Basic data structure setup */
188 	pthread_attr_init(&pthread_default_attr);
189 	PTQ_INIT(&pthread__allqueue);
190 	PTQ_INIT(&pthread__deadqueue);
191 	RB_INIT(&pthread__alltree);
192 
193 	/* Create the thread structure corresponding to main() */
194 	pthread__initmain(&first);
195 	pthread__initthread(first);
196 	pthread__scrubthread(first, NULL, 0);
197 
198 	first->pt_lid = _lwp_self();
199 	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
200 	RB_INSERT(__pthread__alltree, &pthread__alltree, first);
201 
202 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &first->pt_lwpctl) != 0) {
203 		err(1, "_lwp_ctl");
204 	}
205 
206 	/* Start subsystems */
207 	PTHREAD_MD_INIT
208 
209 	for (p = pthread__getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
210 		switch (*p) {
211 		case 'a':
212 			pthread__diagassert |= DIAGASSERT_ABORT;
213 			break;
214 		case 'A':
215 			pthread__diagassert &= ~DIAGASSERT_ABORT;
216 			break;
217 		case 'e':
218 			pthread__diagassert |= DIAGASSERT_STDERR;
219 			break;
220 		case 'E':
221 			pthread__diagassert &= ~DIAGASSERT_STDERR;
222 			break;
223 		case 'l':
224 			pthread__diagassert |= DIAGASSERT_SYSLOG;
225 			break;
226 		case 'L':
227 			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
228 			break;
229 		}
230 	}
231 
232 	/* Tell libc that we're here and it should role-play accordingly. */
233 	pthread__first = first;
234 	pthread_atfork(NULL, NULL, pthread__fork_callback);
235 	__isthreaded = 1;
236 }
237 
238 static void
239 pthread__fork_callback(void)
240 {
241 	struct __pthread_st *self;
242 
243 	/* lwpctl state is not copied across fork. */
244 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &pthread__first->pt_lwpctl)) {
245 		err(1, "_lwp_ctl");
246 	}
247 	self = pthread__self();
248 	self->pt_lid = _lwp_self();
249 }
250 
251 static void
252 pthread__child_callback(void)
253 {
254 
255 	/*
256 	 * Clean up data structures that a forked child process might
257 	 * trip over. Note that if threads have been created (causing
258 	 * this handler to be registered) the standards say that the
259 	 * child will trigger undefined behavior if it makes any
260 	 * pthread_* calls (or any other calls that aren't
261 	 * async-signal-safe), so we don't really have to clean up
262 	 * much. Anything that permits some pthread_* calls to work is
263 	 * merely being polite.
264 	 */
265 	pthread__started = 0;
266 }
267 
268 static void
269 pthread__start(void)
270 {
271 
272 	/*
273 	 * Per-process timers are cleared by fork(); despite the
274 	 * various restrictions on fork() and threads, it's legal to
275 	 * fork() before creating any threads.
276 	 */
277 	pthread_atfork(NULL, NULL, pthread__child_callback);
278 }
279 
280 
281 /* General-purpose thread data structure sanitization. */
282 /* ARGSUSED */
283 static void
284 pthread__initthread(pthread_t t)
285 {
286 
287 	t->pt_self = t;
288 	t->pt_magic = PT_MAGIC;
289 	t->pt_willpark = 0;
290 	t->pt_unpark = 0;
291 	t->pt_nwaiters = 0;
292 	t->pt_sleepobj = NULL;
293 	t->pt_signalled = 0;
294 	t->pt_havespecific = 0;
295 	t->pt_early = NULL;
296 	t->pt_lwpctl = &pthread__dummy_lwpctl;
297 	t->pt_blocking = 0;
298 	t->pt_droplock = NULL;
299 
300 	memcpy(&t->pt_lockops, pthread__lock_ops, sizeof(t->pt_lockops));
301 	pthread_mutex_init(&t->pt_lock, NULL);
302 	PTQ_INIT(&t->pt_cleanup_stack);
303 	pthread_cond_init(&t->pt_joiners, NULL);
304 	memset(&t->pt_specific, 0, sizeof(t->pt_specific));
305 }
306 
307 static void
308 pthread__scrubthread(pthread_t t, char *name, int flags)
309 {
310 
311 	t->pt_state = PT_STATE_RUNNING;
312 	t->pt_exitval = NULL;
313 	t->pt_flags = flags;
314 	t->pt_cancel = 0;
315 	t->pt_errno = 0;
316 	t->pt_name = name;
317 	t->pt_lid = 0;
318 }
319 
320 
321 int
322 pthread_create(pthread_t *thread, const pthread_attr_t *attr,
323 	    void *(*startfunc)(void *), void *arg)
324 {
325 	pthread_t newthread;
326 	pthread_attr_t nattr;
327 	struct pthread_attr_private *p;
328 	char * volatile name;
329 	unsigned long flag;
330 	void *private_area;
331 	int ret;
332 
333 	/*
334 	 * It's okay to check this without a lock because there can
335 	 * only be one thread before it becomes true.
336 	 */
337 	if (pthread__started == 0) {
338 		pthread__start();
339 		pthread__started = 1;
340 	}
341 
342 	if (attr == NULL)
343 		nattr = pthread_default_attr;
344 	else if (attr->pta_magic == PT_ATTR_MAGIC)
345 		nattr = *attr;
346 	else
347 		return EINVAL;
348 
349 	/* Fetch misc. attributes from the attr structure. */
350 	name = NULL;
351 	if ((p = nattr.pta_private) != NULL)
352 		if (p->ptap_name[0] != '\0')
353 			if ((name = strdup(p->ptap_name)) == NULL)
354 				return ENOMEM;
355 
356 	newthread = NULL;
357 
358 	/*
359 	 * Try to reclaim a dead thread.
360 	 */
361 	if (!PTQ_EMPTY(&pthread__deadqueue)) {
362 		pthread_mutex_lock(&pthread__deadqueue_lock);
363 		PTQ_FOREACH(newthread, &pthread__deadqueue, pt_deadq) {
364 			/* Still running? */
365 			if (newthread->pt_lwpctl->lc_curcpu ==
366 			    LWPCTL_CPU_EXITED ||
367 			    (_lwp_kill(newthread->pt_lid, 0) == -1 &&
368 			    errno == ESRCH))
369 				break;
370 		}
371 		if (newthread)
372 			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
373 		pthread_mutex_unlock(&pthread__deadqueue_lock);
374 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
375 		if (newthread && newthread->pt_tls) {
376 			_rtld_tls_free(newthread->pt_tls);
377 			newthread->pt_tls = NULL;
378 		}
379 #endif
380 	}
381 
382 	/*
383 	 * If necessary set up a stack, allocate space for a pthread_st,
384 	 * and initialize it.
385 	 */
386 	if (newthread == NULL) {
387 		ret = pthread__stackalloc(&newthread);
388 		if (ret != 0) {
389 			if (name)
390 				free(name);
391 			return ret;
392 		}
393 
394 		/* This is used only when creating the thread. */
395 		_INITCONTEXT_U(&newthread->pt_uc);
396 #ifdef PTHREAD_MACHINE_HAS_ID_REGISTER
397 		pthread__uc_id(&newthread->pt_uc) = newthread;
398 #endif
399 		newthread->pt_uc.uc_stack = newthread->pt_stack;
400 		newthread->pt_uc.uc_link = NULL;
401 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
402 		newthread->pt_tls = NULL;
403 #endif
404 
405 		/* Add to list of all threads. */
406 		pthread_rwlock_wrlock(&pthread__alltree_lock);
407 		PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
408 		RB_INSERT(__pthread__alltree, &pthread__alltree, newthread);
409 		pthread_rwlock_unlock(&pthread__alltree_lock);
410 
411 		/* Will be reset by the thread upon exit. */
412 		pthread__initthread(newthread);
413 	}
414 
415 	/*
416 	 * Create the new LWP.
417 	 */
418 	pthread__scrubthread(newthread, name, nattr.pta_flags);
419 	newthread->pt_func = startfunc;
420 	newthread->pt_arg = arg;
421 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
422 	private_area = newthread->pt_tls = _rtld_tls_allocate();
423 	newthread->pt_tls->tcb_pthread = newthread;
424 #else
425 	private_area = newthread;
426 #endif
427 
428 	_lwp_makecontext(&newthread->pt_uc, pthread__create_tramp,
429 	    newthread, private_area, newthread->pt_stack.ss_sp,
430 	    newthread->pt_stack.ss_size);
431 
432 	flag = LWP_DETACHED;
433 	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0 ||
434 	    (nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0)
435 		flag |= LWP_SUSPENDED;
436 	ret = _lwp_create(&newthread->pt_uc, flag, &newthread->pt_lid);
437 	if (ret != 0) {
438 		pthread_mutex_lock(&newthread->pt_lock);
439 		/* Will unlock and free name. */
440 		pthread__reap(newthread);
441 		return ret;
442 	}
443 
444 	if ((nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0) {
445 		if (p != NULL) {
446 			(void)pthread_setschedparam(newthread, p->ptap_policy,
447 			    &p->ptap_sp);
448 		}
449 		if ((newthread->pt_flags & PT_FLAG_SUSPENDED) == 0) {
450 			(void)_lwp_continue(newthread->pt_lid);
451 		}
452 	}
453 
454 	*thread = newthread;
455 
456 	return 0;
457 }
458 
459 
460 static void
461 pthread__create_tramp(void *cookie)
462 {
463 	pthread_t self;
464 	void *retval;
465 
466 	self = cookie;
467 
468 	/*
469 	 * Throw away some stack in a feeble attempt to reduce cache
470 	 * thrash.  May help for SMT processors.  XXX We should not
471 	 * be allocating stacks on fixed 2MB boundaries.  Needs a
472 	 * thread register or decent thread local storage.
473 	 *
474 	 * Note that we may race with the kernel in _lwp_create(),
475 	 * and so pt_lid can be unset at this point, but we don't
476 	 * care.
477 	 */
478 	(void)alloca(((unsigned)self->pt_lid & 7) << 8);
479 
480 	if (self->pt_name != NULL) {
481 		pthread_mutex_lock(&self->pt_lock);
482 		if (self->pt_name != NULL)
483 			(void)_lwp_setname(0, self->pt_name);
484 		pthread_mutex_unlock(&self->pt_lock);
485 	}
486 
487 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
488 		err(1, "_lwp_ctl");
489 	}
490 
491 	retval = (*self->pt_func)(self->pt_arg);
492 
493 	pthread_exit(retval);
494 
495 	/*NOTREACHED*/
496 	pthread__abort();
497 }
498 
499 int
500 pthread_suspend_np(pthread_t thread)
501 {
502 	pthread_t self;
503 
504 	self = pthread__self();
505 	if (self == thread) {
506 		return EDEADLK;
507 	}
508 	if (pthread__find(thread) != 0)
509 		return ESRCH;
510 	if (_lwp_suspend(thread->pt_lid) == 0)
511 		return 0;
512 	return errno;
513 }
514 
515 int
516 pthread_resume_np(pthread_t thread)
517 {
518 
519 	if (pthread__find(thread) != 0)
520 		return ESRCH;
521 	if (_lwp_continue(thread->pt_lid) == 0)
522 		return 0;
523 	return errno;
524 }
525 
526 void
527 pthread_exit(void *retval)
528 {
529 	pthread_t self;
530 	struct pt_clean_t *cleanup;
531 	char *name;
532 
533 	self = pthread__self();
534 
535 	/* Disable cancellability. */
536 	pthread_mutex_lock(&self->pt_lock);
537 	self->pt_flags |= PT_FLAG_CS_DISABLED;
538 	self->pt_cancel = 0;
539 
540 	/* Call any cancellation cleanup handlers */
541 	if (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
542 		pthread_mutex_unlock(&self->pt_lock);
543 		while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
544 			cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
545 			PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
546 			(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
547 		}
548 		pthread_mutex_lock(&self->pt_lock);
549 	}
550 
551 	/* Perform cleanup of thread-specific data */
552 	pthread__destroy_tsd(self);
553 
554 	/* Signal our exit. */
555 	self->pt_exitval = retval;
556 	if (self->pt_flags & PT_FLAG_DETACHED) {
557 		self->pt_state = PT_STATE_DEAD;
558 		name = self->pt_name;
559 		self->pt_name = NULL;
560 		pthread_mutex_unlock(&self->pt_lock);
561 		if (name != NULL)
562 			free(name);
563 		pthread_mutex_lock(&pthread__deadqueue_lock);
564 		PTQ_INSERT_TAIL(&pthread__deadqueue, self, pt_deadq);
565 		pthread_mutex_unlock(&pthread__deadqueue_lock);
566 		_lwp_exit();
567 	} else {
568 		self->pt_state = PT_STATE_ZOMBIE;
569 		pthread_cond_broadcast(&self->pt_joiners);
570 		pthread_mutex_unlock(&self->pt_lock);
571 		/* Note: name will be freed by the joiner. */
572 		_lwp_exit();
573 	}
574 
575 	/*NOTREACHED*/
576 	pthread__abort();
577 	exit(1);
578 }
579 
580 
581 int
582 pthread_join(pthread_t thread, void **valptr)
583 {
584 	pthread_t self;
585 	int error;
586 
587 	self = pthread__self();
588 
589 	if (pthread__find(thread) != 0)
590 		return ESRCH;
591 
592 	if (thread->pt_magic != PT_MAGIC)
593 		return EINVAL;
594 
595 	if (thread == self)
596 		return EDEADLK;
597 
598 	self->pt_droplock = &thread->pt_lock;
599 	pthread_mutex_lock(&thread->pt_lock);
600 	for (;;) {
601 		if (thread->pt_state == PT_STATE_ZOMBIE)
602 			break;
603 		if (thread->pt_state == PT_STATE_DEAD) {
604 			pthread_mutex_unlock(&thread->pt_lock);
605 			self->pt_droplock = NULL;
606 			return ESRCH;
607 		}
608 		if ((thread->pt_flags & PT_FLAG_DETACHED) != 0) {
609 			pthread_mutex_unlock(&thread->pt_lock);
610 			self->pt_droplock = NULL;
611 			return EINVAL;
612 		}
613 		error = pthread_cond_wait(&thread->pt_joiners,
614 		    &thread->pt_lock);
615 		if (error != 0) {
616 			pthread__errorfunc(__FILE__, __LINE__,
617 			    __func__, "unexpected return from cond_wait()");
618 		}
619 
620 	}
621 	pthread__testcancel(self);
622 	if (valptr != NULL)
623 		*valptr = thread->pt_exitval;
624 	/* pthread__reap() will drop the lock. */
625 	pthread__reap(thread);
626 	self->pt_droplock = NULL;
627 
628 	return 0;
629 }
630 
631 static void
632 pthread__reap(pthread_t thread)
633 {
634 	char *name;
635 
636 	name = thread->pt_name;
637 	thread->pt_name = NULL;
638 	thread->pt_state = PT_STATE_DEAD;
639 	pthread_mutex_unlock(&thread->pt_lock);
640 
641 	pthread_mutex_lock(&pthread__deadqueue_lock);
642 	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
643 	pthread_mutex_unlock(&pthread__deadqueue_lock);
644 
645 	if (name != NULL)
646 		free(name);
647 }
648 
649 int
650 pthread_equal(pthread_t t1, pthread_t t2)
651 {
652 
653 	/* Nothing special here. */
654 	return (t1 == t2);
655 }
656 
657 
658 int
659 pthread_detach(pthread_t thread)
660 {
661 
662 	if (pthread__find(thread) != 0)
663 		return ESRCH;
664 
665 	if (thread->pt_magic != PT_MAGIC)
666 		return EINVAL;
667 
668 	pthread_mutex_lock(&thread->pt_lock);
669 	thread->pt_flags |= PT_FLAG_DETACHED;
670 	if (thread->pt_state == PT_STATE_ZOMBIE) {
671 		/* pthread__reap() will drop the lock. */
672 		pthread__reap(thread);
673 	} else {
674 		/*
675 		 * Not valid for threads to be waiting in
676 		 * pthread_join() (there are intractable
677 		 * sync issues from the application
678 		 * perspective), but give those threads
679 		 * a chance anyway.
680 		 */
681 		pthread_cond_broadcast(&thread->pt_joiners);
682 		pthread_mutex_unlock(&thread->pt_lock);
683 	}
684 
685 	return 0;
686 }
687 
688 
689 int
690 pthread_getname_np(pthread_t thread, char *name, size_t len)
691 {
692 
693 	if (pthread__find(thread) != 0)
694 		return ESRCH;
695 
696 	if (thread->pt_magic != PT_MAGIC)
697 		return EINVAL;
698 
699 	pthread_mutex_lock(&thread->pt_lock);
700 	if (thread->pt_name == NULL)
701 		name[0] = '\0';
702 	else
703 		strlcpy(name, thread->pt_name, len);
704 	pthread_mutex_unlock(&thread->pt_lock);
705 
706 	return 0;
707 }
708 
709 
710 int
711 pthread_setname_np(pthread_t thread, const char *name, void *arg)
712 {
713 	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
714 	int namelen;
715 
716 	if (pthread__find(thread) != 0)
717 		return ESRCH;
718 
719 	if (thread->pt_magic != PT_MAGIC)
720 		return EINVAL;
721 
722 	namelen = snprintf(newname, sizeof(newname), name, arg);
723 	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
724 		return EINVAL;
725 
726 	cp = strdup(newname);
727 	if (cp == NULL)
728 		return ENOMEM;
729 
730 	pthread_mutex_lock(&thread->pt_lock);
731 	oldname = thread->pt_name;
732 	thread->pt_name = cp;
733 	(void)_lwp_setname(thread->pt_lid, cp);
734 	pthread_mutex_unlock(&thread->pt_lock);
735 
736 	if (oldname != NULL)
737 		free(oldname);
738 
739 	return 0;
740 }
741 
742 
743 
744 /*
745  * XXX There should be a way for applications to use the efficent
746  *  inline version, but there are opacity/namespace issues.
747  */
748 pthread_t
749 pthread_self(void)
750 {
751 
752 	return pthread__self();
753 }
754 
755 
756 int
757 pthread_cancel(pthread_t thread)
758 {
759 
760 	if (pthread__find(thread) != 0)
761 		return ESRCH;
762 	pthread_mutex_lock(&thread->pt_lock);
763 	thread->pt_flags |= PT_FLAG_CS_PENDING;
764 	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
765 		thread->pt_cancel = 1;
766 		pthread_mutex_unlock(&thread->pt_lock);
767 		_lwp_wakeup(thread->pt_lid);
768 	} else
769 		pthread_mutex_unlock(&thread->pt_lock);
770 
771 	return 0;
772 }
773 
774 
775 int
776 pthread_setcancelstate(int state, int *oldstate)
777 {
778 	pthread_t self;
779 	int retval;
780 
781 	self = pthread__self();
782 	retval = 0;
783 
784 	pthread_mutex_lock(&self->pt_lock);
785 
786 	if (oldstate != NULL) {
787 		if (self->pt_flags & PT_FLAG_CS_DISABLED)
788 			*oldstate = PTHREAD_CANCEL_DISABLE;
789 		else
790 			*oldstate = PTHREAD_CANCEL_ENABLE;
791 	}
792 
793 	if (state == PTHREAD_CANCEL_DISABLE) {
794 		self->pt_flags |= PT_FLAG_CS_DISABLED;
795 		if (self->pt_cancel) {
796 			self->pt_flags |= PT_FLAG_CS_PENDING;
797 			self->pt_cancel = 0;
798 		}
799 	} else if (state == PTHREAD_CANCEL_ENABLE) {
800 		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
801 		/*
802 		 * If a cancellation was requested while cancellation
803 		 * was disabled, note that fact for future
804 		 * cancellation tests.
805 		 */
806 		if (self->pt_flags & PT_FLAG_CS_PENDING) {
807 			self->pt_cancel = 1;
808 			/* This is not a deferred cancellation point. */
809 			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
810 				pthread_mutex_unlock(&self->pt_lock);
811 				pthread__cancelled();
812 			}
813 		}
814 	} else
815 		retval = EINVAL;
816 
817 	pthread_mutex_unlock(&self->pt_lock);
818 
819 	return retval;
820 }
821 
822 
823 int
824 pthread_setcanceltype(int type, int *oldtype)
825 {
826 	pthread_t self;
827 	int retval;
828 
829 	self = pthread__self();
830 	retval = 0;
831 
832 	pthread_mutex_lock(&self->pt_lock);
833 
834 	if (oldtype != NULL) {
835 		if (self->pt_flags & PT_FLAG_CS_ASYNC)
836 			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
837 		else
838 			*oldtype = PTHREAD_CANCEL_DEFERRED;
839 	}
840 
841 	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
842 		self->pt_flags |= PT_FLAG_CS_ASYNC;
843 		if (self->pt_cancel) {
844 			pthread_mutex_unlock(&self->pt_lock);
845 			pthread__cancelled();
846 		}
847 	} else if (type == PTHREAD_CANCEL_DEFERRED)
848 		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
849 	else
850 		retval = EINVAL;
851 
852 	pthread_mutex_unlock(&self->pt_lock);
853 
854 	return retval;
855 }
856 
857 
858 void
859 pthread_testcancel(void)
860 {
861 	pthread_t self;
862 
863 	self = pthread__self();
864 	if (self->pt_cancel)
865 		pthread__cancelled();
866 }
867 
868 
869 /*
870  * POSIX requires that certain functions return an error rather than
871  * invoking undefined behavior even when handed completely bogus
872  * pthread_t values, e.g. stack garbage or (pthread_t)666. This
873  * utility routine searches the list of threads for the pthread_t
874  * value without dereferencing it.
875  */
876 int
877 pthread__find(pthread_t id)
878 {
879 	pthread_t target;
880 
881 	pthread_rwlock_rdlock(&pthread__alltree_lock);
882 	/* LINTED */
883 	target = RB_FIND(__pthread__alltree, &pthread__alltree, id);
884 	pthread_rwlock_unlock(&pthread__alltree_lock);
885 
886 	if (target == NULL || target->pt_state == PT_STATE_DEAD)
887 		return ESRCH;
888 
889 	return 0;
890 }
891 
892 
893 void
894 pthread__testcancel(pthread_t self)
895 {
896 
897 	if (self->pt_cancel)
898 		pthread__cancelled();
899 }
900 
901 
902 void
903 pthread__cancelled(void)
904 {
905 	pthread_mutex_t *droplock;
906 	pthread_t self;
907 
908 	self = pthread__self();
909 	droplock = self->pt_droplock;
910 	self->pt_droplock = NULL;
911 
912 	if (droplock != NULL && pthread_mutex_held_np(droplock))
913 		pthread_mutex_unlock(droplock);
914 
915 	pthread_exit(PTHREAD_CANCELED);
916 }
917 
918 
919 void
920 pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
921 {
922 	pthread_t self;
923 	struct pt_clean_t *entry;
924 
925 	self = pthread__self();
926 	entry = store;
927 	entry->ptc_cleanup = cleanup;
928 	entry->ptc_arg = arg;
929 	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
930 }
931 
932 
933 void
934 pthread__cleanup_pop(int ex, void *store)
935 {
936 	pthread_t self;
937 	struct pt_clean_t *entry;
938 
939 	self = pthread__self();
940 	entry = store;
941 
942 	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
943 	if (ex)
944 		(*entry->ptc_cleanup)(entry->ptc_arg);
945 }
946 
947 
948 int *
949 pthread__errno(void)
950 {
951 	pthread_t self;
952 
953 	self = pthread__self();
954 
955 	return &(self->pt_errno);
956 }
957 
ssize_t	_sys_write(int, const void *, size_t);

/*
 * Report a failed internal assertion on stderr and terminate.
 *
 *	file, line	location of the assertion
 *	function	enclosing function name, or NULL if unknown
 *	expr		text of the expression that failed
 *
 * The process is sent SIGABRT; _exit(1) is the fallback should that
 * not terminate it.  Does not return.
 */
void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, sizeof(buf),
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	/*
	 * snprintf reports the length the full message would have had,
	 * or a negative value on error.  Never write more than what is
	 * actually stored in buf.
	 */
	if (len < 0)
		len = 0;
	else if ((size_t)len >= sizeof(buf))
		len = (int)sizeof(buf) - 1;

	(void)_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)kill(getpid(), SIGABRT);

	_exit(1);
}
983 
984 
985 void
986 pthread__errorfunc(const char *file, int line, const char *function,
987 		   const char *msg)
988 {
989 	char buf[1024];
990 	size_t len;
991 
992 	if (pthread__diagassert == 0)
993 		return;
994 
995 	/*
996 	 * snprintf should not acquire any locks, or we could
997 	 * end up deadlocked if the assert caller held locks.
998 	 */
999 	len = snprintf(buf, 1024,
1000 	    "%s: Error detected by libpthread: %s.\n"
1001 	    "Detected by file \"%s\", line %d%s%s%s.\n"
1002 	    "See pthread(3) for information.\n",
1003 	    getprogname(), msg, file, line,
1004 	    function ? ", function \"" : "",
1005 	    function ? function : "",
1006 	    function ? "\"" : "");
1007 
1008 	if (pthread__diagassert & DIAGASSERT_STDERR)
1009 		_sys_write(STDERR_FILENO, buf, len);
1010 
1011 	if (pthread__diagassert & DIAGASSERT_SYSLOG)
1012 		syslog(LOG_DEBUG | LOG_USER, "%s", buf);
1013 
1014 	if (pthread__diagassert & DIAGASSERT_ABORT) {
1015 		(void)kill(getpid(), SIGABRT);
1016 		_exit(1);
1017 	}
1018 }
1019 
1020 /*
1021  * Thread park/unpark operations.  The kernel operations are
1022  * modelled after a brief description from "Multithreading in
1023  * the Solaris Operating Environment":
1024  *
1025  * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
1026  */
1027 
1028 #define	OOPS(msg)			\
1029     pthread__errorfunc(__FILE__, __LINE__, __func__, msg)
1030 
1031 int
1032 pthread__park(pthread_t self, pthread_mutex_t *lock,
1033 	      pthread_queue_t *queue, const struct timespec *abstime,
1034 	      int cancelpt, const void *hint)
1035 {
1036 	int rv, error;
1037 	void *obj;
1038 
1039 	/*
1040 	 * For non-interlocked release of mutexes we need a store
1041 	 * barrier before incrementing pt_blocking away from zero.
1042 	 * This is provided by pthread_mutex_unlock().
1043 	 */
1044 	self->pt_willpark = 1;
1045 	pthread_mutex_unlock(lock);
1046 	self->pt_willpark = 0;
1047 	self->pt_blocking++;
1048 
1049 	/*
1050 	 * Wait until we are awoken by a pending unpark operation,
1051 	 * a signal, an unpark posted after we have gone asleep,
1052 	 * or an expired timeout.
1053 	 *
1054 	 * It is fine to test the value of pt_sleepobj without
1055 	 * holding any locks, because:
1056 	 *
1057 	 * o Only the blocking thread (this thread) ever sets them
1058 	 *   to a non-NULL value.
1059 	 *
1060 	 * o Other threads may set them NULL, but if they do so they
1061 	 *   must also make this thread return from _lwp_park.
1062 	 *
1063 	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
1064 	 *   calls and all make use of spinlocks in the kernel.  So
1065 	 *   these system calls act as full memory barriers, and will
1066 	 *   ensure that the calling CPU's store buffers are drained.
1067 	 *   In combination with the spinlock release before unpark,
1068 	 *   this means that modification of pt_sleepobj/onq by another
1069 	 *   thread will become globally visible before that thread
1070 	 *   schedules an unpark operation on this thread.
1071 	 *
1072 	 * Note: the test in the while() statement dodges the park op if
1073 	 * we have already been awoken, unless there is another thread to
1074 	 * awaken.  This saves a syscall - if we were already awakened,
1075 	 * the next call to _lwp_park() would need to return early in order
1076 	 * to eat the previous wakeup.
1077 	 */
1078 	rv = 0;
1079 	do {
1080 		/*
1081 		 * If we deferred unparking a thread, arrange to
1082 		 * have _lwp_park() restart it before blocking.
1083 		 */
1084 		error = _lwp_park(abstime, self->pt_unpark, hint, hint);
1085 		self->pt_unpark = 0;
1086 		if (error != 0) {
1087 			switch (rv = errno) {
1088 			case EINTR:
1089 			case EALREADY:
1090 				rv = 0;
1091 				break;
1092 			case ETIMEDOUT:
1093 				break;
1094 			default:
1095 				OOPS("_lwp_park failed");
1096 				break;
1097 			}
1098 		}
1099 		/* Check for cancellation. */
1100 		if (cancelpt && self->pt_cancel)
1101 			rv = EINTR;
1102 	} while (self->pt_sleepobj != NULL && rv == 0);
1103 
1104 	/*
1105 	 * If we have been awoken early but are still on the queue,
1106 	 * then remove ourself.  Again, it's safe to do the test
1107 	 * without holding any locks.
1108 	 */
1109 	if (__predict_false(self->pt_sleepobj != NULL)) {
1110 		pthread_mutex_lock(lock);
1111 		if ((obj = self->pt_sleepobj) != NULL) {
1112 			PTQ_REMOVE(queue, self, pt_sleep);
1113 			self->pt_sleepobj = NULL;
1114 			if (obj != NULL && self->pt_early != NULL)
1115 				(*self->pt_early)(obj);
1116 		}
1117 		pthread_mutex_unlock(lock);
1118 	}
1119 	self->pt_early = NULL;
1120 	self->pt_blocking--;
1121 	membar_sync();
1122 
1123 	return rv;
1124 }
1125 
1126 void
1127 pthread__unpark(pthread_queue_t *queue, pthread_t self,
1128 		pthread_mutex_t *interlock)
1129 {
1130 	pthread_t target;
1131 	u_int max;
1132 	size_t nwaiters;
1133 
1134 	max = pthread__unpark_max;
1135 	nwaiters = self->pt_nwaiters;
1136 	target = PTQ_FIRST(queue);
1137 	if (nwaiters == max) {
1138 		/* Overflow. */
1139 		(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1140 		    __UNVOLATILE(&interlock->ptm_waiters));
1141 		nwaiters = 0;
1142 	}
1143 	target->pt_sleepobj = NULL;
1144 	self->pt_waiters[nwaiters++] = target->pt_lid;
1145 	PTQ_REMOVE(queue, target, pt_sleep);
1146 	self->pt_nwaiters = nwaiters;
1147 	pthread__mutex_deferwake(self, interlock);
1148 }
1149 
1150 void
1151 pthread__unpark_all(pthread_queue_t *queue, pthread_t self,
1152 		    pthread_mutex_t *interlock)
1153 {
1154 	pthread_t target;
1155 	u_int max;
1156 	size_t nwaiters;
1157 
1158 	max = pthread__unpark_max;
1159 	nwaiters = self->pt_nwaiters;
1160 	PTQ_FOREACH(target, queue, pt_sleep) {
1161 		if (nwaiters == max) {
1162 			/* Overflow. */
1163 			(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1164 			    __UNVOLATILE(&interlock->ptm_waiters));
1165 			nwaiters = 0;
1166 		}
1167 		target->pt_sleepobj = NULL;
1168 		self->pt_waiters[nwaiters++] = target->pt_lid;
1169 	}
1170 	self->pt_nwaiters = nwaiters;
1171 	PTQ_INIT(queue);
1172 	pthread__mutex_deferwake(self, interlock);
1173 }
1174 
1175 #undef	OOPS
1176 
1177 /*
1178  * Allocate a stack for a thread, and set it up. It needs to be aligned, so
1179  * that a thread can find itself by its stack pointer.
1180  */
1181 static int
1182 pthread__stackalloc(pthread_t *newt)
1183 {
1184 	void *addr;
1185 
1186 	addr = mmap(NULL, pthread__stacksize, PROT_READ|PROT_WRITE,
1187 	    MAP_ANON|MAP_PRIVATE | MAP_ALIGNED(pthread__stacksize_lg),
1188 	    -1, (off_t)0);
1189 
1190 	if (addr == MAP_FAILED)
1191 		return ENOMEM;
1192 
1193 	pthread__assert(((intptr_t)addr & pthread__stackmask) == 0);
1194 
1195 	return pthread__stackid_setup(addr, pthread__stacksize, newt);
1196 }
1197 
1198 
1199 /*
1200  * Set up the slightly special stack for the "initial" thread, which
1201  * runs on the normal system stack, and thus gets slightly different
1202  * treatment.
1203  */
1204 static void
1205 pthread__initmain(pthread_t *newt)
1206 {
1207 	struct rlimit slimit;
1208 	size_t pagesize;
1209 	pthread_t t;
1210 	void *base;
1211 	size_t size;
1212 	int error, ret;
1213 	char *value;
1214 
1215 	pagesize = (size_t)sysconf(_SC_PAGESIZE);
1216 	pthread__stacksize = 0;
1217 	ret = getrlimit(RLIMIT_STACK, &slimit);
1218 	if (ret == -1)
1219 		err(1, "Couldn't get stack resource consumption limits");
1220 
1221 	value = pthread__getenv("PTHREAD_STACKSIZE");
1222 	if (value != NULL) {
1223 		pthread__stacksize = atoi(value) * 1024;
1224 		if (pthread__stacksize > slimit.rlim_cur)
1225 			pthread__stacksize = (size_t)slimit.rlim_cur;
1226 	}
1227 	if (pthread__stacksize == 0)
1228 		pthread__stacksize = (size_t)slimit.rlim_cur;
1229 	if (pthread__stacksize < 4 * pagesize)
1230 		errx(1, "Stacksize limit is too low, minimum %zd kbyte.",
1231 		    4 * pagesize / 1024);
1232 
1233 	pthread__stacksize_lg = -1;
1234 	while (pthread__stacksize) {
1235 		pthread__stacksize >>= 1;
1236 		pthread__stacksize_lg++;
1237 	}
1238 
1239 	pthread__stacksize = (1 << pthread__stacksize_lg);
1240 	pthread__stackmask = pthread__stacksize - 1;
1241 	pthread__threadmask = ~pthread__stackmask;
1242 
1243 	base = (void *)(pthread__sp() & pthread__threadmask);
1244 	if ((pthread__sp() - (uintptr_t)base) < 4 * pagesize) {
1245 		pthread__mainbase = (vaddr_t)base;
1246 		base = STACK_GROW(base, pthread__stacksize);
1247 		pthread__mainstruct = (vaddr_t)base;
1248 		if (mprotect(base, pthread__stacksize,
1249 		    PROT_READ|PROT_WRITE) == -1)
1250 			err(1, "mprotect stack");
1251 	}
1252 	size = pthread__stacksize;
1253 
1254 	error = pthread__stackid_setup(base, size, &t);
1255 	if (error) {
1256 		/* XXX */
1257 		errx(2, "failed to setup main thread: error=%d", error);
1258 	}
1259 
1260 	*newt = t;
1261 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
1262 #  ifdef __HAVE___LWP_GETTCB_FAST
1263 	t->pt_tls = __lwp_gettcb_fast();
1264 #  else
1265 	t->pt_tls = _lwp_getprivate();
1266 #  endif
1267 	t->pt_tls->tcb_pthread = t;
1268 #else
1269 	_lwp_setprivate(t);
1270 #endif
1271 }
1272 
1273 static int
1274 /*ARGSUSED*/
1275 pthread__stackid_setup(void *base, size_t size, pthread_t *tp)
1276 {
1277 	pthread_t t;
1278 	void *redaddr;
1279 	size_t pagesize;
1280 	int ret;
1281 
1282 	t = base;
1283 	pagesize = (size_t)sysconf(_SC_PAGESIZE);
1284 
1285 	/*
1286 	 * Put a pointer to the pthread in the bottom (but
1287          * redzone-protected section) of the stack.
1288 	 */
1289 	redaddr = STACK_SHRINK(STACK_MAX(base, size), pagesize);
1290 	t->pt_stack.ss_size = size - 2 * pagesize;
1291 #ifdef __MACHINE_STACK_GROWS_UP
1292 	t->pt_stack.ss_sp = (char *)(void *)base + pagesize;
1293 #else
1294 	t->pt_stack.ss_sp = (char *)(void *)base + 2 * pagesize;
1295 #endif
1296 	/* Protect the next-to-bottom stack page as a red zone. */
1297 	ret = mprotect(redaddr, pagesize, PROT_NONE);
1298 	if (ret == -1) {
1299 		return errno;
1300 	}
1301 	*tp = t;
1302 	return 0;
1303 }
1304 
1305 #ifndef lint
/*
 * Ordering function for the tree of all threads: orders two thread
 * structures by their addresses.  Returns -1, 0 or 1 when `a' is
 * below, equal to, or above `b' respectively.
 */
static int
pthread__cmp(struct __pthread_st *a, struct __pthread_st *b)
{
	const uintptr_t lhs = (uintptr_t)a;
	const uintptr_t rhs = (uintptr_t)b;

	if (lhs == rhs)
		return 0;
	return (lhs < rhs) ? -1 : 1;
}
1317 RB_GENERATE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
1318 #endif
1319 
/*
 * Because getenv() wants to use locks.
 *
 * Look up `name' by scanning the environ array directly.  Returns a
 * pointer to the text following "name=" in the first matching entry
 * (pointing into the environment itself, not a copy), or NULL when no
 * entry matches or the process has no environment at all.
 */
char *
pthread__getenv(const char *name)
{
	extern char **environ;
	size_t l_name, offset;

	/* environ may be NULL; there is nothing to search then. */
	if (environ == NULL)
		return NULL;

	l_name = strlen(name);
	for (offset = 0; environ[offset] != NULL; offset++) {
		/* Require '=' right after the name to reject prefix matches. */
		if (strncmp(name, environ[offset], l_name) == 0 &&
		    environ[offset][l_name] == '=') {
			return environ[offset] + l_name + 1;
		}
	}

	return NULL;
}
1337 
1338 pthread_mutex_t *
1339 pthread__hashlock(volatile const void *p)
1340 {
1341 	uintptr_t v;
1342 
1343 	v = (uintptr_t)p;
1344 	return &hashlocks[((v >> 9) ^ (v >> 3)) & (NHASHLOCK - 1)].mutex;
1345 }
1346 
1347 int
1348 pthread__checkpri(int pri)
1349 {
1350 	static int havepri;
1351 	static long min, max;
1352 
1353 	if (!havepri) {
1354 		min = sysconf(_SC_SCHED_PRI_MIN);
1355 		max = sysconf(_SC_SCHED_PRI_MAX);
1356 		havepri = 1;
1357 	}
1358 	return (pri < min || pri > max) ? EINVAL : 0;
1359 }
1360