xref: /netbsd-src/lib/libpthread/pthread.c (revision 213144e1de7024d4193d04aa51005ba3a5ad95e7)
1 /*	$NetBSD: pthread.c,v 1.120 2010/12/22 22:41:45 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Nathan J. Williams and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: pthread.c,v 1.120 2010/12/22 22:41:45 christos Exp $");
34 
35 #define	__EXPOSE_STACK	1
36 
37 #include <sys/param.h>
38 #include <sys/mman.h>
39 #include <sys/sysctl.h>
40 #include <sys/lwpctl.h>
41 
42 #include <err.h>
43 #include <errno.h>
44 #include <lwp.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <syslog.h>
50 #include <ucontext.h>
51 #include <unistd.h>
52 #include <sched.h>
53 
54 #include "pthread.h"
55 #include "pthread_int.h"
56 
/*
 * Red-black tree of thread structures, consulted by pthread__find() to
 * validate a pthread_t.  pthread__alltree_lock is taken for writing when
 * pthread_create() inserts a thread and for reading during lookups.
 */
pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
RB_HEAD(__pthread__alltree, __pthread_st) pthread__alltree;

#ifndef lint
/* Comparison function and generated prototypes for the tree above. */
static int	pthread__cmp(struct __pthread_st *, struct __pthread_st *);
RB_PROTOTYPE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
#endif
64 
/* Forward declarations of the file-local helpers. */
static void	pthread__create_tramp(void *);
static void	pthread__initthread(pthread_t);
static void	pthread__scrubthread(pthread_t, char *, int);
static int	pthread__stackid_setup(void *, size_t, pthread_t *);
static int	pthread__stackalloc(pthread_t *);
static void	pthread__initmain(pthread_t *);
static void	pthread__fork_callback(void);
static void	pthread__reap(pthread_t);
static void	pthread__child_callback(void);
static void	pthread__start(void);

/* Library initialization entry point; also aliased as __libc_thr_init. */
void	pthread__init(void);
77 
/* Set by the first pthread_create() call; reset to 0 in a forked child. */
int pthread__started;
/* Serializes access to pthread__deadqueue. */
pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
/* Thread structures that are finished with and may be reused by pthread_create(). */
pthread_queue_t pthread__deadqueue;
/* List of all thread structures; the main thread is inserted at the head. */
pthread_queue_t pthread__allqueue;

/* Attributes used when pthread_create() is called with attr == NULL. */
static pthread_attr_t pthread_default_attr;
/* Placeholder that pt_lwpctl points at until _lwp_ctl() supplies the real block. */
static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };
/* Thread structure of the initial (main) thread, recorded by pthread__init(). */
static pthread_t pthread__first;
86 
/*
 * Bits of pthread__diagassert.  They select what pthread__errorfunc()
 * does when the library detects an error, and are set or cleared by the
 * letters of the PTHREAD_DIAGASSERT environment variable in pthread__init().
 */
enum {
	DIAGASSERT_ABORT =	1<<0,	/* raise SIGABRT and exit */
	DIAGASSERT_STDERR =	1<<1,	/* write the report to stderr */
	DIAGASSERT_SYSLOG =	1<<2	/* send the report to syslog */
};

/* Current reporting mode; zero makes pthread__errorfunc() a no-op. */
static int pthread__diagassert;
94 
/* Filled in from the hw.ncpu sysctl by pthread__init(). */
int pthread__concurrency;
/* NOTE(review): not referenced in this part of the file; presumably a spin count - confirm. */
int pthread__nspins;
/*
 * Largest number of LWP ids batched up before _lwp_unpark_all() is called;
 * lowered in pthread__init() if the kernel reports a smaller value.
 */
int pthread__unpark_max = PTHREAD__UNPARK_MAX;
98 
/*
 * We have to initialize the pthread_stack* variables here because
 * mutexes are used before pthread__init(), and thus pthread__initmain(),
 * is called.  Since mutexes only save the stack pointer and not a
 * pointer to the thread data, it is safe to change the mapping from
 * stack pointer to thread data afterwards.
 *
 * The initial values describe a stack of 2^18 bytes: pthread__stackmask
 * selects the offset within such a stack and pthread__threadmask is its
 * complement.
 */
#define	_STACKSIZE_LG 18
int	pthread__stacksize_lg = _STACKSIZE_LG;
size_t	pthread__stacksize = 1 << _STACKSIZE_LG;
vaddr_t	pthread__stackmask = (1 << _STACKSIZE_LG) - 1;
vaddr_t pthread__threadmask = (vaddr_t)~((1 << _STACKSIZE_LG) - 1);
vaddr_t	pthread__mainbase = 0;
vaddr_t	pthread__mainstruct = 0;
#undef	_STACKSIZE_LG
114 
/* NOTE(review): declared here but not called in this part of the file. */
int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);

/*
 * Make the __libc_thr_* names resolve to the implementations in this
 * library.
 */
__strong_alias(__libc_thr_self,pthread_self)
__strong_alias(__libc_thr_create,pthread_create)
__strong_alias(__libc_thr_exit,pthread_exit)
__strong_alias(__libc_thr_errno,pthread__errno)
__strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
__strong_alias(__libc_thr_equal,pthread_equal)
__strong_alias(__libc_thr_init,pthread__init)
124 
/*
 * Static library kludge.  Place here a reference to one symbol from
 * every library file that is not otherwise referenced from this file.
 * NOTE(review): presumably this forces a static link to pull those
 * object files in - confirm.
 */
extern int pthread__cancel_stub_binder;

void *pthread__static_lib_binder[] = {
	&pthread__cancel_stub_binder,
	pthread_cond_init,
	pthread_mutex_init,
	pthread_rwlock_init,
	pthread_barrier_init,
	pthread_key_create,
	pthread_setspecific,
};
140 
/* Number of entries in the hashlocks[] table. */
#define	NHASHLOCK	64

/*
 * Table of mutexes, each padded out and aligned to 64 bytes.  The
 * mutexes are initialized in pthread__init().
 * NOTE(review): the code that picks a lock from this table is not in
 * this part of the file; the padding presumably keeps each lock on its
 * own cache line - confirm.
 */
static union hashlock {
	pthread_mutex_t	mutex;
	char		pad[64];
} hashlocks[NHASHLOCK] __aligned(64);
147 
/*
 * One-time library initialization.
 *
 * This needs to be started by the library loading code, before main()
 * gets to run, for various things that use the state of the initial thread
 * to work properly (thread-specific data is an application-visible example;
 * spinlock counts for mutexes is an internal example).
 *
 * Failure to obtain the required kernel state (hw.ncpu, the unpark limit,
 * the lwpctl block) is fatal: the process exits through err(3).
 */
void
pthread__init(void)
{
	pthread_t first;
	char *p;
	int i, mib[2];
	size_t len;
	extern int __isthreaded;

	/* Seed the concurrency level with the number of CPUs. */
	mib[0] = CTL_HW;
	mib[1] = HW_NCPU;

	len = sizeof(pthread__concurrency);
	if (sysctl(mib, 2, &pthread__concurrency, &len, NULL, 0) == -1)
		err(1, "sysctl(hw.ncpu)");

	/* Initialize locks first; they're needed elsewhere. */
	pthread__lockprim_init();
	for (i = 0; i < NHASHLOCK; i++) {
		pthread_mutex_init(&hashlocks[i].mutex, NULL);
	}

	/*
	 * Fetch parameters.  The value returned by an empty
	 * _lwp_unpark_all() request caps pthread__unpark_max.
	 */
	i = (int)_lwp_unpark_all(NULL, 0, NULL);
	if (i == -1)
		err(1, "_lwp_unpark_all");
	if (i < pthread__unpark_max)
		pthread__unpark_max = i;

	/* Basic data structure setup */
	pthread_attr_init(&pthread_default_attr);
	PTQ_INIT(&pthread__allqueue);
	PTQ_INIT(&pthread__deadqueue);
	RB_INIT(&pthread__alltree);

	/* Create the thread structure corresponding to main() */
	pthread__initmain(&first);
	pthread__initthread(first);
	pthread__scrubthread(first, NULL, 0);

	/* pthread__scrubthread() zeroed pt_lid; record the real LWP id. */
	first->pt_lid = _lwp_self();
	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
	RB_INSERT(__pthread__alltree, &pthread__alltree, first);

	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &first->pt_lwpctl) != 0) {
		err(1, "_lwp_ctl");
	}

	/* Start subsystems */
	PTHREAD_MD_INIT

	/*
	 * Parse PTHREAD_DIAGASSERT: a lower-case letter turns the
	 * corresponding reporting action on, the upper-case letter turns
	 * it off.  Unrecognized characters are ignored.
	 */
	for (p = pthread__getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
		switch (*p) {
		case 'a':
			pthread__diagassert |= DIAGASSERT_ABORT;
			break;
		case 'A':
			pthread__diagassert &= ~DIAGASSERT_ABORT;
			break;
		case 'e':
			pthread__diagassert |= DIAGASSERT_STDERR;
			break;
		case 'E':
			pthread__diagassert &= ~DIAGASSERT_STDERR;
			break;
		case 'l':
			pthread__diagassert |= DIAGASSERT_SYSLOG;
			break;
		case 'L':
			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
			break;
		default:
			break;
		}
	}

	/* Tell libc that we're here and it should role-play accordingly. */
	pthread__first = first;
	pthread_atfork(NULL, NULL, pthread__fork_callback);
	__isthreaded = 1;
}
236 
237 static void
238 pthread__fork_callback(void)
239 {
240 	struct __pthread_st *self;
241 
242 	/* lwpctl state is not copied across fork. */
243 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &pthread__first->pt_lwpctl)) {
244 		err(1, "_lwp_ctl");
245 	}
246 	self = pthread__self();
247 	self->pt_lid = _lwp_self();
248 }
249 
250 static void
251 pthread__child_callback(void)
252 {
253 
254 	/*
255 	 * Clean up data structures that a forked child process might
256 	 * trip over. Note that if threads have been created (causing
257 	 * this handler to be registered) the standards say that the
258 	 * child will trigger undefined behavior if it makes any
259 	 * pthread_* calls (or any other calls that aren't
260 	 * async-signal-safe), so we don't really have to clean up
261 	 * much. Anything that permits some pthread_* calls to work is
262 	 * merely being polite.
263 	 */
264 	pthread__started = 0;
265 }
266 
267 static void
268 pthread__start(void)
269 {
270 
271 	/*
272 	 * Per-process timers are cleared by fork(); despite the
273 	 * various restrictions on fork() and threads, it's legal to
274 	 * fork() before creating any threads.
275 	 */
276 	pthread_atfork(NULL, NULL, pthread__child_callback);
277 }
278 
279 
280 /* General-purpose thread data structure sanitization. */
281 /* ARGSUSED */
282 static void
283 pthread__initthread(pthread_t t)
284 {
285 
286 	t->pt_self = t;
287 	t->pt_magic = PT_MAGIC;
288 	t->pt_willpark = 0;
289 	t->pt_unpark = 0;
290 	t->pt_nwaiters = 0;
291 	t->pt_sleepobj = NULL;
292 	t->pt_signalled = 0;
293 	t->pt_havespecific = 0;
294 	t->pt_early = NULL;
295 	t->pt_lwpctl = &pthread__dummy_lwpctl;
296 	t->pt_blocking = 0;
297 	t->pt_droplock = NULL;
298 
299 	memcpy(&t->pt_lockops, pthread__lock_ops, sizeof(t->pt_lockops));
300 	pthread_mutex_init(&t->pt_lock, NULL);
301 	PTQ_INIT(&t->pt_cleanup_stack);
302 	pthread_cond_init(&t->pt_joiners, NULL);
303 	memset(&t->pt_specific, 0, sizeof(t->pt_specific));
304 }
305 
306 static void
307 pthread__scrubthread(pthread_t t, char *name, int flags)
308 {
309 
310 	t->pt_state = PT_STATE_RUNNING;
311 	t->pt_exitval = NULL;
312 	t->pt_flags = flags;
313 	t->pt_cancel = 0;
314 	t->pt_errno = 0;
315 	t->pt_name = name;
316 	t->pt_lid = 0;
317 }
318 
319 
320 int
321 pthread_create(pthread_t *thread, const pthread_attr_t *attr,
322 	    void *(*startfunc)(void *), void *arg)
323 {
324 	pthread_t newthread;
325 	pthread_attr_t nattr;
326 	struct pthread_attr_private *p;
327 	char * volatile name;
328 	unsigned long flag;
329 	int ret;
330 
331 	/*
332 	 * It's okay to check this without a lock because there can
333 	 * only be one thread before it becomes true.
334 	 */
335 	if (pthread__started == 0) {
336 		pthread__start();
337 		pthread__started = 1;
338 	}
339 
340 	if (attr == NULL)
341 		nattr = pthread_default_attr;
342 	else if (attr->pta_magic == PT_ATTR_MAGIC)
343 		nattr = *attr;
344 	else
345 		return EINVAL;
346 
347 	/* Fetch misc. attributes from the attr structure. */
348 	name = NULL;
349 	if ((p = nattr.pta_private) != NULL)
350 		if (p->ptap_name[0] != '\0')
351 			if ((name = strdup(p->ptap_name)) == NULL)
352 				return ENOMEM;
353 
354 	newthread = NULL;
355 
356 	/*
357 	 * Try to reclaim a dead thread.
358 	 */
359 	if (!PTQ_EMPTY(&pthread__deadqueue)) {
360 		pthread_mutex_lock(&pthread__deadqueue_lock);
361 		PTQ_FOREACH(newthread, &pthread__deadqueue, pt_deadq) {
362 			/* Still running? */
363 			if (newthread->pt_lwpctl->lc_curcpu ==
364 			    LWPCTL_CPU_EXITED ||
365 			    (_lwp_kill(newthread->pt_lid, 0) == -1 &&
366 			    errno == ESRCH))
367 				break;
368 		}
369 		if (newthread)
370 			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
371 		pthread_mutex_unlock(&pthread__deadqueue_lock);
372 	}
373 
374 	/*
375 	 * If necessary set up a stack, allocate space for a pthread_st,
376 	 * and initialize it.
377 	 */
378 	if (newthread == NULL) {
379 		ret = pthread__stackalloc(&newthread);
380 		if (ret != 0) {
381 			if (name)
382 				free(name);
383 			return ret;
384 		}
385 
386 		/* This is used only when creating the thread. */
387 		_INITCONTEXT_U(&newthread->pt_uc);
388 #ifdef PTHREAD_MACHINE_HAS_ID_REGISTER
389 		pthread__uc_id(&newthread->pt_uc) = newthread;
390 #endif
391 		newthread->pt_uc.uc_stack = newthread->pt_stack;
392 		newthread->pt_uc.uc_link = NULL;
393 
394 		/* Add to list of all threads. */
395 		pthread_rwlock_wrlock(&pthread__alltree_lock);
396 		PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
397 		RB_INSERT(__pthread__alltree, &pthread__alltree, newthread);
398 		pthread_rwlock_unlock(&pthread__alltree_lock);
399 
400 		/* Will be reset by the thread upon exit. */
401 		pthread__initthread(newthread);
402 	}
403 
404 	/*
405 	 * Create the new LWP.
406 	 */
407 	pthread__scrubthread(newthread, name, nattr.pta_flags);
408 	newthread->pt_func = startfunc;
409 	newthread->pt_arg = arg;
410 	_lwp_makecontext(&newthread->pt_uc, pthread__create_tramp,
411 	    newthread, newthread, newthread->pt_stack.ss_sp,
412 	    newthread->pt_stack.ss_size);
413 
414 	flag = LWP_DETACHED;
415 	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0 ||
416 	    (nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0)
417 		flag |= LWP_SUSPENDED;
418 	ret = _lwp_create(&newthread->pt_uc, flag, &newthread->pt_lid);
419 	if (ret != 0) {
420 		pthread_mutex_lock(&newthread->pt_lock);
421 		/* Will unlock and free name. */
422 		pthread__reap(newthread);
423 		return ret;
424 	}
425 
426 	if ((nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0) {
427 		if (p != NULL) {
428 			(void)pthread_setschedparam(newthread, p->ptap_policy,
429 			    &p->ptap_sp);
430 		}
431 		if ((newthread->pt_flags & PT_FLAG_SUSPENDED) == 0) {
432 			(void)_lwp_continue(newthread->pt_lid);
433 		}
434 	}
435 
436 	*thread = newthread;
437 
438 	return 0;
439 }
440 
441 
442 static void
443 pthread__create_tramp(void *cookie)
444 {
445 	pthread_t self;
446 	void *retval;
447 
448 	self = cookie;
449 
450 	/*
451 	 * Throw away some stack in a feeble attempt to reduce cache
452 	 * thrash.  May help for SMT processors.  XXX We should not
453 	 * be allocating stacks on fixed 2MB boundaries.  Needs a
454 	 * thread register or decent thread local storage.
455 	 *
456 	 * Note that we may race with the kernel in _lwp_create(),
457 	 * and so pt_lid can be unset at this point, but we don't
458 	 * care.
459 	 */
460 	(void)alloca(((unsigned)self->pt_lid & 7) << 8);
461 
462 	if (self->pt_name != NULL) {
463 		pthread_mutex_lock(&self->pt_lock);
464 		if (self->pt_name != NULL)
465 			(void)_lwp_setname(0, self->pt_name);
466 		pthread_mutex_unlock(&self->pt_lock);
467 	}
468 
469 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
470 		err(1, "_lwp_ctl");
471 	}
472 
473 	retval = (*self->pt_func)(self->pt_arg);
474 
475 	pthread_exit(retval);
476 
477 	/*NOTREACHED*/
478 	pthread__abort();
479 }
480 
481 int
482 pthread_suspend_np(pthread_t thread)
483 {
484 	pthread_t self;
485 
486 	self = pthread__self();
487 	if (self == thread) {
488 		return EDEADLK;
489 	}
490 	if (pthread__find(thread) != 0)
491 		return ESRCH;
492 	if (_lwp_suspend(thread->pt_lid) == 0)
493 		return 0;
494 	return errno;
495 }
496 
497 int
498 pthread_resume_np(pthread_t thread)
499 {
500 
501 	if (pthread__find(thread) != 0)
502 		return ESRCH;
503 	if (_lwp_continue(thread->pt_lid) == 0)
504 		return 0;
505 	return errno;
506 }
507 
508 void
509 pthread_exit(void *retval)
510 {
511 	pthread_t self;
512 	struct pt_clean_t *cleanup;
513 	char *name;
514 
515 	self = pthread__self();
516 
517 	/* Disable cancellability. */
518 	pthread_mutex_lock(&self->pt_lock);
519 	self->pt_flags |= PT_FLAG_CS_DISABLED;
520 	self->pt_cancel = 0;
521 
522 	/* Call any cancellation cleanup handlers */
523 	if (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
524 		pthread_mutex_unlock(&self->pt_lock);
525 		while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
526 			cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
527 			PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
528 			(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
529 		}
530 		pthread_mutex_lock(&self->pt_lock);
531 	}
532 
533 	/* Perform cleanup of thread-specific data */
534 	pthread__destroy_tsd(self);
535 
536 	/* Signal our exit. */
537 	self->pt_exitval = retval;
538 	if (self->pt_flags & PT_FLAG_DETACHED) {
539 		self->pt_state = PT_STATE_DEAD;
540 		name = self->pt_name;
541 		self->pt_name = NULL;
542 		pthread_mutex_unlock(&self->pt_lock);
543 		if (name != NULL)
544 			free(name);
545 		pthread_mutex_lock(&pthread__deadqueue_lock);
546 		PTQ_INSERT_TAIL(&pthread__deadqueue, self, pt_deadq);
547 		pthread_mutex_unlock(&pthread__deadqueue_lock);
548 		_lwp_exit();
549 	} else {
550 		self->pt_state = PT_STATE_ZOMBIE;
551 		pthread_cond_broadcast(&self->pt_joiners);
552 		pthread_mutex_unlock(&self->pt_lock);
553 		/* Note: name will be freed by the joiner. */
554 		_lwp_exit();
555 	}
556 
557 	/*NOTREACHED*/
558 	pthread__abort();
559 	exit(1);
560 }
561 
562 
563 int
564 pthread_join(pthread_t thread, void **valptr)
565 {
566 	pthread_t self;
567 	int error;
568 
569 	self = pthread__self();
570 
571 	if (pthread__find(thread) != 0)
572 		return ESRCH;
573 
574 	if (thread->pt_magic != PT_MAGIC)
575 		return EINVAL;
576 
577 	if (thread == self)
578 		return EDEADLK;
579 
580 	self->pt_droplock = &thread->pt_lock;
581 	pthread_mutex_lock(&thread->pt_lock);
582 	for (;;) {
583 		if (thread->pt_state == PT_STATE_ZOMBIE)
584 			break;
585 		if (thread->pt_state == PT_STATE_DEAD) {
586 			pthread_mutex_unlock(&thread->pt_lock);
587 			self->pt_droplock = NULL;
588 			return ESRCH;
589 		}
590 		if ((thread->pt_flags & PT_FLAG_DETACHED) != 0) {
591 			pthread_mutex_unlock(&thread->pt_lock);
592 			self->pt_droplock = NULL;
593 			return EINVAL;
594 		}
595 		error = pthread_cond_wait(&thread->pt_joiners,
596 		    &thread->pt_lock);
597 		if (error != 0) {
598 			pthread__errorfunc(__FILE__, __LINE__,
599 			    __func__, "unexpected return from cond_wait()");
600 		}
601 
602 	}
603 	pthread__testcancel(self);
604 	if (valptr != NULL)
605 		*valptr = thread->pt_exitval;
606 	/* pthread__reap() will drop the lock. */
607 	pthread__reap(thread);
608 	self->pt_droplock = NULL;
609 
610 	return 0;
611 }
612 
613 static void
614 pthread__reap(pthread_t thread)
615 {
616 	char *name;
617 
618 	name = thread->pt_name;
619 	thread->pt_name = NULL;
620 	thread->pt_state = PT_STATE_DEAD;
621 	pthread_mutex_unlock(&thread->pt_lock);
622 
623 	pthread_mutex_lock(&pthread__deadqueue_lock);
624 	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
625 	pthread_mutex_unlock(&pthread__deadqueue_lock);
626 
627 	if (name != NULL)
628 		free(name);
629 }
630 
631 int
632 pthread_equal(pthread_t t1, pthread_t t2)
633 {
634 
635 	/* Nothing special here. */
636 	return (t1 == t2);
637 }
638 
639 
640 int
641 pthread_detach(pthread_t thread)
642 {
643 
644 	if (pthread__find(thread) != 0)
645 		return ESRCH;
646 
647 	if (thread->pt_magic != PT_MAGIC)
648 		return EINVAL;
649 
650 	pthread_mutex_lock(&thread->pt_lock);
651 	thread->pt_flags |= PT_FLAG_DETACHED;
652 	if (thread->pt_state == PT_STATE_ZOMBIE) {
653 		/* pthread__reap() will drop the lock. */
654 		pthread__reap(thread);
655 	} else {
656 		/*
657 		 * Not valid for threads to be waiting in
658 		 * pthread_join() (there are intractable
659 		 * sync issues from the application
660 		 * perspective), but give those threads
661 		 * a chance anyway.
662 		 */
663 		pthread_cond_broadcast(&thread->pt_joiners);
664 		pthread_mutex_unlock(&thread->pt_lock);
665 	}
666 
667 	return 0;
668 }
669 
670 
671 int
672 pthread_getname_np(pthread_t thread, char *name, size_t len)
673 {
674 
675 	if (pthread__find(thread) != 0)
676 		return ESRCH;
677 
678 	if (thread->pt_magic != PT_MAGIC)
679 		return EINVAL;
680 
681 	pthread_mutex_lock(&thread->pt_lock);
682 	if (thread->pt_name == NULL)
683 		name[0] = '\0';
684 	else
685 		strlcpy(name, thread->pt_name, len);
686 	pthread_mutex_unlock(&thread->pt_lock);
687 
688 	return 0;
689 }
690 
691 
692 int
693 pthread_setname_np(pthread_t thread, const char *name, void *arg)
694 {
695 	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
696 	int namelen;
697 
698 	if (pthread__find(thread) != 0)
699 		return ESRCH;
700 
701 	if (thread->pt_magic != PT_MAGIC)
702 		return EINVAL;
703 
704 	namelen = snprintf(newname, sizeof(newname), name, arg);
705 	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
706 		return EINVAL;
707 
708 	cp = strdup(newname);
709 	if (cp == NULL)
710 		return ENOMEM;
711 
712 	pthread_mutex_lock(&thread->pt_lock);
713 	oldname = thread->pt_name;
714 	thread->pt_name = cp;
715 	(void)_lwp_setname(thread->pt_lid, cp);
716 	pthread_mutex_unlock(&thread->pt_lock);
717 
718 	if (oldname != NULL)
719 		free(oldname);
720 
721 	return 0;
722 }
723 
724 
725 
726 /*
727  * XXX There should be a way for applications to use the efficent
728  *  inline version, but there are opacity/namespace issues.
729  */
730 pthread_t
731 pthread_self(void)
732 {
733 
734 	return pthread__self();
735 }
736 
737 
738 int
739 pthread_cancel(pthread_t thread)
740 {
741 
742 	if (pthread__find(thread) != 0)
743 		return ESRCH;
744 	pthread_mutex_lock(&thread->pt_lock);
745 	thread->pt_flags |= PT_FLAG_CS_PENDING;
746 	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
747 		thread->pt_cancel = 1;
748 		pthread_mutex_unlock(&thread->pt_lock);
749 		_lwp_wakeup(thread->pt_lid);
750 	} else
751 		pthread_mutex_unlock(&thread->pt_lock);
752 
753 	return 0;
754 }
755 
756 
757 int
758 pthread_setcancelstate(int state, int *oldstate)
759 {
760 	pthread_t self;
761 	int retval;
762 
763 	self = pthread__self();
764 	retval = 0;
765 
766 	pthread_mutex_lock(&self->pt_lock);
767 
768 	if (oldstate != NULL) {
769 		if (self->pt_flags & PT_FLAG_CS_DISABLED)
770 			*oldstate = PTHREAD_CANCEL_DISABLE;
771 		else
772 			*oldstate = PTHREAD_CANCEL_ENABLE;
773 	}
774 
775 	if (state == PTHREAD_CANCEL_DISABLE) {
776 		self->pt_flags |= PT_FLAG_CS_DISABLED;
777 		if (self->pt_cancel) {
778 			self->pt_flags |= PT_FLAG_CS_PENDING;
779 			self->pt_cancel = 0;
780 		}
781 	} else if (state == PTHREAD_CANCEL_ENABLE) {
782 		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
783 		/*
784 		 * If a cancellation was requested while cancellation
785 		 * was disabled, note that fact for future
786 		 * cancellation tests.
787 		 */
788 		if (self->pt_flags & PT_FLAG_CS_PENDING) {
789 			self->pt_cancel = 1;
790 			/* This is not a deferred cancellation point. */
791 			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
792 				pthread_mutex_unlock(&self->pt_lock);
793 				pthread__cancelled();
794 			}
795 		}
796 	} else
797 		retval = EINVAL;
798 
799 	pthread_mutex_unlock(&self->pt_lock);
800 
801 	return retval;
802 }
803 
804 
805 int
806 pthread_setcanceltype(int type, int *oldtype)
807 {
808 	pthread_t self;
809 	int retval;
810 
811 	self = pthread__self();
812 	retval = 0;
813 
814 	pthread_mutex_lock(&self->pt_lock);
815 
816 	if (oldtype != NULL) {
817 		if (self->pt_flags & PT_FLAG_CS_ASYNC)
818 			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
819 		else
820 			*oldtype = PTHREAD_CANCEL_DEFERRED;
821 	}
822 
823 	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
824 		self->pt_flags |= PT_FLAG_CS_ASYNC;
825 		if (self->pt_cancel) {
826 			pthread_mutex_unlock(&self->pt_lock);
827 			pthread__cancelled();
828 		}
829 	} else if (type == PTHREAD_CANCEL_DEFERRED)
830 		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
831 	else
832 		retval = EINVAL;
833 
834 	pthread_mutex_unlock(&self->pt_lock);
835 
836 	return retval;
837 }
838 
839 
840 void
841 pthread_testcancel(void)
842 {
843 	pthread_t self;
844 
845 	self = pthread__self();
846 	if (self->pt_cancel)
847 		pthread__cancelled();
848 }
849 
850 
851 /*
852  * POSIX requires that certain functions return an error rather than
853  * invoking undefined behavior even when handed completely bogus
854  * pthread_t values, e.g. stack garbage or (pthread_t)666. This
855  * utility routine searches the list of threads for the pthread_t
856  * value without dereferencing it.
857  */
858 int
859 pthread__find(pthread_t id)
860 {
861 	pthread_t target;
862 
863 	pthread_rwlock_rdlock(&pthread__alltree_lock);
864 	/* LINTED */
865 	target = RB_FIND(__pthread__alltree, &pthread__alltree, id);
866 	pthread_rwlock_unlock(&pthread__alltree_lock);
867 
868 	if (target == NULL || target->pt_state == PT_STATE_DEAD)
869 		return ESRCH;
870 
871 	return 0;
872 }
873 
874 
875 void
876 pthread__testcancel(pthread_t self)
877 {
878 
879 	if (self->pt_cancel)
880 		pthread__cancelled();
881 }
882 
883 
884 void
885 pthread__cancelled(void)
886 {
887 	pthread_mutex_t *droplock;
888 	pthread_t self;
889 
890 	self = pthread__self();
891 	droplock = self->pt_droplock;
892 	self->pt_droplock = NULL;
893 
894 	if (droplock != NULL && pthread_mutex_held_np(droplock))
895 		pthread_mutex_unlock(droplock);
896 
897 	pthread_exit(PTHREAD_CANCELED);
898 }
899 
900 
901 void
902 pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
903 {
904 	pthread_t self;
905 	struct pt_clean_t *entry;
906 
907 	self = pthread__self();
908 	entry = store;
909 	entry->ptc_cleanup = cleanup;
910 	entry->ptc_arg = arg;
911 	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
912 }
913 
914 
915 void
916 pthread__cleanup_pop(int ex, void *store)
917 {
918 	pthread_t self;
919 	struct pt_clean_t *entry;
920 
921 	self = pthread__self();
922 	entry = store;
923 
924 	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
925 	if (ex)
926 		(*entry->ptc_cleanup)(entry->ptc_arg);
927 }
928 
929 
930 int *
931 pthread__errno(void)
932 {
933 	pthread_t self;
934 
935 	self = pthread__self();
936 
937 	return &(self->pt_errno);
938 }
939 
ssize_t	_sys_write(int, const void *, size_t);

/*
 * Report a failed assertion on stderr and terminate the process.
 *
 *	file, line	location of the assertion
 *	function	enclosing function name, or NULL to omit it
 *	expr		text of the expression that failed
 *
 * Sends SIGABRT to the process and, should that return, calls _exit(1).
 */
void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, sizeof(buf),
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	/*
	 * snprintf() reports the length the complete message would have
	 * had (or a negative value on error); never pass write() more
	 * than what was actually stored in buf.
	 */
	if (len < 0)
		len = 0;
	else if ((size_t)len >= sizeof(buf))
		len = (int)sizeof(buf) - 1;

	_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)kill(getpid(), SIGABRT);

	_exit(1);
}
965 
966 
967 void
968 pthread__errorfunc(const char *file, int line, const char *function,
969 		   const char *msg)
970 {
971 	char buf[1024];
972 	size_t len;
973 
974 	if (pthread__diagassert == 0)
975 		return;
976 
977 	/*
978 	 * snprintf should not acquire any locks, or we could
979 	 * end up deadlocked if the assert caller held locks.
980 	 */
981 	len = snprintf(buf, 1024,
982 	    "%s: Error detected by libpthread: %s.\n"
983 	    "Detected by file \"%s\", line %d%s%s%s.\n"
984 	    "See pthread(3) for information.\n",
985 	    getprogname(), msg, file, line,
986 	    function ? ", function \"" : "",
987 	    function ? function : "",
988 	    function ? "\"" : "");
989 
990 	if (pthread__diagassert & DIAGASSERT_STDERR)
991 		_sys_write(STDERR_FILENO, buf, len);
992 
993 	if (pthread__diagassert & DIAGASSERT_SYSLOG)
994 		syslog(LOG_DEBUG | LOG_USER, "%s", buf);
995 
996 	if (pthread__diagassert & DIAGASSERT_ABORT) {
997 		(void)kill(getpid(), SIGABRT);
998 		_exit(1);
999 	}
1000 }
1001 
/*
 * Thread park/unpark operations.  The kernel operations are
 * modelled after a brief description from "Multithreading in
 * the Solaris Operating Environment":
 *
 * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
 */

/*
 * Report a library-detected error through pthread__errorfunc(), tagged
 * with the current file, line and function.
 */
#define	OOPS(msg)			\
    pthread__errorfunc(__FILE__, __LINE__, __func__, msg)
1012 
1013 int
1014 pthread__park(pthread_t self, pthread_mutex_t *lock,
1015 	      pthread_queue_t *queue, const struct timespec *abstime,
1016 	      int cancelpt, const void *hint)
1017 {
1018 	int rv, error;
1019 	void *obj;
1020 
1021 	/*
1022 	 * For non-interlocked release of mutexes we need a store
1023 	 * barrier before incrementing pt_blocking away from zero.
1024 	 * This is provided by pthread_mutex_unlock().
1025 	 */
1026 	self->pt_willpark = 1;
1027 	pthread_mutex_unlock(lock);
1028 	self->pt_willpark = 0;
1029 	self->pt_blocking++;
1030 
1031 	/*
1032 	 * Wait until we are awoken by a pending unpark operation,
1033 	 * a signal, an unpark posted after we have gone asleep,
1034 	 * or an expired timeout.
1035 	 *
1036 	 * It is fine to test the value of pt_sleepobj without
1037 	 * holding any locks, because:
1038 	 *
1039 	 * o Only the blocking thread (this thread) ever sets them
1040 	 *   to a non-NULL value.
1041 	 *
1042 	 * o Other threads may set them NULL, but if they do so they
1043 	 *   must also make this thread return from _lwp_park.
1044 	 *
1045 	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
1046 	 *   calls and all make use of spinlocks in the kernel.  So
1047 	 *   these system calls act as full memory barriers, and will
1048 	 *   ensure that the calling CPU's store buffers are drained.
1049 	 *   In combination with the spinlock release before unpark,
1050 	 *   this means that modification of pt_sleepobj/onq by another
1051 	 *   thread will become globally visible before that thread
1052 	 *   schedules an unpark operation on this thread.
1053 	 *
1054 	 * Note: the test in the while() statement dodges the park op if
1055 	 * we have already been awoken, unless there is another thread to
1056 	 * awaken.  This saves a syscall - if we were already awakened,
1057 	 * the next call to _lwp_park() would need to return early in order
1058 	 * to eat the previous wakeup.
1059 	 */
1060 	rv = 0;
1061 	do {
1062 		/*
1063 		 * If we deferred unparking a thread, arrange to
1064 		 * have _lwp_park() restart it before blocking.
1065 		 */
1066 		error = _lwp_park(abstime, self->pt_unpark, hint, hint);
1067 		self->pt_unpark = 0;
1068 		if (error != 0) {
1069 			switch (rv = errno) {
1070 			case EINTR:
1071 			case EALREADY:
1072 				rv = 0;
1073 				break;
1074 			case ETIMEDOUT:
1075 				break;
1076 			default:
1077 				OOPS("_lwp_park failed");
1078 				break;
1079 			}
1080 		}
1081 		/* Check for cancellation. */
1082 		if (cancelpt && self->pt_cancel)
1083 			rv = EINTR;
1084 	} while (self->pt_sleepobj != NULL && rv == 0);
1085 
1086 	/*
1087 	 * If we have been awoken early but are still on the queue,
1088 	 * then remove ourself.  Again, it's safe to do the test
1089 	 * without holding any locks.
1090 	 */
1091 	if (__predict_false(self->pt_sleepobj != NULL)) {
1092 		pthread_mutex_lock(lock);
1093 		if ((obj = self->pt_sleepobj) != NULL) {
1094 			PTQ_REMOVE(queue, self, pt_sleep);
1095 			self->pt_sleepobj = NULL;
1096 			if (obj != NULL && self->pt_early != NULL)
1097 				(*self->pt_early)(obj);
1098 		}
1099 		pthread_mutex_unlock(lock);
1100 	}
1101 	self->pt_early = NULL;
1102 	self->pt_blocking--;
1103 	membar_sync();
1104 
1105 	return rv;
1106 }
1107 
1108 void
1109 pthread__unpark(pthread_queue_t *queue, pthread_t self,
1110 		pthread_mutex_t *interlock)
1111 {
1112 	pthread_t target;
1113 	u_int max;
1114 	size_t nwaiters;
1115 
1116 	max = pthread__unpark_max;
1117 	nwaiters = self->pt_nwaiters;
1118 	target = PTQ_FIRST(queue);
1119 	if (nwaiters == max) {
1120 		/* Overflow. */
1121 		(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1122 		    __UNVOLATILE(&interlock->ptm_waiters));
1123 		nwaiters = 0;
1124 	}
1125 	target->pt_sleepobj = NULL;
1126 	self->pt_waiters[nwaiters++] = target->pt_lid;
1127 	PTQ_REMOVE(queue, target, pt_sleep);
1128 	self->pt_nwaiters = nwaiters;
1129 	pthread__mutex_deferwake(self, interlock);
1130 }
1131 
1132 void
1133 pthread__unpark_all(pthread_queue_t *queue, pthread_t self,
1134 		    pthread_mutex_t *interlock)
1135 {
1136 	pthread_t target;
1137 	u_int max;
1138 	size_t nwaiters;
1139 
1140 	max = pthread__unpark_max;
1141 	nwaiters = self->pt_nwaiters;
1142 	PTQ_FOREACH(target, queue, pt_sleep) {
1143 		if (nwaiters == max) {
1144 			/* Overflow. */
1145 			(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1146 			    __UNVOLATILE(&interlock->ptm_waiters));
1147 			nwaiters = 0;
1148 		}
1149 		target->pt_sleepobj = NULL;
1150 		self->pt_waiters[nwaiters++] = target->pt_lid;
1151 	}
1152 	self->pt_nwaiters = nwaiters;
1153 	PTQ_INIT(queue);
1154 	pthread__mutex_deferwake(self, interlock);
1155 }
1156 
1157 #undef	OOPS
1158 
1159 /*
1160  * Allocate a stack for a thread, and set it up. It needs to be aligned, so
1161  * that a thread can find itself by its stack pointer.
1162  */
1163 static int
1164 pthread__stackalloc(pthread_t *newt)
1165 {
1166 	void *addr;
1167 
1168 	addr = mmap(NULL, pthread__stacksize, PROT_READ|PROT_WRITE,
1169 	    MAP_ANON|MAP_PRIVATE | MAP_ALIGNED(pthread__stacksize_lg),
1170 	    -1, (off_t)0);
1171 
1172 	if (addr == MAP_FAILED)
1173 		return ENOMEM;
1174 
1175 	pthread__assert(((intptr_t)addr & pthread__stackmask) == 0);
1176 
1177 	return pthread__stackid_setup(addr, pthread__stacksize, newt);
1178 }
1179 
1180 
1181 /*
1182  * Set up the slightly special stack for the "initial" thread, which
1183  * runs on the normal system stack, and thus gets slightly different
1184  * treatment.
1185  */
1186 static void
1187 pthread__initmain(pthread_t *newt)
1188 {
1189 	struct rlimit slimit;
1190 	size_t pagesize;
1191 	pthread_t t;
1192 	void *base;
1193 	size_t size;
1194 	int error, ret;
1195 	char *value;
1196 
1197 	pagesize = (size_t)sysconf(_SC_PAGESIZE);
1198 	pthread__stacksize = 0;
1199 	ret = getrlimit(RLIMIT_STACK, &slimit);
1200 	if (ret == -1)
1201 		err(1, "Couldn't get stack resource consumption limits");
1202 
1203 	value = pthread__getenv("PTHREAD_STACKSIZE");
1204 	if (value != NULL) {
1205 		pthread__stacksize = atoi(value) * 1024;
1206 		if (pthread__stacksize > slimit.rlim_cur)
1207 			pthread__stacksize = (size_t)slimit.rlim_cur;
1208 	}
1209 	if (pthread__stacksize == 0)
1210 		pthread__stacksize = (size_t)slimit.rlim_cur;
1211 	if (pthread__stacksize < 4 * pagesize)
1212 		errx(1, "Stacksize limit is too low, minimum %zd kbyte.",
1213 		    4 * pagesize / 1024);
1214 
1215 	pthread__stacksize_lg = -1;
1216 	while (pthread__stacksize) {
1217 		pthread__stacksize >>= 1;
1218 		pthread__stacksize_lg++;
1219 	}
1220 
1221 	pthread__stacksize = (1 << pthread__stacksize_lg);
1222 	pthread__stackmask = pthread__stacksize - 1;
1223 	pthread__threadmask = ~pthread__stackmask;
1224 
1225 	base = (void *)(pthread__sp() & pthread__threadmask);
1226 	if ((pthread__sp() - (uintptr_t)base) < 4 * pagesize) {
1227 		pthread__mainbase = (vaddr_t)base;
1228 		base = STACK_GROW(base, pthread__stacksize);
1229 		pthread__mainstruct = (vaddr_t)base;
1230 		if (mprotect(base, pthread__stacksize,
1231 		    PROT_READ|PROT_WRITE) == -1)
1232 			err(1, "mprotect stack");
1233 	}
1234 	size = pthread__stacksize;
1235 
1236 	error = pthread__stackid_setup(base, size, &t);
1237 	if (error) {
1238 		/* XXX */
1239 		errx(2, "failed to setup main thread: error=%d", error);
1240 	}
1241 
1242 	*newt = t;
1243 
1244 	/* Set up identity register. */
1245 	(void)_lwp_setprivate(t);
1246 }
1247 
1248 static int
1249 /*ARGSUSED*/
1250 pthread__stackid_setup(void *base, size_t size, pthread_t *tp)
1251 {
1252 	pthread_t t;
1253 	void *redaddr;
1254 	size_t pagesize;
1255 	int ret;
1256 
1257 	t = base;
1258 	pagesize = (size_t)sysconf(_SC_PAGESIZE);
1259 
1260 	/*
1261 	 * Put a pointer to the pthread in the bottom (but
1262          * redzone-protected section) of the stack.
1263 	 */
1264 	redaddr = STACK_SHRINK(STACK_MAX(base, size), pagesize);
1265 	t->pt_stack.ss_size = size - 2 * pagesize;
1266 #ifdef __MACHINE_STACK_GROWS_UP
1267 	t->pt_stack.ss_sp = (char *)(void *)base + pagesize;
1268 #else
1269 	t->pt_stack.ss_sp = (char *)(void *)base + 2 * pagesize;
1270 #endif
1271 	/* Protect the next-to-bottom stack page as a red zone. */
1272 	ret = mprotect(redaddr, pagesize, PROT_NONE);
1273 	if (ret == -1) {
1274 		return errno;
1275 	}
1276 	*tp = t;
1277 	return 0;
1278 }
1279 
1280 #ifndef lint
1281 static int
1282 pthread__cmp(struct __pthread_st *a, struct __pthread_st *b)
1283 {
1284 
1285 	if ((uintptr_t)a < (uintptr_t)b)
1286 		return (-1);
1287 	else if (a == b)
1288 		return 0;
1289 	else
1290 		return 1;
1291 }
1292 RB_GENERATE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
1293 #endif
1294 
/*
 * Lock-free environment lookup.  Because getenv() wants to use locks.
 *
 * Scans 'environ' for an entry of the form "name=value" and returns a
 * pointer to the value inside the environment block itself (not a copy).
 * Returns NULL if there is no such entry or no environment at all.
 */
char *
pthread__getenv(const char *name)
{
	extern char **environ;
	size_t l_name, offset;

	/* The program may have cleared its environment entirely. */
	if (environ == NULL)
		return NULL;

	l_name = strlen(name);
	for (offset = 0; environ[offset] != NULL; offset++) {
		/*
		 * A successful strncmp() proves the entry has at least
		 * l_name characters, so looking at [l_name] is in bounds.
		 */
		if (strncmp(name, environ[offset], l_name) == 0 &&
		    environ[offset][l_name] == '=') {
			return environ[offset] + l_name + 1;
		}
	}

	return NULL;
}
1312 
1313 pthread_mutex_t *
1314 pthread__hashlock(volatile const void *p)
1315 {
1316 	uintptr_t v;
1317 
1318 	v = (uintptr_t)p;
1319 	return &hashlocks[((v >> 9) ^ (v >> 3)) & (NHASHLOCK - 1)].mutex;
1320 }
1321 
1322 int
1323 pthread__checkpri(int pri)
1324 {
1325 	static int havepri;
1326 	static long min, max;
1327 
1328 	if (!havepri) {
1329 		min = sysconf(_SC_SCHED_PRI_MIN);
1330 		max = sysconf(_SC_SCHED_PRI_MAX);
1331 		havepri = 1;
1332 	}
1333 	return (pri < min || pri > max) ? EINVAL : 0;
1334 }
1335