xref: /netbsd-src/lib/libpthread/pthread.c (revision 9aa0541bdf64142d9a27c2cf274394d60182818f)
1 /*	$NetBSD: pthread.c,v 1.125 2011/10/02 18:18:56 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Nathan J. Williams and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: pthread.c,v 1.125 2011/10/02 18:18:56 christos Exp $");
34 
35 #define	__EXPOSE_STACK	1
36 
37 #include <sys/param.h>
38 #include <sys/mman.h>
39 #include <sys/sysctl.h>
40 #include <sys/lwpctl.h>
41 #include <sys/tls.h>
42 
43 #include <err.h>
44 #include <errno.h>
45 #include <lwp.h>
46 #include <signal.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <syslog.h>
51 #include <ucontext.h>
52 #include <unistd.h>
53 #include <sched.h>
54 
55 #include "pthread.h"
56 #include "pthread_int.h"
57 
58 pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
59 RB_HEAD(__pthread__alltree, __pthread_st) pthread__alltree;
60 
61 #ifndef lint
62 static int	pthread__cmp(struct __pthread_st *, struct __pthread_st *);
63 RB_PROTOTYPE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
64 #endif
65 
66 static void	pthread__create_tramp(void *);
67 static void	pthread__initthread(pthread_t);
68 static void	pthread__scrubthread(pthread_t, char *, int);
69 static int	pthread__stackid_setup(void *, size_t, pthread_t *);
70 static int	pthread__stackalloc(pthread_t *);
71 static void	pthread__initmain(pthread_t *);
72 static void	pthread__fork_callback(void);
73 static void	pthread__reap(pthread_t);
74 static void	pthread__child_callback(void);
75 static void	pthread__start(void);
76 
77 void	pthread__init(void);
78 
79 int pthread__started;
80 pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
81 pthread_queue_t pthread__deadqueue;
82 pthread_queue_t pthread__allqueue;
83 
84 static pthread_attr_t pthread_default_attr;
85 static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };
86 static pthread_t pthread__first;
87 
88 enum {
89 	DIAGASSERT_ABORT =	1<<0,
90 	DIAGASSERT_STDERR =	1<<1,
91 	DIAGASSERT_SYSLOG =	1<<2
92 };
93 
94 static int pthread__diagassert;
95 
96 int pthread__concurrency;
97 int pthread__nspins;
98 int pthread__unpark_max = PTHREAD__UNPARK_MAX;
99 int pthread__dbg;	/* set by libpthread_dbg if active */
100 
/*
 * We have to initialize the pthread__stack* variables here because
 * mutexes are used before pthread__init(), and thus pthread__initmain(),
 * are called.  Since mutexes only save the stack pointer and not a
 * pointer to the thread data, it is safe to change the mapping from
 * stack pointer to thread data afterwards.
 */
108 #define	_STACKSIZE_LG 18
109 int	pthread__stacksize_lg = _STACKSIZE_LG;
110 size_t	pthread__stacksize = 1 << _STACKSIZE_LG;
111 vaddr_t	pthread__stackmask = (1 << _STACKSIZE_LG) - 1;
112 vaddr_t pthread__threadmask = (vaddr_t)~((1 << _STACKSIZE_LG) - 1);
113 vaddr_t	pthread__mainbase = 0;
114 vaddr_t	pthread__mainstruct = 0;
115 #undef	_STACKSIZE_LG
116 
117 int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);
118 
119 __strong_alias(__libc_thr_self,pthread_self)
120 __strong_alias(__libc_thr_create,pthread_create)
121 __strong_alias(__libc_thr_exit,pthread_exit)
122 __strong_alias(__libc_thr_errno,pthread__errno)
123 __strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
124 __strong_alias(__libc_thr_equal,pthread_equal)
125 __strong_alias(__libc_thr_init,pthread__init)
126 
/*
 * Static library kludge.  Place a reference here to a symbol from any
 * library file which does not already have a reference here.
 */
131 extern int pthread__cancel_stub_binder;
132 
133 void *pthread__static_lib_binder[] = {
134 	&pthread__cancel_stub_binder,
135 	pthread_cond_init,
136 	pthread_mutex_init,
137 	pthread_rwlock_init,
138 	pthread_barrier_init,
139 	pthread_key_create,
140 	pthread_setspecific,
141 };
142 
143 #define	NHASHLOCK	64
144 
145 static union hashlock {
146 	pthread_mutex_t	mutex;
147 	char		pad[64];
148 } hashlocks[NHASHLOCK] __aligned(64);
149 
150 /*
151  * This needs to be started by the library loading code, before main()
152  * gets to run, for various things that use the state of the initial thread
153  * to work properly (thread-specific data is an application-visible example;
154  * spinlock counts for mutexes is an internal example).
155  */
156 void
157 pthread__init(void)
158 {
159 	pthread_t first;
160 	char *p;
161 	int i, mib[2];
162 	size_t len;
163 	extern int __isthreaded;
164 
165 	mib[0] = CTL_HW;
166 	mib[1] = HW_NCPU;
167 
168 	len = sizeof(pthread__concurrency);
169 	if (sysctl(mib, 2, &pthread__concurrency, &len, NULL, 0) == -1)
170 		err(1, "sysctl(hw.ncpu");
171 
172 	mib[0] = CTL_KERN;
173 	mib[1] = KERN_OSREV;
174 
175 	/* Initialize locks first; they're needed elsewhere. */
176 	pthread__lockprim_init();
177 	for (i = 0; i < NHASHLOCK; i++) {
178 		pthread_mutex_init(&hashlocks[i].mutex, NULL);
179 	}
180 
181 	/* Fetch parameters. */
182 	i = (int)_lwp_unpark_all(NULL, 0, NULL);
183 	if (i == -1)
184 		err(1, "_lwp_unpark_all");
185 	if (i < pthread__unpark_max)
186 		pthread__unpark_max = i;
187 
188 	/* Basic data structure setup */
189 	pthread_attr_init(&pthread_default_attr);
190 	PTQ_INIT(&pthread__allqueue);
191 	PTQ_INIT(&pthread__deadqueue);
192 	RB_INIT(&pthread__alltree);
193 
194 	/* Create the thread structure corresponding to main() */
195 	pthread__initmain(&first);
196 	pthread__initthread(first);
197 	pthread__scrubthread(first, NULL, 0);
198 
199 	first->pt_lid = _lwp_self();
200 	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
201 	RB_INSERT(__pthread__alltree, &pthread__alltree, first);
202 
203 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &first->pt_lwpctl) != 0) {
204 		err(1, "_lwp_ctl");
205 	}
206 
207 	/* Start subsystems */
208 	PTHREAD_MD_INIT
209 
210 	for (p = pthread__getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
211 		switch (*p) {
212 		case 'a':
213 			pthread__diagassert |= DIAGASSERT_ABORT;
214 			break;
215 		case 'A':
216 			pthread__diagassert &= ~DIAGASSERT_ABORT;
217 			break;
218 		case 'e':
219 			pthread__diagassert |= DIAGASSERT_STDERR;
220 			break;
221 		case 'E':
222 			pthread__diagassert &= ~DIAGASSERT_STDERR;
223 			break;
224 		case 'l':
225 			pthread__diagassert |= DIAGASSERT_SYSLOG;
226 			break;
227 		case 'L':
228 			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
229 			break;
230 		}
231 	}
232 
233 	/* Tell libc that we're here and it should role-play accordingly. */
234 	pthread__first = first;
235 	pthread_atfork(NULL, NULL, pthread__fork_callback);
236 	__isthreaded = 1;
237 }
238 
239 static void
240 pthread__fork_callback(void)
241 {
242 	struct __pthread_st *self;
243 
244 	/* lwpctl state is not copied across fork. */
245 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &pthread__first->pt_lwpctl)) {
246 		err(1, "_lwp_ctl");
247 	}
248 	self = pthread__self();
249 	self->pt_lid = _lwp_self();
250 }
251 
252 static void
253 pthread__child_callback(void)
254 {
255 
256 	/*
257 	 * Clean up data structures that a forked child process might
258 	 * trip over. Note that if threads have been created (causing
259 	 * this handler to be registered) the standards say that the
260 	 * child will trigger undefined behavior if it makes any
261 	 * pthread_* calls (or any other calls that aren't
262 	 * async-signal-safe), so we don't really have to clean up
263 	 * much. Anything that permits some pthread_* calls to work is
264 	 * merely being polite.
265 	 */
266 	pthread__started = 0;
267 }
268 
269 static void
270 pthread__start(void)
271 {
272 
273 	/*
274 	 * Per-process timers are cleared by fork(); despite the
275 	 * various restrictions on fork() and threads, it's legal to
276 	 * fork() before creating any threads.
277 	 */
278 	pthread_atfork(NULL, NULL, pthread__child_callback);
279 }
280 
281 
282 /* General-purpose thread data structure sanitization. */
283 /* ARGSUSED */
284 static void
285 pthread__initthread(pthread_t t)
286 {
287 
288 	t->pt_self = t;
289 	t->pt_magic = PT_MAGIC;
290 	t->pt_willpark = 0;
291 	t->pt_unpark = 0;
292 	t->pt_nwaiters = 0;
293 	t->pt_sleepobj = NULL;
294 	t->pt_signalled = 0;
295 	t->pt_havespecific = 0;
296 	t->pt_early = NULL;
297 	t->pt_lwpctl = &pthread__dummy_lwpctl;
298 	t->pt_blocking = 0;
299 	t->pt_droplock = NULL;
300 
301 	memcpy(&t->pt_lockops, pthread__lock_ops, sizeof(t->pt_lockops));
302 	pthread_mutex_init(&t->pt_lock, NULL);
303 	PTQ_INIT(&t->pt_cleanup_stack);
304 	pthread_cond_init(&t->pt_joiners, NULL);
305 	memset(&t->pt_specific, 0, sizeof(t->pt_specific));
306 }
307 
308 static void
309 pthread__scrubthread(pthread_t t, char *name, int flags)
310 {
311 
312 	t->pt_state = PT_STATE_RUNNING;
313 	t->pt_exitval = NULL;
314 	t->pt_flags = flags;
315 	t->pt_cancel = 0;
316 	t->pt_errno = 0;
317 	t->pt_name = name;
318 	t->pt_lid = 0;
319 }
320 
321 
322 int
323 pthread_create(pthread_t *thread, const pthread_attr_t *attr,
324 	    void *(*startfunc)(void *), void *arg)
325 {
326 	pthread_t newthread;
327 	pthread_attr_t nattr;
328 	struct pthread_attr_private *p;
329 	char * volatile name;
330 	unsigned long flag;
331 	void *private_area;
332 	int ret;
333 
334 	/*
335 	 * It's okay to check this without a lock because there can
336 	 * only be one thread before it becomes true.
337 	 */
338 	if (pthread__started == 0) {
339 		pthread__start();
340 		pthread__started = 1;
341 	}
342 
343 	if (attr == NULL)
344 		nattr = pthread_default_attr;
345 	else if (attr->pta_magic == PT_ATTR_MAGIC)
346 		nattr = *attr;
347 	else
348 		return EINVAL;
349 
350 	/* Fetch misc. attributes from the attr structure. */
351 	name = NULL;
352 	if ((p = nattr.pta_private) != NULL)
353 		if (p->ptap_name[0] != '\0')
354 			if ((name = strdup(p->ptap_name)) == NULL)
355 				return ENOMEM;
356 
357 	newthread = NULL;
358 
359 	/*
360 	 * Try to reclaim a dead thread.
361 	 */
362 	if (!PTQ_EMPTY(&pthread__deadqueue)) {
363 		pthread_mutex_lock(&pthread__deadqueue_lock);
364 		PTQ_FOREACH(newthread, &pthread__deadqueue, pt_deadq) {
365 			/* Still running? */
366 			if (newthread->pt_lwpctl->lc_curcpu ==
367 			    LWPCTL_CPU_EXITED ||
368 			    (_lwp_kill(newthread->pt_lid, 0) == -1 &&
369 			    errno == ESRCH))
370 				break;
371 		}
372 		if (newthread)
373 			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
374 		pthread_mutex_unlock(&pthread__deadqueue_lock);
375 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
376 		if (newthread && newthread->pt_tls) {
377 			_rtld_tls_free(newthread->pt_tls);
378 			newthread->pt_tls = NULL;
379 		}
380 #endif
381 	}
382 
383 	/*
384 	 * If necessary set up a stack, allocate space for a pthread_st,
385 	 * and initialize it.
386 	 */
387 	if (newthread == NULL) {
388 		ret = pthread__stackalloc(&newthread);
389 		if (ret != 0) {
390 			if (name)
391 				free(name);
392 			return ret;
393 		}
394 
395 		/* This is used only when creating the thread. */
396 		_INITCONTEXT_U(&newthread->pt_uc);
397 #ifdef PTHREAD_MACHINE_HAS_ID_REGISTER
398 		pthread__uc_id(&newthread->pt_uc) = newthread;
399 #endif
400 		newthread->pt_uc.uc_stack = newthread->pt_stack;
401 		newthread->pt_uc.uc_link = NULL;
402 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
403 		newthread->pt_tls = NULL;
404 #endif
405 
406 		/* Add to list of all threads. */
407 		pthread_rwlock_wrlock(&pthread__alltree_lock);
408 		PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
409 		RB_INSERT(__pthread__alltree, &pthread__alltree, newthread);
410 		pthread_rwlock_unlock(&pthread__alltree_lock);
411 
412 		/* Will be reset by the thread upon exit. */
413 		pthread__initthread(newthread);
414 	}
415 
416 	/*
417 	 * Create the new LWP.
418 	 */
419 	pthread__scrubthread(newthread, name, nattr.pta_flags);
420 	newthread->pt_func = startfunc;
421 	newthread->pt_arg = arg;
422 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
423 	private_area = newthread->pt_tls = _rtld_tls_allocate();
424 	newthread->pt_tls->tcb_pthread = newthread;
425 #else
426 	private_area = newthread;
427 #endif
428 
429 	_lwp_makecontext(&newthread->pt_uc, pthread__create_tramp,
430 	    newthread, private_area, newthread->pt_stack.ss_sp,
431 	    newthread->pt_stack.ss_size);
432 
433 	flag = LWP_DETACHED;
434 	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0 ||
435 	    (nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0)
436 		flag |= LWP_SUSPENDED;
437 	ret = _lwp_create(&newthread->pt_uc, flag, &newthread->pt_lid);
438 	if (ret != 0) {
439 		pthread_mutex_lock(&newthread->pt_lock);
440 		/* Will unlock and free name. */
441 		pthread__reap(newthread);
442 		return ret;
443 	}
444 
445 	if ((nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0) {
446 		if (p != NULL) {
447 			(void)pthread_setschedparam(newthread, p->ptap_policy,
448 			    &p->ptap_sp);
449 		}
450 		if ((newthread->pt_flags & PT_FLAG_SUSPENDED) == 0) {
451 			(void)_lwp_continue(newthread->pt_lid);
452 		}
453 	}
454 
455 	*thread = newthread;
456 
457 	return 0;
458 }
459 
460 
461 __dead static void
462 pthread__create_tramp(void *cookie)
463 {
464 	pthread_t self;
465 	void *retval;
466 
467 	self = cookie;
468 
469 	/*
470 	 * Throw away some stack in a feeble attempt to reduce cache
471 	 * thrash.  May help for SMT processors.  XXX We should not
472 	 * be allocating stacks on fixed 2MB boundaries.  Needs a
473 	 * thread register or decent thread local storage.
474 	 *
475 	 * Note that we may race with the kernel in _lwp_create(),
476 	 * and so pt_lid can be unset at this point, but we don't
477 	 * care.
478 	 */
479 	(void)alloca(((unsigned)self->pt_lid & 7) << 8);
480 
481 	if (self->pt_name != NULL) {
482 		pthread_mutex_lock(&self->pt_lock);
483 		if (self->pt_name != NULL)
484 			(void)_lwp_setname(0, self->pt_name);
485 		pthread_mutex_unlock(&self->pt_lock);
486 	}
487 
488 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
489 		err(1, "_lwp_ctl");
490 	}
491 
492 	retval = (*self->pt_func)(self->pt_arg);
493 
494 	pthread_exit(retval);
495 
496 	/*NOTREACHED*/
497 	pthread__abort();
498 }
499 
500 int
501 pthread_suspend_np(pthread_t thread)
502 {
503 	pthread_t self;
504 
505 	self = pthread__self();
506 	if (self == thread) {
507 		return EDEADLK;
508 	}
509 	if (pthread__find(thread) != 0)
510 		return ESRCH;
511 	if (_lwp_suspend(thread->pt_lid) == 0)
512 		return 0;
513 	return errno;
514 }
515 
516 int
517 pthread_resume_np(pthread_t thread)
518 {
519 
520 	if (pthread__find(thread) != 0)
521 		return ESRCH;
522 	if (_lwp_continue(thread->pt_lid) == 0)
523 		return 0;
524 	return errno;
525 }
526 
527 void
528 pthread_exit(void *retval)
529 {
530 	pthread_t self;
531 	struct pt_clean_t *cleanup;
532 	char *name;
533 
534 	self = pthread__self();
535 
536 	/* Disable cancellability. */
537 	pthread_mutex_lock(&self->pt_lock);
538 	self->pt_flags |= PT_FLAG_CS_DISABLED;
539 	self->pt_cancel = 0;
540 
541 	/* Call any cancellation cleanup handlers */
542 	if (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
543 		pthread_mutex_unlock(&self->pt_lock);
544 		while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
545 			cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
546 			PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
547 			(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
548 		}
549 		pthread_mutex_lock(&self->pt_lock);
550 	}
551 
552 	/* Perform cleanup of thread-specific data */
553 	pthread__destroy_tsd(self);
554 
555 	/* Signal our exit. */
556 	self->pt_exitval = retval;
557 	if (self->pt_flags & PT_FLAG_DETACHED) {
558 		self->pt_state = PT_STATE_DEAD;
559 		name = self->pt_name;
560 		self->pt_name = NULL;
561 		pthread_mutex_unlock(&self->pt_lock);
562 		if (name != NULL)
563 			free(name);
564 		pthread_mutex_lock(&pthread__deadqueue_lock);
565 		PTQ_INSERT_TAIL(&pthread__deadqueue, self, pt_deadq);
566 		pthread_mutex_unlock(&pthread__deadqueue_lock);
567 		_lwp_exit();
568 	} else {
569 		self->pt_state = PT_STATE_ZOMBIE;
570 		pthread_cond_broadcast(&self->pt_joiners);
571 		pthread_mutex_unlock(&self->pt_lock);
572 		/* Note: name will be freed by the joiner. */
573 		_lwp_exit();
574 	}
575 
576 	/*NOTREACHED*/
577 	pthread__abort();
578 	exit(1);
579 }
580 
581 
582 int
583 pthread_join(pthread_t thread, void **valptr)
584 {
585 	pthread_t self;
586 	int error;
587 
588 	self = pthread__self();
589 
590 	if (pthread__find(thread) != 0)
591 		return ESRCH;
592 
593 	if (thread->pt_magic != PT_MAGIC)
594 		return EINVAL;
595 
596 	if (thread == self)
597 		return EDEADLK;
598 
599 	self->pt_droplock = &thread->pt_lock;
600 	pthread_mutex_lock(&thread->pt_lock);
601 	for (;;) {
602 		if (thread->pt_state == PT_STATE_ZOMBIE)
603 			break;
604 		if (thread->pt_state == PT_STATE_DEAD) {
605 			pthread_mutex_unlock(&thread->pt_lock);
606 			self->pt_droplock = NULL;
607 			return ESRCH;
608 		}
609 		if ((thread->pt_flags & PT_FLAG_DETACHED) != 0) {
610 			pthread_mutex_unlock(&thread->pt_lock);
611 			self->pt_droplock = NULL;
612 			return EINVAL;
613 		}
614 		error = pthread_cond_wait(&thread->pt_joiners,
615 		    &thread->pt_lock);
616 		if (error != 0) {
617 			pthread__errorfunc(__FILE__, __LINE__,
618 			    __func__, "unexpected return from cond_wait()");
619 		}
620 
621 	}
622 	pthread__testcancel(self);
623 	if (valptr != NULL)
624 		*valptr = thread->pt_exitval;
625 	/* pthread__reap() will drop the lock. */
626 	pthread__reap(thread);
627 	self->pt_droplock = NULL;
628 
629 	return 0;
630 }
631 
632 static void
633 pthread__reap(pthread_t thread)
634 {
635 	char *name;
636 
637 	name = thread->pt_name;
638 	thread->pt_name = NULL;
639 	thread->pt_state = PT_STATE_DEAD;
640 	pthread_mutex_unlock(&thread->pt_lock);
641 
642 	pthread_mutex_lock(&pthread__deadqueue_lock);
643 	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
644 	pthread_mutex_unlock(&pthread__deadqueue_lock);
645 
646 	if (name != NULL)
647 		free(name);
648 }
649 
/*
 * Compare two thread handles.  Returns non-zero when they are the same
 * handle and zero otherwise.
 */
int
pthread_equal(pthread_t t1, pthread_t t2)
{

	/* Handles are compared directly; nothing special here. */
	return t1 == t2;
}
657 
658 
659 int
660 pthread_detach(pthread_t thread)
661 {
662 
663 	if (pthread__find(thread) != 0)
664 		return ESRCH;
665 
666 	if (thread->pt_magic != PT_MAGIC)
667 		return EINVAL;
668 
669 	pthread_mutex_lock(&thread->pt_lock);
670 	thread->pt_flags |= PT_FLAG_DETACHED;
671 	if (thread->pt_state == PT_STATE_ZOMBIE) {
672 		/* pthread__reap() will drop the lock. */
673 		pthread__reap(thread);
674 	} else {
675 		/*
676 		 * Not valid for threads to be waiting in
677 		 * pthread_join() (there are intractable
678 		 * sync issues from the application
679 		 * perspective), but give those threads
680 		 * a chance anyway.
681 		 */
682 		pthread_cond_broadcast(&thread->pt_joiners);
683 		pthread_mutex_unlock(&thread->pt_lock);
684 	}
685 
686 	return 0;
687 }
688 
689 
690 int
691 pthread_getname_np(pthread_t thread, char *name, size_t len)
692 {
693 
694 	if (pthread__find(thread) != 0)
695 		return ESRCH;
696 
697 	if (thread->pt_magic != PT_MAGIC)
698 		return EINVAL;
699 
700 	pthread_mutex_lock(&thread->pt_lock);
701 	if (thread->pt_name == NULL)
702 		name[0] = '\0';
703 	else
704 		strlcpy(name, thread->pt_name, len);
705 	pthread_mutex_unlock(&thread->pt_lock);
706 
707 	return 0;
708 }
709 
710 
711 int
712 pthread_setname_np(pthread_t thread, const char *name, void *arg)
713 {
714 	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
715 	int namelen;
716 
717 	if (pthread__find(thread) != 0)
718 		return ESRCH;
719 
720 	if (thread->pt_magic != PT_MAGIC)
721 		return EINVAL;
722 
723 	namelen = snprintf(newname, sizeof(newname), name, arg);
724 	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
725 		return EINVAL;
726 
727 	cp = strdup(newname);
728 	if (cp == NULL)
729 		return ENOMEM;
730 
731 	pthread_mutex_lock(&thread->pt_lock);
732 	oldname = thread->pt_name;
733 	thread->pt_name = cp;
734 	(void)_lwp_setname(thread->pt_lid, cp);
735 	pthread_mutex_unlock(&thread->pt_lock);
736 
737 	if (oldname != NULL)
738 		free(oldname);
739 
740 	return 0;
741 }
742 
743 
744 
745 /*
746  * XXX There should be a way for applications to use the efficent
747  *  inline version, but there are opacity/namespace issues.
748  */
749 pthread_t
750 pthread_self(void)
751 {
752 
753 	return pthread__self();
754 }
755 
756 
757 int
758 pthread_cancel(pthread_t thread)
759 {
760 
761 	if (pthread__find(thread) != 0)
762 		return ESRCH;
763 	pthread_mutex_lock(&thread->pt_lock);
764 	thread->pt_flags |= PT_FLAG_CS_PENDING;
765 	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
766 		thread->pt_cancel = 1;
767 		pthread_mutex_unlock(&thread->pt_lock);
768 		_lwp_wakeup(thread->pt_lid);
769 	} else
770 		pthread_mutex_unlock(&thread->pt_lock);
771 
772 	return 0;
773 }
774 
775 
776 int
777 pthread_setcancelstate(int state, int *oldstate)
778 {
779 	pthread_t self;
780 	int retval;
781 
782 	self = pthread__self();
783 	retval = 0;
784 
785 	pthread_mutex_lock(&self->pt_lock);
786 
787 	if (oldstate != NULL) {
788 		if (self->pt_flags & PT_FLAG_CS_DISABLED)
789 			*oldstate = PTHREAD_CANCEL_DISABLE;
790 		else
791 			*oldstate = PTHREAD_CANCEL_ENABLE;
792 	}
793 
794 	if (state == PTHREAD_CANCEL_DISABLE) {
795 		self->pt_flags |= PT_FLAG_CS_DISABLED;
796 		if (self->pt_cancel) {
797 			self->pt_flags |= PT_FLAG_CS_PENDING;
798 			self->pt_cancel = 0;
799 		}
800 	} else if (state == PTHREAD_CANCEL_ENABLE) {
801 		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
802 		/*
803 		 * If a cancellation was requested while cancellation
804 		 * was disabled, note that fact for future
805 		 * cancellation tests.
806 		 */
807 		if (self->pt_flags & PT_FLAG_CS_PENDING) {
808 			self->pt_cancel = 1;
809 			/* This is not a deferred cancellation point. */
810 			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
811 				pthread_mutex_unlock(&self->pt_lock);
812 				pthread__cancelled();
813 			}
814 		}
815 	} else
816 		retval = EINVAL;
817 
818 	pthread_mutex_unlock(&self->pt_lock);
819 
820 	return retval;
821 }
822 
823 
824 int
825 pthread_setcanceltype(int type, int *oldtype)
826 {
827 	pthread_t self;
828 	int retval;
829 
830 	self = pthread__self();
831 	retval = 0;
832 
833 	pthread_mutex_lock(&self->pt_lock);
834 
835 	if (oldtype != NULL) {
836 		if (self->pt_flags & PT_FLAG_CS_ASYNC)
837 			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
838 		else
839 			*oldtype = PTHREAD_CANCEL_DEFERRED;
840 	}
841 
842 	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
843 		self->pt_flags |= PT_FLAG_CS_ASYNC;
844 		if (self->pt_cancel) {
845 			pthread_mutex_unlock(&self->pt_lock);
846 			pthread__cancelled();
847 		}
848 	} else if (type == PTHREAD_CANCEL_DEFERRED)
849 		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
850 	else
851 		retval = EINVAL;
852 
853 	pthread_mutex_unlock(&self->pt_lock);
854 
855 	return retval;
856 }
857 
858 
859 void
860 pthread_testcancel(void)
861 {
862 	pthread_t self;
863 
864 	self = pthread__self();
865 	if (self->pt_cancel)
866 		pthread__cancelled();
867 }
868 
869 
870 /*
871  * POSIX requires that certain functions return an error rather than
872  * invoking undefined behavior even when handed completely bogus
873  * pthread_t values, e.g. stack garbage or (pthread_t)666. This
874  * utility routine searches the list of threads for the pthread_t
875  * value without dereferencing it.
876  */
877 int
878 pthread__find(pthread_t id)
879 {
880 	pthread_t target;
881 
882 	pthread_rwlock_rdlock(&pthread__alltree_lock);
883 	/* LINTED */
884 	target = RB_FIND(__pthread__alltree, &pthread__alltree, id);
885 	pthread_rwlock_unlock(&pthread__alltree_lock);
886 
887 	if (target == NULL || target->pt_state == PT_STATE_DEAD)
888 		return ESRCH;
889 
890 	return 0;
891 }
892 
893 
894 void
895 pthread__testcancel(pthread_t self)
896 {
897 
898 	if (self->pt_cancel)
899 		pthread__cancelled();
900 }
901 
902 
903 void
904 pthread__cancelled(void)
905 {
906 	pthread_mutex_t *droplock;
907 	pthread_t self;
908 
909 	self = pthread__self();
910 	droplock = self->pt_droplock;
911 	self->pt_droplock = NULL;
912 
913 	if (droplock != NULL && pthread_mutex_held_np(droplock))
914 		pthread_mutex_unlock(droplock);
915 
916 	pthread_exit(PTHREAD_CANCELED);
917 }
918 
919 
920 void
921 pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
922 {
923 	pthread_t self;
924 	struct pt_clean_t *entry;
925 
926 	self = pthread__self();
927 	entry = store;
928 	entry->ptc_cleanup = cleanup;
929 	entry->ptc_arg = arg;
930 	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
931 }
932 
933 
934 void
935 pthread__cleanup_pop(int ex, void *store)
936 {
937 	pthread_t self;
938 	struct pt_clean_t *entry;
939 
940 	self = pthread__self();
941 	entry = store;
942 
943 	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
944 	if (ex)
945 		(*entry->ptc_cleanup)(entry->ptc_arg);
946 }
947 
948 
949 int *
950 pthread__errno(void)
951 {
952 	pthread_t self;
953 
954 	self = pthread__self();
955 
956 	return &(self->pt_errno);
957 }
958 
ssize_t	_sys_write(int, const void *, size_t);

/*
 * Report a failed assertion on standard error and abort the process.
 *
 * file, line - source location of the assertion.
 * function   - enclosing function name, or NULL to omit it.
 * expr       - text of the failed expression.
 *
 * Does not return: SIGABRT is sent to the process, followed by
 * _exit(1) in case that does not terminate it.
 */
void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, sizeof(buf),
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	/*
	 * snprintf returns the length the full message would have had
	 * (or a negative value on error), which can exceed what is in
	 * buf.  Never write more than was actually stored.
	 */
	if (len < 0)
		len = 0;
	else if ((size_t)len >= sizeof(buf))
		len = (int)sizeof(buf) - 1;

	(void)_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)kill(getpid(), SIGABRT);

	_exit(1);
}
984 
985 
986 void
987 pthread__errorfunc(const char *file, int line, const char *function,
988 		   const char *msg)
989 {
990 	char buf[1024];
991 	size_t len;
992 
993 	if (pthread__diagassert == 0)
994 		return;
995 
996 	/*
997 	 * snprintf should not acquire any locks, or we could
998 	 * end up deadlocked if the assert caller held locks.
999 	 */
1000 	len = snprintf(buf, 1024,
1001 	    "%s: Error detected by libpthread: %s.\n"
1002 	    "Detected by file \"%s\", line %d%s%s%s.\n"
1003 	    "See pthread(3) for information.\n",
1004 	    getprogname(), msg, file, line,
1005 	    function ? ", function \"" : "",
1006 	    function ? function : "",
1007 	    function ? "\"" : "");
1008 
1009 	if (pthread__diagassert & DIAGASSERT_STDERR)
1010 		_sys_write(STDERR_FILENO, buf, len);
1011 
1012 	if (pthread__diagassert & DIAGASSERT_SYSLOG)
1013 		syslog(LOG_DEBUG | LOG_USER, "%s", buf);
1014 
1015 	if (pthread__diagassert & DIAGASSERT_ABORT) {
1016 		(void)kill(getpid(), SIGABRT);
1017 		_exit(1);
1018 	}
1019 }
1020 
1021 /*
1022  * Thread park/unpark operations.  The kernel operations are
1023  * modelled after a brief description from "Multithreading in
1024  * the Solaris Operating Environment":
1025  *
1026  * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
1027  */
1028 
1029 #define	OOPS(msg)			\
1030     pthread__errorfunc(__FILE__, __LINE__, __func__, msg)
1031 
1032 int
1033 pthread__park(pthread_t self, pthread_mutex_t *lock,
1034 	      pthread_queue_t *queue, const struct timespec *abstime,
1035 	      int cancelpt, const void *hint)
1036 {
1037 	int rv, error;
1038 	void *obj;
1039 
1040 	/*
1041 	 * For non-interlocked release of mutexes we need a store
1042 	 * barrier before incrementing pt_blocking away from zero.
1043 	 * This is provided by pthread_mutex_unlock().
1044 	 */
1045 	self->pt_willpark = 1;
1046 	pthread_mutex_unlock(lock);
1047 	self->pt_willpark = 0;
1048 	self->pt_blocking++;
1049 
1050 	/*
1051 	 * Wait until we are awoken by a pending unpark operation,
1052 	 * a signal, an unpark posted after we have gone asleep,
1053 	 * or an expired timeout.
1054 	 *
1055 	 * It is fine to test the value of pt_sleepobj without
1056 	 * holding any locks, because:
1057 	 *
1058 	 * o Only the blocking thread (this thread) ever sets them
1059 	 *   to a non-NULL value.
1060 	 *
1061 	 * o Other threads may set them NULL, but if they do so they
1062 	 *   must also make this thread return from _lwp_park.
1063 	 *
1064 	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
1065 	 *   calls and all make use of spinlocks in the kernel.  So
1066 	 *   these system calls act as full memory barriers, and will
1067 	 *   ensure that the calling CPU's store buffers are drained.
1068 	 *   In combination with the spinlock release before unpark,
1069 	 *   this means that modification of pt_sleepobj/onq by another
1070 	 *   thread will become globally visible before that thread
1071 	 *   schedules an unpark operation on this thread.
1072 	 *
1073 	 * Note: the test in the while() statement dodges the park op if
1074 	 * we have already been awoken, unless there is another thread to
1075 	 * awaken.  This saves a syscall - if we were already awakened,
1076 	 * the next call to _lwp_park() would need to return early in order
1077 	 * to eat the previous wakeup.
1078 	 */
1079 	rv = 0;
1080 	do {
1081 		/*
1082 		 * If we deferred unparking a thread, arrange to
1083 		 * have _lwp_park() restart it before blocking.
1084 		 */
1085 		error = _lwp_park(abstime, self->pt_unpark, hint, hint);
1086 		self->pt_unpark = 0;
1087 		if (error != 0) {
1088 			switch (rv = errno) {
1089 			case EINTR:
1090 			case EALREADY:
1091 				rv = 0;
1092 				break;
1093 			case ETIMEDOUT:
1094 				break;
1095 			default:
1096 				OOPS("_lwp_park failed");
1097 				break;
1098 			}
1099 		}
1100 		/* Check for cancellation. */
1101 		if (cancelpt && self->pt_cancel)
1102 			rv = EINTR;
1103 	} while (self->pt_sleepobj != NULL && rv == 0);
1104 
1105 	/*
1106 	 * If we have been awoken early but are still on the queue,
1107 	 * then remove ourself.  Again, it's safe to do the test
1108 	 * without holding any locks.
1109 	 */
1110 	if (__predict_false(self->pt_sleepobj != NULL)) {
1111 		pthread_mutex_lock(lock);
1112 		if ((obj = self->pt_sleepobj) != NULL) {
1113 			PTQ_REMOVE(queue, self, pt_sleep);
1114 			self->pt_sleepobj = NULL;
1115 			if (obj != NULL && self->pt_early != NULL)
1116 				(*self->pt_early)(obj);
1117 		}
1118 		pthread_mutex_unlock(lock);
1119 	}
1120 	self->pt_early = NULL;
1121 	self->pt_blocking--;
1122 	membar_sync();
1123 
1124 	return rv;
1125 }
1126 
1127 void
1128 pthread__unpark(pthread_queue_t *queue, pthread_t self,
1129 		pthread_mutex_t *interlock)
1130 {
1131 	pthread_t target;
1132 	u_int max;
1133 	size_t nwaiters;
1134 
1135 	max = pthread__unpark_max;
1136 	nwaiters = self->pt_nwaiters;
1137 	target = PTQ_FIRST(queue);
1138 	if (nwaiters == max) {
1139 		/* Overflow. */
1140 		(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1141 		    __UNVOLATILE(&interlock->ptm_waiters));
1142 		nwaiters = 0;
1143 	}
1144 	target->pt_sleepobj = NULL;
1145 	self->pt_waiters[nwaiters++] = target->pt_lid;
1146 	PTQ_REMOVE(queue, target, pt_sleep);
1147 	self->pt_nwaiters = nwaiters;
1148 	pthread__mutex_deferwake(self, interlock);
1149 }
1150 
1151 void
1152 pthread__unpark_all(pthread_queue_t *queue, pthread_t self,
1153 		    pthread_mutex_t *interlock)
1154 {
1155 	pthread_t target;
1156 	u_int max;
1157 	size_t nwaiters;
1158 
1159 	max = pthread__unpark_max;
1160 	nwaiters = self->pt_nwaiters;
1161 	PTQ_FOREACH(target, queue, pt_sleep) {
1162 		if (nwaiters == max) {
1163 			/* Overflow. */
1164 			(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1165 			    __UNVOLATILE(&interlock->ptm_waiters));
1166 			nwaiters = 0;
1167 		}
1168 		target->pt_sleepobj = NULL;
1169 		self->pt_waiters[nwaiters++] = target->pt_lid;
1170 	}
1171 	self->pt_nwaiters = nwaiters;
1172 	PTQ_INIT(queue);
1173 	pthread__mutex_deferwake(self, interlock);
1174 }
1175 
1176 #undef	OOPS
1177 
1178 /*
1179  * Allocate a stack for a thread, and set it up. It needs to be aligned, so
1180  * that a thread can find itself by its stack pointer.
1181  */
1182 static int
1183 pthread__stackalloc(pthread_t *newt)
1184 {
1185 	void *addr;
1186 
1187 	addr = mmap(NULL, pthread__stacksize, PROT_READ|PROT_WRITE,
1188 	    MAP_ANON|MAP_PRIVATE | MAP_ALIGNED(pthread__stacksize_lg),
1189 	    -1, (off_t)0);
1190 
1191 	if (addr == MAP_FAILED)
1192 		return ENOMEM;
1193 
1194 	pthread__assert(((intptr_t)addr & pthread__stackmask) == 0);
1195 
1196 	return pthread__stackid_setup(addr, pthread__stacksize, newt);
1197 }
1198 
1199 
1200 /*
1201  * Set up the slightly special stack for the "initial" thread, which
1202  * runs on the normal system stack, and thus gets slightly different
1203  * treatment.
1204  */
1205 static void
1206 pthread__initmain(pthread_t *newt)
1207 {
1208 	struct rlimit slimit;
1209 	size_t pagesize;
1210 	pthread_t t;
1211 	void *base;
1212 	size_t size;
1213 	int error, ret;
1214 	char *value;
1215 
1216 	pagesize = (size_t)sysconf(_SC_PAGESIZE);
1217 	pthread__stacksize = 0;
1218 	ret = getrlimit(RLIMIT_STACK, &slimit);
1219 	if (ret == -1)
1220 		err(1, "Couldn't get stack resource consumption limits");
1221 
1222 	value = pthread__getenv("PTHREAD_STACKSIZE");
1223 	if (value != NULL) {
1224 		pthread__stacksize = atoi(value) * 1024;
1225 		if (pthread__stacksize > slimit.rlim_cur)
1226 			pthread__stacksize = (size_t)slimit.rlim_cur;
1227 	}
1228 	if (pthread__stacksize == 0)
1229 		pthread__stacksize = (size_t)slimit.rlim_cur;
1230 	if (pthread__stacksize < 4 * pagesize)
1231 		errx(1, "Stacksize limit is too low, minimum %zd kbyte.",
1232 		    4 * pagesize / 1024);
1233 
1234 	pthread__stacksize_lg = -1;
1235 	while (pthread__stacksize) {
1236 		pthread__stacksize >>= 1;
1237 		pthread__stacksize_lg++;
1238 	}
1239 
1240 	pthread__stacksize = (1 << pthread__stacksize_lg);
1241 	pthread__stackmask = pthread__stacksize - 1;
1242 	pthread__threadmask = ~pthread__stackmask;
1243 
1244 	base = (void *)(pthread__sp() & pthread__threadmask);
1245 	if ((pthread__sp() - (uintptr_t)base) < 4 * pagesize) {
1246 		pthread__mainbase = (vaddr_t)base;
1247 		base = STACK_GROW(base, pthread__stacksize);
1248 		pthread__mainstruct = (vaddr_t)base;
1249 		if (mprotect(base, pthread__stacksize,
1250 		    PROT_READ|PROT_WRITE) == -1)
1251 			err(1, "mprotect stack");
1252 	}
1253 	size = pthread__stacksize;
1254 
1255 	error = pthread__stackid_setup(base, size, &t);
1256 	if (error) {
1257 		/* XXX */
1258 		errx(2, "failed to setup main thread: error=%d", error);
1259 	}
1260 
1261 	*newt = t;
1262 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
1263 #  ifdef __HAVE___LWP_GETTCB_FAST
1264 	t->pt_tls = __lwp_gettcb_fast();
1265 #  else
1266 	t->pt_tls = _lwp_getprivate();
1267 #  endif
1268 	t->pt_tls->tcb_pthread = t;
1269 #else
1270 	_lwp_setprivate(t);
1271 #endif
1272 }
1273 
1274 static int
1275 /*ARGSUSED*/
1276 pthread__stackid_setup(void *base, size_t size, pthread_t *tp)
1277 {
1278 	pthread_t t;
1279 	void *redaddr;
1280 	size_t pagesize;
1281 	int ret;
1282 
1283 	t = base;
1284 	pagesize = (size_t)sysconf(_SC_PAGESIZE);
1285 
1286 	/*
1287 	 * Put a pointer to the pthread in the bottom (but
1288          * redzone-protected section) of the stack.
1289 	 */
1290 	redaddr = STACK_SHRINK(STACK_MAX(base, size), pagesize);
1291 	t->pt_stack.ss_size = size - 2 * pagesize;
1292 #ifdef __MACHINE_STACK_GROWS_UP
1293 	t->pt_stack.ss_sp = (char *)(void *)base + pagesize;
1294 #else
1295 	t->pt_stack.ss_sp = (char *)(void *)base + 2 * pagesize;
1296 #endif
1297 	/* Protect the next-to-bottom stack page as a red zone. */
1298 	ret = mprotect(redaddr, pagesize, PROT_NONE);
1299 	if (ret == -1) {
1300 		return errno;
1301 	}
1302 	*tp = t;
1303 	return 0;
1304 }
1305 
1306 #ifndef lint
/*
 * Ordering function for the tree of all threads: compares the two
 * thread structures by address.  Returns -1, 0 or 1 when `a' is
 * below, equal to or above `b'.
 */
static int
pthread__cmp(struct __pthread_st *a, struct __pthread_st *b)
{
	const uintptr_t lhs = (uintptr_t)a;
	const uintptr_t rhs = (uintptr_t)b;

	if (lhs < rhs)
		return -1;
	if (a == b)
		return 0;
	return 1;
}
1318 RB_GENERATE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
1319 #endif
1320 
/*
 * Because getenv() wants to use locks.
 *
 * Minimal lookup that walks `environ' directly.  Returns a pointer to
 * the text following "name=" inside the matching environment entry
 * (not a copy), or NULL if no entry has exactly that name or if the
 * environment pointer itself is NULL.
 */
char *
pthread__getenv(const char *name)
{
	extern char **environ;
	size_t l_name, offset;

	/* A program may have cleared its environment by nulling environ. */
	if (environ == NULL)
		return NULL;

	l_name = strlen(name);
	for (offset = 0; environ[offset] != NULL; offset++) {
		/*
		 * The prefix must match and be followed directly by '=',
		 * so that "FOO" does not match an entry named "FOOBAR".
		 */
		if (strncmp(name, environ[offset], l_name) == 0 &&
		    environ[offset][l_name] == '=') {
			return environ[offset] + l_name + 1;
		}
	}

	return NULL;
}
1338 
1339 pthread_mutex_t *
1340 pthread__hashlock(volatile const void *p)
1341 {
1342 	uintptr_t v;
1343 
1344 	v = (uintptr_t)p;
1345 	return &hashlocks[((v >> 9) ^ (v >> 3)) & (NHASHLOCK - 1)].mutex;
1346 }
1347 
1348 int
1349 pthread__checkpri(int pri)
1350 {
1351 	static int havepri;
1352 	static long min, max;
1353 
1354 	if (!havepri) {
1355 		min = sysconf(_SC_SCHED_PRI_MIN);
1356 		max = sysconf(_SC_SCHED_PRI_MAX);
1357 		havepri = 1;
1358 	}
1359 	return (pri < min || pri > max) ? EINVAL : 0;
1360 }
1361