xref: /netbsd-src/lib/libpthread/pthread.c (revision 8182c8fa04ec9d89c83203de4cf4f1bc9d9cc543)
1 /*	$NetBSD: pthread.c,v 1.128 2012/03/08 16:40:45 joerg Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Nathan J. Williams and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: pthread.c,v 1.128 2012/03/08 16:40:45 joerg Exp $");
34 
35 #define	__EXPOSE_STACK	1
36 
37 #include <sys/param.h>
38 #include <sys/exec_elf.h>
39 #include <sys/mman.h>
40 #include <sys/sysctl.h>
41 #include <sys/lwpctl.h>
42 #include <sys/tls.h>
43 
44 #include <assert.h>
45 #include <dlfcn.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <lwp.h>
49 #include <signal.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <syslog.h>
54 #include <ucontext.h>
55 #include <unistd.h>
56 #include <sched.h>
57 
58 #include "pthread.h"
59 #include "pthread_int.h"
60 
/*
 * Red-black tree of thread structures.  pthread__find() looks pthread_t
 * values up in it while holding pthread__alltree_lock as a reader;
 * pthread_create() holds the lock as a writer while inserting a new thread.
 */
pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
RB_HEAD(__pthread__alltree, __pthread_st) pthread__alltree;

#ifndef lint
/* Ordering function for the tree; static, so its definition lives in this file. */
static int	pthread__cmp(struct __pthread_st *, struct __pthread_st *);
RB_PROTOTYPE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
#endif
68 
/* Forward declarations of file-local helpers. */
static void	pthread__create_tramp(void *);
static void	pthread__initthread(pthread_t);
static void	pthread__scrubthread(pthread_t, char *, int);
static void	pthread__initmain(pthread_t *);
static void	pthread__fork_callback(void);
static void	pthread__reap(pthread_t);
static void	pthread__child_callback(void);
static void	pthread__start(void);

/* Library entry point; also exported below under the name __libc_thr_init. */
void	pthread__init(void);
79 
/* Set to 1 by the first pthread_create(); reset to 0 in a forked child. */
int pthread__started;
/* Guards pthread__deadqueue. */
pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
/* Thread structures of finished threads, available for reuse by pthread_create(). */
pthread_queue_t pthread__deadqueue;
/* Every thread structure that has been set up, the main thread first. */
pthread_queue_t pthread__allqueue;

/* Attributes used when pthread_create() is passed a NULL attr. */
static pthread_attr_t pthread_default_attr;
/* Placeholder pt_lwpctl target installed by pthread__initthread(). */
static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };
/* The main thread's structure, recorded by pthread__init(). */
static pthread_t pthread__first;

/*
 * Bits of pthread__diagassert.  They are set or cleared from the
 * PTHREAD_DIAGASSERT environment string in pthread__init() and select
 * what pthread__errorfunc() does with a detected error.
 */
enum {
	DIAGASSERT_ABORT =	1<<0,
	DIAGASSERT_STDERR =	1<<1,
	DIAGASSERT_SYSLOG =	1<<2
};

static int pthread__diagassert;

/* Filled in from the hw.ncpu sysctl by pthread__init(). */
int pthread__concurrency;
int pthread__nspins;
/* Upper bound on deferred wakeups per thread; may be lowered in pthread__init(). */
int pthread__unpark_max = PTHREAD__UNPARK_MAX;
int pthread__dbg;	/* set by libpthread_dbg if active */
101 
/*
 * We have to initialize the pthread_stack* variables here because
 * mutexes are used before pthread_init() and thus pthread__initmain()
 * are called.  Since mutexes only save the stack pointer and not a
 * pointer to the thread data, it is safe to change the mapping from
 * stack pointer to thread data afterwards.
 */
/* Default stack size, used by pthread__newstack() when attr gives none. */
size_t	pthread__stacksize;
/* System page size, obtained via sysconf(_SC_PAGESIZE) in pthread__init(). */
size_t	pthread__pagesize;
/* NOTE(review): presumably backing storage for the main thread's structure -- confirm in pthread__initmain(). */
static struct __pthread_st pthread__main;

/* Prototype only; no caller appears in this part of the file. */
int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);
114 
/*
 * Export the pthread implementations under the __libc_thr_* names as well,
 * as strong aliases.
 */
__strong_alias(__libc_thr_self,pthread_self)
__strong_alias(__libc_thr_create,pthread_create)
__strong_alias(__libc_thr_exit,pthread_exit)
__strong_alias(__libc_thr_errno,pthread__errno)
__strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
__strong_alias(__libc_thr_equal,pthread_equal)
__strong_alias(__libc_thr_init,pthread__init)
122 
/*
 * Static library kludge.  Place a reference here to a symbol in any
 * library file which does not already have a reference.
 */
extern int pthread__cancel_stub_binder;

/* The table only has to mention the symbols; nothing in this file reads it. */
void *pthread__static_lib_binder[] = {
	&pthread__cancel_stub_binder,
	pthread_cond_init,
	pthread_mutex_init,
	pthread_rwlock_init,
	pthread_barrier_init,
	pthread_key_create,
	pthread_setspecific,
};
138 
/* Number of entries in the hashlocks[] table. */
#define	NHASHLOCK	64

/*
 * Table of mutexes, initialized by pthread__init().  Each entry is padded
 * to 64 bytes and the array is 64-byte aligned.
 * NOTE(review): presumably sized to keep each lock on its own cache line --
 * confirm.
 */
static union hashlock {
	pthread_mutex_t	mutex;
	char		pad[64];
} hashlocks[NHASHLOCK] __aligned(64);
145 
146 /*
147  * This needs to be started by the library loading code, before main()
148  * gets to run, for various things that use the state of the initial thread
149  * to work properly (thread-specific data is an application-visible example;
150  * spinlock counts for mutexes is an internal example).
151  */
152 void
153 pthread__init(void)
154 {
155 	pthread_t first;
156 	char *p;
157 	int i, mib[2];
158 	size_t len;
159 	extern int __isthreaded;
160 
161 	pthread__pagesize = (size_t)sysconf(_SC_PAGESIZE);
162 
163 	mib[0] = CTL_HW;
164 	mib[1] = HW_NCPU;
165 
166 	len = sizeof(pthread__concurrency);
167 	if (sysctl(mib, 2, &pthread__concurrency, &len, NULL, 0) == -1)
168 		err(1, "sysctl(hw.ncpu");
169 
170 	mib[0] = CTL_KERN;
171 	mib[1] = KERN_OSREV;
172 
173 	/* Initialize locks first; they're needed elsewhere. */
174 	pthread__lockprim_init();
175 	for (i = 0; i < NHASHLOCK; i++) {
176 		pthread_mutex_init(&hashlocks[i].mutex, NULL);
177 	}
178 
179 	/* Fetch parameters. */
180 	i = (int)_lwp_unpark_all(NULL, 0, NULL);
181 	if (i == -1)
182 		err(1, "_lwp_unpark_all");
183 	if (i < pthread__unpark_max)
184 		pthread__unpark_max = i;
185 
186 	/* Basic data structure setup */
187 	pthread_attr_init(&pthread_default_attr);
188 	PTQ_INIT(&pthread__allqueue);
189 	PTQ_INIT(&pthread__deadqueue);
190 	RB_INIT(&pthread__alltree);
191 
192 	/* Create the thread structure corresponding to main() */
193 	pthread__initmain(&first);
194 	pthread__initthread(first);
195 	pthread__scrubthread(first, NULL, 0);
196 
197 	first->pt_lid = _lwp_self();
198 	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
199 	RB_INSERT(__pthread__alltree, &pthread__alltree, first);
200 
201 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &first->pt_lwpctl) != 0) {
202 		err(1, "_lwp_ctl");
203 	}
204 
205 	/* Start subsystems */
206 	PTHREAD_MD_INIT
207 
208 	for (p = pthread__getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
209 		switch (*p) {
210 		case 'a':
211 			pthread__diagassert |= DIAGASSERT_ABORT;
212 			break;
213 		case 'A':
214 			pthread__diagassert &= ~DIAGASSERT_ABORT;
215 			break;
216 		case 'e':
217 			pthread__diagassert |= DIAGASSERT_STDERR;
218 			break;
219 		case 'E':
220 			pthread__diagassert &= ~DIAGASSERT_STDERR;
221 			break;
222 		case 'l':
223 			pthread__diagassert |= DIAGASSERT_SYSLOG;
224 			break;
225 		case 'L':
226 			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
227 			break;
228 		}
229 	}
230 
231 	/* Tell libc that we're here and it should role-play accordingly. */
232 	pthread__first = first;
233 	pthread_atfork(NULL, NULL, pthread__fork_callback);
234 	__isthreaded = 1;
235 }
236 
237 static void
238 pthread__fork_callback(void)
239 {
240 	struct __pthread_st *self;
241 
242 	/* lwpctl state is not copied across fork. */
243 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &pthread__first->pt_lwpctl)) {
244 		err(1, "_lwp_ctl");
245 	}
246 	self = pthread__self();
247 	self->pt_lid = _lwp_self();
248 }
249 
250 static void
251 pthread__child_callback(void)
252 {
253 
254 	/*
255 	 * Clean up data structures that a forked child process might
256 	 * trip over. Note that if threads have been created (causing
257 	 * this handler to be registered) the standards say that the
258 	 * child will trigger undefined behavior if it makes any
259 	 * pthread_* calls (or any other calls that aren't
260 	 * async-signal-safe), so we don't really have to clean up
261 	 * much. Anything that permits some pthread_* calls to work is
262 	 * merely being polite.
263 	 */
264 	pthread__started = 0;
265 }
266 
267 static void
268 pthread__start(void)
269 {
270 
271 	/*
272 	 * Per-process timers are cleared by fork(); despite the
273 	 * various restrictions on fork() and threads, it's legal to
274 	 * fork() before creating any threads.
275 	 */
276 	pthread_atfork(NULL, NULL, pthread__child_callback);
277 }
278 
279 
280 /* General-purpose thread data structure sanitization. */
281 /* ARGSUSED */
282 static void
283 pthread__initthread(pthread_t t)
284 {
285 
286 	t->pt_self = t;
287 	t->pt_magic = PT_MAGIC;
288 	t->pt_willpark = 0;
289 	t->pt_unpark = 0;
290 	t->pt_nwaiters = 0;
291 	t->pt_sleepobj = NULL;
292 	t->pt_signalled = 0;
293 	t->pt_havespecific = 0;
294 	t->pt_early = NULL;
295 	t->pt_lwpctl = &pthread__dummy_lwpctl;
296 	t->pt_blocking = 0;
297 	t->pt_droplock = NULL;
298 
299 	memcpy(&t->pt_lockops, pthread__lock_ops, sizeof(t->pt_lockops));
300 	pthread_mutex_init(&t->pt_lock, NULL);
301 	PTQ_INIT(&t->pt_cleanup_stack);
302 	pthread_cond_init(&t->pt_joiners, NULL);
303 	memset(&t->pt_specific, 0, sizeof(t->pt_specific));
304 }
305 
306 static void
307 pthread__scrubthread(pthread_t t, char *name, int flags)
308 {
309 
310 	t->pt_state = PT_STATE_RUNNING;
311 	t->pt_exitval = NULL;
312 	t->pt_flags = flags;
313 	t->pt_cancel = 0;
314 	t->pt_errno = 0;
315 	t->pt_name = name;
316 	t->pt_lid = 0;
317 }
318 
319 static int
320 pthread__newstack(pthread_t newthread, const pthread_attr_t *attr)
321 {
322 	void *stackbase, *redzone;
323 	size_t stacksize;
324 	bool mapped_stack = false;
325 
326 	if (attr != NULL) {
327 		pthread_attr_getstack(attr, &stackbase, &stacksize);
328 	} else {
329 		stackbase = NULL;
330 		stacksize = 0;
331 	}
332 	if (stacksize == 0)
333 		stacksize = pthread__stacksize;
334 
335 	if (stackbase == NULL) {
336 		stackbase = mmap(NULL, stacksize,
337 		    PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, (off_t)0);
338 		if (stackbase == MAP_FAILED)
339 			return ENOMEM;
340 		mapped_stack = true;
341 	}
342 	newthread->pt_stack.ss_size = pthread__stacksize - pthread__pagesize;
343 		newthread->pt_stack.ss_sp = stackbase;
344 #ifdef __MACHINE_STACK_GROWS_UP
345 	redzone = (char *)stackbase + newthread->pt_stack.ss_size;
346 #else
347 	redzone = (char *)stackbase;
348 #endif
349 	if (mprotect(redzone, pthread__pagesize, PROT_NONE) == -1) {
350 		if (mapped_stack)
351 			munmap(stackbase, pthread__stacksize);
352 		return EPERM;
353 	}
354 	return 0;
355 }
356 
357 int
358 pthread_create(pthread_t *thread, const pthread_attr_t *attr,
359 	    void *(*startfunc)(void *), void *arg)
360 {
361 	pthread_t newthread;
362 	pthread_attr_t nattr;
363 	struct pthread_attr_private *p;
364 	char * volatile name;
365 	unsigned long flag;
366 	void *private_area;
367 	int ret;
368 
369 	/*
370 	 * It's okay to check this without a lock because there can
371 	 * only be one thread before it becomes true.
372 	 */
373 	if (pthread__started == 0) {
374 		pthread__start();
375 		pthread__started = 1;
376 	}
377 
378 	if (attr == NULL)
379 		nattr = pthread_default_attr;
380 	else if (attr->pta_magic == PT_ATTR_MAGIC)
381 		nattr = *attr;
382 	else
383 		return EINVAL;
384 
385 	/* Fetch misc. attributes from the attr structure. */
386 	name = NULL;
387 	if ((p = nattr.pta_private) != NULL)
388 		if (p->ptap_name[0] != '\0')
389 			if ((name = strdup(p->ptap_name)) == NULL)
390 				return ENOMEM;
391 
392 	newthread = NULL;
393 
394 	/*
395 	 * Try to reclaim a dead thread.
396 	 */
397 	if (!PTQ_EMPTY(&pthread__deadqueue)) {
398 		pthread_mutex_lock(&pthread__deadqueue_lock);
399 		PTQ_FOREACH(newthread, &pthread__deadqueue, pt_deadq) {
400 			/* Still running? */
401 			if (newthread->pt_lwpctl->lc_curcpu ==
402 			    LWPCTL_CPU_EXITED ||
403 			    (_lwp_kill(newthread->pt_lid, 0) == -1 &&
404 			    errno == ESRCH))
405 				break;
406 		}
407 		if (newthread)
408 			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
409 		pthread_mutex_unlock(&pthread__deadqueue_lock);
410 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
411 		if (newthread && newthread->pt_tls) {
412 			_rtld_tls_free(newthread->pt_tls);
413 			newthread->pt_tls = NULL;
414 		}
415 #endif
416 	}
417 
418 	/*
419 	 * If necessary set up a stack, allocate space for a pthread_st,
420 	 * and initialize it.
421 	 */
422 	if (newthread == NULL) {
423 		newthread = malloc(sizeof(*newthread));
424 		if (newthread == NULL) {
425 			free(name);
426 			return ENOMEM;
427 		}
428 
429 		if (pthread__newstack(newthread, attr)) {
430 			free(newthread);
431 			free(name);
432 			return ENOMEM;
433 		}
434 
435 		/* This is used only when creating the thread. */
436 		_INITCONTEXT_U(&newthread->pt_uc);
437 		newthread->pt_uc.uc_stack = newthread->pt_stack;
438 		newthread->pt_uc.uc_link = NULL;
439 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
440 		newthread->pt_tls = NULL;
441 #endif
442 
443 		/* Add to list of all threads. */
444 		pthread_rwlock_wrlock(&pthread__alltree_lock);
445 		PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
446 		RB_INSERT(__pthread__alltree, &pthread__alltree, newthread);
447 		pthread_rwlock_unlock(&pthread__alltree_lock);
448 
449 		/* Will be reset by the thread upon exit. */
450 		pthread__initthread(newthread);
451 	}
452 
453 	/*
454 	 * Create the new LWP.
455 	 */
456 	pthread__scrubthread(newthread, name, nattr.pta_flags);
457 	newthread->pt_func = startfunc;
458 	newthread->pt_arg = arg;
459 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
460 	private_area = newthread->pt_tls = _rtld_tls_allocate();
461 	newthread->pt_tls->tcb_pthread = newthread;
462 #else
463 	private_area = newthread;
464 #endif
465 
466 	_lwp_makecontext(&newthread->pt_uc, pthread__create_tramp,
467 	    newthread, private_area, newthread->pt_stack.ss_sp,
468 	    newthread->pt_stack.ss_size);
469 
470 	flag = LWP_DETACHED;
471 	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0 ||
472 	    (nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0)
473 		flag |= LWP_SUSPENDED;
474 	ret = _lwp_create(&newthread->pt_uc, flag, &newthread->pt_lid);
475 	if (ret != 0) {
476 		pthread_mutex_lock(&newthread->pt_lock);
477 		/* Will unlock and free name. */
478 		pthread__reap(newthread);
479 		return ret;
480 	}
481 
482 	if ((nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0) {
483 		if (p != NULL) {
484 			(void)pthread_setschedparam(newthread, p->ptap_policy,
485 			    &p->ptap_sp);
486 		}
487 		if ((newthread->pt_flags & PT_FLAG_SUSPENDED) == 0) {
488 			(void)_lwp_continue(newthread->pt_lid);
489 		}
490 	}
491 
492 	*thread = newthread;
493 
494 	return 0;
495 }
496 
497 
498 __dead static void
499 pthread__create_tramp(void *cookie)
500 {
501 	pthread_t self;
502 	void *retval;
503 
504 	self = cookie;
505 
506 	/*
507 	 * Throw away some stack in a feeble attempt to reduce cache
508 	 * thrash.  May help for SMT processors.  XXX We should not
509 	 * be allocating stacks on fixed 2MB boundaries.  Needs a
510 	 * thread register or decent thread local storage.
511 	 *
512 	 * Note that we may race with the kernel in _lwp_create(),
513 	 * and so pt_lid can be unset at this point, but we don't
514 	 * care.
515 	 */
516 	(void)alloca(((unsigned)self->pt_lid & 7) << 8);
517 
518 	if (self->pt_name != NULL) {
519 		pthread_mutex_lock(&self->pt_lock);
520 		if (self->pt_name != NULL)
521 			(void)_lwp_setname(0, self->pt_name);
522 		pthread_mutex_unlock(&self->pt_lock);
523 	}
524 
525 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
526 		err(1, "_lwp_ctl");
527 	}
528 
529 	retval = (*self->pt_func)(self->pt_arg);
530 
531 	pthread_exit(retval);
532 
533 	/*NOTREACHED*/
534 	pthread__abort();
535 }
536 
537 int
538 pthread_suspend_np(pthread_t thread)
539 {
540 	pthread_t self;
541 
542 	self = pthread__self();
543 	if (self == thread) {
544 		return EDEADLK;
545 	}
546 	if (pthread__find(thread) != 0)
547 		return ESRCH;
548 	if (_lwp_suspend(thread->pt_lid) == 0)
549 		return 0;
550 	return errno;
551 }
552 
553 int
554 pthread_resume_np(pthread_t thread)
555 {
556 
557 	if (pthread__find(thread) != 0)
558 		return ESRCH;
559 	if (_lwp_continue(thread->pt_lid) == 0)
560 		return 0;
561 	return errno;
562 }
563 
564 void
565 pthread_exit(void *retval)
566 {
567 	pthread_t self;
568 	struct pt_clean_t *cleanup;
569 	char *name;
570 
571 	self = pthread__self();
572 
573 	/* Disable cancellability. */
574 	pthread_mutex_lock(&self->pt_lock);
575 	self->pt_flags |= PT_FLAG_CS_DISABLED;
576 	self->pt_cancel = 0;
577 
578 	/* Call any cancellation cleanup handlers */
579 	if (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
580 		pthread_mutex_unlock(&self->pt_lock);
581 		while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
582 			cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
583 			PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
584 			(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
585 		}
586 		pthread_mutex_lock(&self->pt_lock);
587 	}
588 
589 	/* Perform cleanup of thread-specific data */
590 	pthread__destroy_tsd(self);
591 
592 	/* Signal our exit. */
593 	self->pt_exitval = retval;
594 	if (self->pt_flags & PT_FLAG_DETACHED) {
595 		self->pt_state = PT_STATE_DEAD;
596 		name = self->pt_name;
597 		self->pt_name = NULL;
598 		pthread_mutex_unlock(&self->pt_lock);
599 		if (name != NULL)
600 			free(name);
601 		pthread_mutex_lock(&pthread__deadqueue_lock);
602 		PTQ_INSERT_TAIL(&pthread__deadqueue, self, pt_deadq);
603 		pthread_mutex_unlock(&pthread__deadqueue_lock);
604 		_lwp_exit();
605 	} else {
606 		self->pt_state = PT_STATE_ZOMBIE;
607 		pthread_cond_broadcast(&self->pt_joiners);
608 		pthread_mutex_unlock(&self->pt_lock);
609 		/* Note: name will be freed by the joiner. */
610 		_lwp_exit();
611 	}
612 
613 	/*NOTREACHED*/
614 	pthread__abort();
615 	exit(1);
616 }
617 
618 
619 int
620 pthread_join(pthread_t thread, void **valptr)
621 {
622 	pthread_t self;
623 	int error;
624 
625 	self = pthread__self();
626 
627 	if (pthread__find(thread) != 0)
628 		return ESRCH;
629 
630 	if (thread->pt_magic != PT_MAGIC)
631 		return EINVAL;
632 
633 	if (thread == self)
634 		return EDEADLK;
635 
636 	self->pt_droplock = &thread->pt_lock;
637 	pthread_mutex_lock(&thread->pt_lock);
638 	for (;;) {
639 		if (thread->pt_state == PT_STATE_ZOMBIE)
640 			break;
641 		if (thread->pt_state == PT_STATE_DEAD) {
642 			pthread_mutex_unlock(&thread->pt_lock);
643 			self->pt_droplock = NULL;
644 			return ESRCH;
645 		}
646 		if ((thread->pt_flags & PT_FLAG_DETACHED) != 0) {
647 			pthread_mutex_unlock(&thread->pt_lock);
648 			self->pt_droplock = NULL;
649 			return EINVAL;
650 		}
651 		error = pthread_cond_wait(&thread->pt_joiners,
652 		    &thread->pt_lock);
653 		if (error != 0) {
654 			pthread__errorfunc(__FILE__, __LINE__,
655 			    __func__, "unexpected return from cond_wait()");
656 		}
657 
658 	}
659 	pthread__testcancel(self);
660 	if (valptr != NULL)
661 		*valptr = thread->pt_exitval;
662 	/* pthread__reap() will drop the lock. */
663 	pthread__reap(thread);
664 	self->pt_droplock = NULL;
665 
666 	return 0;
667 }
668 
669 static void
670 pthread__reap(pthread_t thread)
671 {
672 	char *name;
673 
674 	name = thread->pt_name;
675 	thread->pt_name = NULL;
676 	thread->pt_state = PT_STATE_DEAD;
677 	pthread_mutex_unlock(&thread->pt_lock);
678 
679 	pthread_mutex_lock(&pthread__deadqueue_lock);
680 	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
681 	pthread_mutex_unlock(&pthread__deadqueue_lock);
682 
683 	if (name != NULL)
684 		free(name);
685 }
686 
/*
 * Compare two thread handles.  Returns 1 if they are the same handle
 * value and 0 otherwise.
 */
int
pthread_equal(pthread_t t1, pthread_t t2)
{
	int same;

	/* Handles are compared by value; nothing is dereferenced. */
	same = (t1 == t2);
	return same;
}
694 
695 
696 int
697 pthread_detach(pthread_t thread)
698 {
699 
700 	if (pthread__find(thread) != 0)
701 		return ESRCH;
702 
703 	if (thread->pt_magic != PT_MAGIC)
704 		return EINVAL;
705 
706 	pthread_mutex_lock(&thread->pt_lock);
707 	thread->pt_flags |= PT_FLAG_DETACHED;
708 	if (thread->pt_state == PT_STATE_ZOMBIE) {
709 		/* pthread__reap() will drop the lock. */
710 		pthread__reap(thread);
711 	} else {
712 		/*
713 		 * Not valid for threads to be waiting in
714 		 * pthread_join() (there are intractable
715 		 * sync issues from the application
716 		 * perspective), but give those threads
717 		 * a chance anyway.
718 		 */
719 		pthread_cond_broadcast(&thread->pt_joiners);
720 		pthread_mutex_unlock(&thread->pt_lock);
721 	}
722 
723 	return 0;
724 }
725 
726 
727 int
728 pthread_getname_np(pthread_t thread, char *name, size_t len)
729 {
730 
731 	if (pthread__find(thread) != 0)
732 		return ESRCH;
733 
734 	if (thread->pt_magic != PT_MAGIC)
735 		return EINVAL;
736 
737 	pthread_mutex_lock(&thread->pt_lock);
738 	if (thread->pt_name == NULL)
739 		name[0] = '\0';
740 	else
741 		strlcpy(name, thread->pt_name, len);
742 	pthread_mutex_unlock(&thread->pt_lock);
743 
744 	return 0;
745 }
746 
747 
748 int
749 pthread_setname_np(pthread_t thread, const char *name, void *arg)
750 {
751 	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
752 	int namelen;
753 
754 	if (pthread__find(thread) != 0)
755 		return ESRCH;
756 
757 	if (thread->pt_magic != PT_MAGIC)
758 		return EINVAL;
759 
760 	namelen = snprintf(newname, sizeof(newname), name, arg);
761 	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
762 		return EINVAL;
763 
764 	cp = strdup(newname);
765 	if (cp == NULL)
766 		return ENOMEM;
767 
768 	pthread_mutex_lock(&thread->pt_lock);
769 	oldname = thread->pt_name;
770 	thread->pt_name = cp;
771 	(void)_lwp_setname(thread->pt_lid, cp);
772 	pthread_mutex_unlock(&thread->pt_lock);
773 
774 	if (oldname != NULL)
775 		free(oldname);
776 
777 	return 0;
778 }
779 
780 
781 
/*
 * Return the handle of the calling thread.
 *
 * XXX There should be a way for applications to use the efficient
 *  inline version, but there are opacity/namespace issues.
 */
pthread_t
pthread_self(void)
{
	pthread_t me = pthread__self();

	return me;
}
792 
793 
794 int
795 pthread_cancel(pthread_t thread)
796 {
797 
798 	if (pthread__find(thread) != 0)
799 		return ESRCH;
800 	pthread_mutex_lock(&thread->pt_lock);
801 	thread->pt_flags |= PT_FLAG_CS_PENDING;
802 	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
803 		thread->pt_cancel = 1;
804 		pthread_mutex_unlock(&thread->pt_lock);
805 		_lwp_wakeup(thread->pt_lid);
806 	} else
807 		pthread_mutex_unlock(&thread->pt_lock);
808 
809 	return 0;
810 }
811 
812 
813 int
814 pthread_setcancelstate(int state, int *oldstate)
815 {
816 	pthread_t self;
817 	int retval;
818 
819 	self = pthread__self();
820 	retval = 0;
821 
822 	pthread_mutex_lock(&self->pt_lock);
823 
824 	if (oldstate != NULL) {
825 		if (self->pt_flags & PT_FLAG_CS_DISABLED)
826 			*oldstate = PTHREAD_CANCEL_DISABLE;
827 		else
828 			*oldstate = PTHREAD_CANCEL_ENABLE;
829 	}
830 
831 	if (state == PTHREAD_CANCEL_DISABLE) {
832 		self->pt_flags |= PT_FLAG_CS_DISABLED;
833 		if (self->pt_cancel) {
834 			self->pt_flags |= PT_FLAG_CS_PENDING;
835 			self->pt_cancel = 0;
836 		}
837 	} else if (state == PTHREAD_CANCEL_ENABLE) {
838 		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
839 		/*
840 		 * If a cancellation was requested while cancellation
841 		 * was disabled, note that fact for future
842 		 * cancellation tests.
843 		 */
844 		if (self->pt_flags & PT_FLAG_CS_PENDING) {
845 			self->pt_cancel = 1;
846 			/* This is not a deferred cancellation point. */
847 			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
848 				pthread_mutex_unlock(&self->pt_lock);
849 				pthread__cancelled();
850 			}
851 		}
852 	} else
853 		retval = EINVAL;
854 
855 	pthread_mutex_unlock(&self->pt_lock);
856 
857 	return retval;
858 }
859 
860 
861 int
862 pthread_setcanceltype(int type, int *oldtype)
863 {
864 	pthread_t self;
865 	int retval;
866 
867 	self = pthread__self();
868 	retval = 0;
869 
870 	pthread_mutex_lock(&self->pt_lock);
871 
872 	if (oldtype != NULL) {
873 		if (self->pt_flags & PT_FLAG_CS_ASYNC)
874 			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
875 		else
876 			*oldtype = PTHREAD_CANCEL_DEFERRED;
877 	}
878 
879 	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
880 		self->pt_flags |= PT_FLAG_CS_ASYNC;
881 		if (self->pt_cancel) {
882 			pthread_mutex_unlock(&self->pt_lock);
883 			pthread__cancelled();
884 		}
885 	} else if (type == PTHREAD_CANCEL_DEFERRED)
886 		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
887 	else
888 		retval = EINVAL;
889 
890 	pthread_mutex_unlock(&self->pt_lock);
891 
892 	return retval;
893 }
894 
895 
896 void
897 pthread_testcancel(void)
898 {
899 	pthread_t self;
900 
901 	self = pthread__self();
902 	if (self->pt_cancel)
903 		pthread__cancelled();
904 }
905 
906 
907 /*
908  * POSIX requires that certain functions return an error rather than
909  * invoking undefined behavior even when handed completely bogus
910  * pthread_t values, e.g. stack garbage or (pthread_t)666. This
911  * utility routine searches the list of threads for the pthread_t
912  * value without dereferencing it.
913  */
914 int
915 pthread__find(pthread_t id)
916 {
917 	pthread_t target;
918 
919 	pthread_rwlock_rdlock(&pthread__alltree_lock);
920 	/* LINTED */
921 	target = RB_FIND(__pthread__alltree, &pthread__alltree, id);
922 	pthread_rwlock_unlock(&pthread__alltree_lock);
923 
924 	if (target == NULL || target->pt_state == PT_STATE_DEAD)
925 		return ESRCH;
926 
927 	return 0;
928 }
929 
930 
931 void
932 pthread__testcancel(pthread_t self)
933 {
934 
935 	if (self->pt_cancel)
936 		pthread__cancelled();
937 }
938 
939 
940 void
941 pthread__cancelled(void)
942 {
943 	pthread_mutex_t *droplock;
944 	pthread_t self;
945 
946 	self = pthread__self();
947 	droplock = self->pt_droplock;
948 	self->pt_droplock = NULL;
949 
950 	if (droplock != NULL && pthread_mutex_held_np(droplock))
951 		pthread_mutex_unlock(droplock);
952 
953 	pthread_exit(PTHREAD_CANCELED);
954 }
955 
956 
957 void
958 pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
959 {
960 	pthread_t self;
961 	struct pt_clean_t *entry;
962 
963 	self = pthread__self();
964 	entry = store;
965 	entry->ptc_cleanup = cleanup;
966 	entry->ptc_arg = arg;
967 	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
968 }
969 
970 
971 void
972 pthread__cleanup_pop(int ex, void *store)
973 {
974 	pthread_t self;
975 	struct pt_clean_t *entry;
976 
977 	self = pthread__self();
978 	entry = store;
979 
980 	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
981 	if (ex)
982 		(*entry->ptc_cleanup)(entry->ptc_arg);
983 }
984 
985 
986 int *
987 pthread__errno(void)
988 {
989 	pthread_t self;
990 
991 	self = pthread__self();
992 
993 	return &(self->pt_errno);
994 }
995 
/* Direct system call entry for write(). */
ssize_t	_sys_write(int, const void *, size_t);

/*
 * Report a failed internal assertion on standard error and abort.
 *
 * file, line - location of the assertion.
 * function   - enclosing function name, or NULL to omit it.
 * expr       - text of the failed expression.
 *
 * Does not return: the process sends itself SIGABRT and, should that not
 * terminate it, calls _exit(1).
 */
void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, sizeof(buf),
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	/*
	 * snprintf() returns the length the full message would have had,
	 * or a negative value on error.  Write only what is in buf.
	 */
	if (len < 0)
		len = 0;
	else if ((size_t)len >= sizeof(buf))
		len = (int)sizeof(buf) - 1;

	_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)kill(getpid(), SIGABRT);

	_exit(1);
}
1021 
1022 
1023 void
1024 pthread__errorfunc(const char *file, int line, const char *function,
1025 		   const char *msg)
1026 {
1027 	char buf[1024];
1028 	size_t len;
1029 
1030 	if (pthread__diagassert == 0)
1031 		return;
1032 
1033 	/*
1034 	 * snprintf should not acquire any locks, or we could
1035 	 * end up deadlocked if the assert caller held locks.
1036 	 */
1037 	len = snprintf(buf, 1024,
1038 	    "%s: Error detected by libpthread: %s.\n"
1039 	    "Detected by file \"%s\", line %d%s%s%s.\n"
1040 	    "See pthread(3) for information.\n",
1041 	    getprogname(), msg, file, line,
1042 	    function ? ", function \"" : "",
1043 	    function ? function : "",
1044 	    function ? "\"" : "");
1045 
1046 	if (pthread__diagassert & DIAGASSERT_STDERR)
1047 		_sys_write(STDERR_FILENO, buf, len);
1048 
1049 	if (pthread__diagassert & DIAGASSERT_SYSLOG)
1050 		syslog(LOG_DEBUG | LOG_USER, "%s", buf);
1051 
1052 	if (pthread__diagassert & DIAGASSERT_ABORT) {
1053 		(void)kill(getpid(), SIGABRT);
1054 		_exit(1);
1055 	}
1056 }
1057 
1058 /*
1059  * Thread park/unpark operations.  The kernel operations are
1060  * modelled after a brief description from "Multithreading in
1061  * the Solaris Operating Environment":
1062  *
1063  * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
1064  */
1065 
/* Report `msg' via pthread__errorfunc(), tagged with the current file, line and function. */
#define	OOPS(msg)			\
    pthread__errorfunc(__FILE__, __LINE__, __func__, msg)
1068 
/*
 * Block the calling thread until it is woken.
 *
 * self     - the calling thread.
 * lock     - mutex held by the caller on entry.  It is released before
 *            sleeping and is NOT held on return; it is only re-taken
 *            briefly if the thread has to take itself off `queue'.
 * queue    - sleep queue the thread may still be linked on (via pt_sleep)
 *            when it wakes early.
 * abstime  - timeout handed to _lwp_park().
 * cancelpt - non-zero if a set pt_cancel should end the wait.
 * hint     - passed to _lwp_park() for both of its hint arguments.
 *
 * Returns 0 for a normal wakeup, ETIMEDOUT if _lwp_park() timed out,
 * EINTR if cancelpt is set and a cancellation request is present, or
 * another errno value from a failed _lwp_park() (also reported via OOPS).
 */
int
pthread__park(pthread_t self, pthread_mutex_t *lock,
	      pthread_queue_t *queue, const struct timespec *abstime,
	      int cancelpt, const void *hint)
{
	int rv, error;
	void *obj;

	/*
	 * For non-interlocked release of mutexes we need a store
	 * barrier before incrementing pt_blocking away from zero.
	 * This is provided by pthread_mutex_unlock().
	 */
	self->pt_willpark = 1;
	pthread_mutex_unlock(lock);
	self->pt_willpark = 0;
	self->pt_blocking++;

	/*
	 * Wait until we are awoken by a pending unpark operation,
	 * a signal, an unpark posted after we have gone asleep,
	 * or an expired timeout.
	 *
	 * It is fine to test the value of pt_sleepobj without
	 * holding any locks, because:
	 *
	 * o Only the blocking thread (this thread) ever sets them
	 *   to a non-NULL value.
	 *
	 * o Other threads may set them NULL, but if they do so they
	 *   must also make this thread return from _lwp_park.
	 *
	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
	 *   calls and all make use of spinlocks in the kernel.  So
	 *   these system calls act as full memory barriers, and will
	 *   ensure that the calling CPU's store buffers are drained.
	 *   In combination with the spinlock release before unpark,
	 *   this means that modification of pt_sleepobj/onq by another
	 *   thread will become globally visible before that thread
	 *   schedules an unpark operation on this thread.
	 *
	 * Note: the test in the while() statement dodges the park op if
	 * we have already been awoken, unless there is another thread to
	 * awaken.  This saves a syscall - if we were already awakened,
	 * the next call to _lwp_park() would need to return early in order
	 * to eat the previous wakeup.
	 */
	rv = 0;
	do {
		/*
		 * If we deferred unparking a thread, arrange to
		 * have _lwp_park() restart it before blocking.
		 */
		error = _lwp_park(abstime, self->pt_unpark, hint, hint);
		self->pt_unpark = 0;
		if (error != 0) {
			/* EINTR and EALREADY count as ordinary wakeups. */
			switch (rv = errno) {
			case EINTR:
			case EALREADY:
				rv = 0;
				break;
			case ETIMEDOUT:
				break;
			default:
				OOPS("_lwp_park failed");
				break;
			}
		}
		/* Check for cancellation. */
		if (cancelpt && self->pt_cancel)
			rv = EINTR;
	} while (self->pt_sleepobj != NULL && rv == 0);

	/*
	 * If we have been awoken early but are still on the queue,
	 * then remove ourself.  Again, it's safe to do the test
	 * without holding any locks.
	 */
	if (__predict_false(self->pt_sleepobj != NULL)) {
		pthread_mutex_lock(lock);
		/* Re-check under the lock: a waker may have dequeued us meanwhile. */
		if ((obj = self->pt_sleepobj) != NULL) {
			PTQ_REMOVE(queue, self, pt_sleep);
			self->pt_sleepobj = NULL;
			/* Give the sleep object's early-wakeup hook a chance to run. */
			if (obj != NULL && self->pt_early != NULL)
				(*self->pt_early)(obj);
		}
		pthread_mutex_unlock(lock);
	}
	self->pt_early = NULL;
	self->pt_blocking--;
	membar_sync();

	return rv;
}
1163 
1164 void
1165 pthread__unpark(pthread_queue_t *queue, pthread_t self,
1166 		pthread_mutex_t *interlock)
1167 {
1168 	pthread_t target;
1169 	u_int max;
1170 	size_t nwaiters;
1171 
1172 	max = pthread__unpark_max;
1173 	nwaiters = self->pt_nwaiters;
1174 	target = PTQ_FIRST(queue);
1175 	if (nwaiters == max) {
1176 		/* Overflow. */
1177 		(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1178 		    __UNVOLATILE(&interlock->ptm_waiters));
1179 		nwaiters = 0;
1180 	}
1181 	target->pt_sleepobj = NULL;
1182 	self->pt_waiters[nwaiters++] = target->pt_lid;
1183 	PTQ_REMOVE(queue, target, pt_sleep);
1184 	self->pt_nwaiters = nwaiters;
1185 	pthread__mutex_deferwake(self, interlock);
1186 }
1187 
1188 void
1189 pthread__unpark_all(pthread_queue_t *queue, pthread_t self,
1190 		    pthread_mutex_t *interlock)
1191 {
1192 	pthread_t target;
1193 	u_int max;
1194 	size_t nwaiters;
1195 
1196 	max = pthread__unpark_max;
1197 	nwaiters = self->pt_nwaiters;
1198 	PTQ_FOREACH(target, queue, pt_sleep) {
1199 		if (nwaiters == max) {
1200 			/* Overflow. */
1201 			(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1202 			    __UNVOLATILE(&interlock->ptm_waiters));
1203 			nwaiters = 0;
1204 		}
1205 		target->pt_sleepobj = NULL;
1206 		self->pt_waiters[nwaiters++] = target->pt_lid;
1207 	}
1208 	self->pt_nwaiters = nwaiters;
1209 	PTQ_INIT(queue);
1210 	pthread__mutex_deferwake(self, interlock);
1211 }
1212 
1213 #undef	OOPS
1214 
1215 static void
1216 pthread__initmainstack(void)
1217 {
1218 	struct rlimit slimit;
1219 	const AuxInfo *aux;
1220 	size_t size;
1221 
1222 	_DIAGASSERT(_dlauxinfo() != NULL);
1223 
1224 	if (getrlimit(RLIMIT_STACK, &slimit) == -1)
1225 		err(1, "Couldn't get stack resource consumption limits");
1226 	size = slimit.rlim_cur;
1227 	pthread__main.pt_stack.ss_size = size;
1228 
1229 	for (aux = _dlauxinfo(); aux->a_type != AT_NULL; ++aux) {
1230 		if (aux->a_type == AT_STACKBASE) {
1231 			pthread__main.pt_stack.ss_sp = (void *)aux->a_v;
1232 #ifdef __MACHINE_STACK_GROWS_UP
1233 			pthread__main.pt_stack.ss_sp = (void *)aux->a_v;
1234 #else
1235 			pthread__main.pt_stack.ss_sp = (char *)aux->a_v - size;
1236 #endif
1237 			break;
1238 		}
1239 	}
1240 }
1241 
1242 /*
1243  * Set up the slightly special stack for the "initial" thread, which
1244  * runs on the normal system stack, and thus gets slightly different
1245  * treatment.
1246  */
1247 static void
1248 pthread__initmain(pthread_t *newt)
1249 {
1250 	char *value;
1251 
1252 	pthread__initmainstack();
1253 
1254 	value = pthread__getenv("PTHREAD_STACKSIZE");
1255 	if (value != NULL) {
1256 		pthread__stacksize = atoi(value) * 1024;
1257 		if (pthread__stacksize > pthread__main.pt_stack.ss_size)
1258 			pthread__stacksize = pthread__main.pt_stack.ss_size;
1259 	}
1260 	if (pthread__stacksize == 0)
1261 		pthread__stacksize = pthread__main.pt_stack.ss_size;
1262 	pthread__stacksize += pthread__pagesize - 1;
1263 	pthread__stacksize &= ~pthread__pagesize;
1264 	if (pthread__stacksize < 4 * pthread__pagesize)
1265 		errx(1, "Stacksize limit is too low, minimum %zd kbyte.",
1266 		    4 * pthread__pagesize / 1024);
1267 
1268 	*newt = &pthread__main;
1269 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
1270 #  ifdef __HAVE___LWP_GETTCB_FAST
1271 	pthread__main.pt_tls = __lwp_gettcb_fast();
1272 #  else
1273 	pthread__main.pt_tls = _lwp_getprivate();
1274 #  endif
1275 	pthread__main.pt_tls->tcb_pthread = &pthread__main;
1276 #endif
1277 }
1278 
1279 #ifndef lint
/*
 * Ordering function for the tree of all threads: orders two thread
 * structures by their addresses taken as unsigned integers.
 * Returns -1, 0 or 1 when `a' is below, equal to, or above `b'.
 */
static int
pthread__cmp(struct __pthread_st *a, struct __pthread_st *b)
{
	const uintptr_t lhs = (uintptr_t)a;
	const uintptr_t rhs = (uintptr_t)b;

	if (lhs == rhs)
		return 0;
	return (lhs < rhs) ? -1 : 1;
}
1291 RB_GENERATE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
1292 #endif
1293 
/*
 * Lock-free replacement for getenv(), because getenv() wants to use
 * locks.
 *
 * Walks `environ' directly looking for an entry of the form
 * "name=value" and returns a pointer to the value part inside that
 * entry (not a copy), or NULL if no entry matches.  The first matching
 * entry wins.
 */
char *
pthread__getenv(const char *name)
{
	extern char **environ;
	const size_t namelen = strlen(name);
	char **envp;

	for (envp = environ; *envp != NULL; envp++) {
		char *entry = *envp;

		/* The entry must begin with the name ... */
		if (strncmp(name, entry, namelen) != 0)
			continue;
		/* ... and the name must be followed directly by '='. */
		if (entry[namelen] != '=')
			continue;
		return entry + namelen + 1;
	}

	return NULL;
}
1311 
1312 pthread_mutex_t *
1313 pthread__hashlock(volatile const void *p)
1314 {
1315 	uintptr_t v;
1316 
1317 	v = (uintptr_t)p;
1318 	return &hashlocks[((v >> 9) ^ (v >> 3)) & (NHASHLOCK - 1)].mutex;
1319 }
1320 
1321 int
1322 pthread__checkpri(int pri)
1323 {
1324 	static int havepri;
1325 	static long min, max;
1326 
1327 	if (!havepri) {
1328 		min = sysconf(_SC_SCHED_PRI_MIN);
1329 		max = sysconf(_SC_SCHED_PRI_MAX);
1330 		havepri = 1;
1331 	}
1332 	return (pri < min || pri > max) ? EINVAL : 0;
1333 }
1334