xref: /netbsd-src/lib/libpthread/pthread.c (revision 82ad575716605df31379cf04a2f3efbc97b8a6f5)
1 /*	$NetBSD: pthread.c,v 1.139 2012/11/03 23:42:27 rmind Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2003, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Nathan J. Williams and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: pthread.c,v 1.139 2012/11/03 23:42:27 rmind Exp $");
34 
35 #define	__EXPOSE_STACK	1
36 
37 #include <sys/param.h>
38 #include <sys/exec_elf.h>
39 #include <sys/mman.h>
40 #include <sys/lwp.h>
41 #include <sys/lwpctl.h>
42 #include <sys/tls.h>
43 
44 #include <assert.h>
45 #include <dlfcn.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <lwp.h>
49 #include <signal.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <stddef.h>
53 #include <string.h>
54 #include <syslog.h>
55 #include <ucontext.h>
56 #include <unistd.h>
57 #include <sched.h>
58 
59 #include "pthread.h"
60 #include "pthread_int.h"
61 
62 pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
63 static rb_tree_t	pthread__alltree;
64 
65 static signed int	pthread__cmp(void *, const void *, const void *);
66 
67 static const rb_tree_ops_t pthread__alltree_ops = {
68 	.rbto_compare_nodes = pthread__cmp,
69 	.rbto_compare_key = pthread__cmp,
70 	.rbto_node_offset = offsetof(struct __pthread_st, pt_alltree),
71 	.rbto_context = NULL
72 };
73 
74 static void	pthread__create_tramp(void *);
75 static void	pthread__initthread(pthread_t);
76 static void	pthread__scrubthread(pthread_t, char *, int);
77 static void	pthread__initmain(pthread_t *);
78 static void	pthread__fork_callback(void);
79 static void	pthread__reap(pthread_t);
80 static void	pthread__child_callback(void);
81 static void	pthread__start(void);
82 
83 void	pthread__init(void);
84 
85 int pthread__started;
86 pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
87 pthread_queue_t pthread__deadqueue;
88 pthread_queue_t pthread__allqueue;
89 
90 static pthread_attr_t pthread_default_attr;
91 static lwpctl_t pthread__dummy_lwpctl = { .lc_curcpu = LWPCTL_CPU_NONE };
92 
93 enum {
94 	DIAGASSERT_ABORT =	1<<0,
95 	DIAGASSERT_STDERR =	1<<1,
96 	DIAGASSERT_SYSLOG =	1<<2
97 };
98 
99 static int pthread__diagassert;
100 
101 int pthread__concurrency;
102 int pthread__nspins;
103 int pthread__unpark_max = PTHREAD__UNPARK_MAX;
104 int pthread__dbg;	/* set by libpthread_dbg if active */
105 
106 /*
107  * We have to initialize the pthread_stack* variables here because
108  * mutexes are used before pthread_init() and thus pthread__initmain()
109  * are called.  Since mutexes only save the stack pointer and not a
110  * pointer to the thread data, it is safe to change the mapping from
111  * stack pointer to thread data afterwards.
112  */
113 size_t	pthread__stacksize;
114 size_t	pthread__pagesize;
115 static struct __pthread_st pthread__main;
116 
117 int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);
118 
119 __strong_alias(__libc_thr_self,pthread_self)
120 __strong_alias(__libc_thr_create,pthread_create)
121 __strong_alias(__libc_thr_exit,pthread_exit)
122 __strong_alias(__libc_thr_errno,pthread__errno)
123 __strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
124 __strong_alias(__libc_thr_equal,pthread_equal)
125 __strong_alias(__libc_thr_init,pthread__init)
126 
127 /*
128  * Static library kludge.  Place a reference to a symbol any library
129  * file which does not already have a reference here.
130  */
131 extern int pthread__cancel_stub_binder;
132 
133 void *pthread__static_lib_binder[] = {
134 	&pthread__cancel_stub_binder,
135 	pthread_cond_init,
136 	pthread_mutex_init,
137 	pthread_rwlock_init,
138 	pthread_barrier_init,
139 	pthread_key_create,
140 	pthread_setspecific,
141 };
142 
143 #define	NHASHLOCK	64
144 
145 static union hashlock {
146 	pthread_mutex_t	mutex;
147 	char		pad[64];
148 } hashlocks[NHASHLOCK] __aligned(64);
149 
150 /*
151  * This needs to be started by the library loading code, before main()
152  * gets to run, for various things that use the state of the initial thread
153  * to work properly (thread-specific data is an application-visible example;
154  * spinlock counts for mutexes is an internal example).
155  */
156 void
157 pthread__init(void)
158 {
159 	pthread_t first;
160 	char *p;
161 	int i;
162 	extern int __isthreaded;
163 
164 	pthread__pagesize = (size_t)sysconf(_SC_PAGESIZE);
165 	pthread__concurrency = (int)sysconf(_SC_NPROCESSORS_CONF);
166 
167 	/* Initialize locks first; they're needed elsewhere. */
168 	pthread__lockprim_init();
169 	for (i = 0; i < NHASHLOCK; i++) {
170 		pthread_mutex_init(&hashlocks[i].mutex, NULL);
171 	}
172 
173 	/* Fetch parameters. */
174 	i = (int)_lwp_unpark_all(NULL, 0, NULL);
175 	if (i == -1)
176 		err(1, "_lwp_unpark_all");
177 	if (i < pthread__unpark_max)
178 		pthread__unpark_max = i;
179 
180 	/* Basic data structure setup */
181 	pthread_attr_init(&pthread_default_attr);
182 	PTQ_INIT(&pthread__allqueue);
183 	PTQ_INIT(&pthread__deadqueue);
184 
185 	rb_tree_init(&pthread__alltree, &pthread__alltree_ops);
186 
187 	/* Create the thread structure corresponding to main() */
188 	pthread__initmain(&first);
189 	pthread__initthread(first);
190 	pthread__scrubthread(first, NULL, 0);
191 
192 	first->pt_lid = _lwp_self();
193 	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
194 	(void)rb_tree_insert_node(&pthread__alltree, first);
195 
196 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &first->pt_lwpctl) != 0) {
197 		err(1, "_lwp_ctl");
198 	}
199 
200 	/* Start subsystems */
201 	PTHREAD_MD_INIT
202 
203 	for (p = pthread__getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
204 		switch (*p) {
205 		case 'a':
206 			pthread__diagassert |= DIAGASSERT_ABORT;
207 			break;
208 		case 'A':
209 			pthread__diagassert &= ~DIAGASSERT_ABORT;
210 			break;
211 		case 'e':
212 			pthread__diagassert |= DIAGASSERT_STDERR;
213 			break;
214 		case 'E':
215 			pthread__diagassert &= ~DIAGASSERT_STDERR;
216 			break;
217 		case 'l':
218 			pthread__diagassert |= DIAGASSERT_SYSLOG;
219 			break;
220 		case 'L':
221 			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
222 			break;
223 		}
224 	}
225 
226 	/* Tell libc that we're here and it should role-play accordingly. */
227 	pthread_atfork(NULL, NULL, pthread__fork_callback);
228 	__isthreaded = 1;
229 }
230 
231 static void
232 pthread__fork_callback(void)
233 {
234 	struct __pthread_st *self = pthread__self();
235 
236 	/* lwpctl state is not copied across fork. */
237 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
238 		err(1, "_lwp_ctl");
239 	}
240 	self->pt_lid = _lwp_self();
241 }
242 
243 static void
244 pthread__child_callback(void)
245 {
246 
247 	/*
248 	 * Clean up data structures that a forked child process might
249 	 * trip over. Note that if threads have been created (causing
250 	 * this handler to be registered) the standards say that the
251 	 * child will trigger undefined behavior if it makes any
252 	 * pthread_* calls (or any other calls that aren't
253 	 * async-signal-safe), so we don't really have to clean up
254 	 * much. Anything that permits some pthread_* calls to work is
255 	 * merely being polite.
256 	 */
257 	pthread__started = 0;
258 }
259 
260 static void
261 pthread__start(void)
262 {
263 
264 	/*
265 	 * Per-process timers are cleared by fork(); despite the
266 	 * various restrictions on fork() and threads, it's legal to
267 	 * fork() before creating any threads.
268 	 */
269 	pthread_atfork(NULL, NULL, pthread__child_callback);
270 }
271 
272 
273 /* General-purpose thread data structure sanitization. */
274 /* ARGSUSED */
275 static void
276 pthread__initthread(pthread_t t)
277 {
278 
279 	t->pt_self = t;
280 	t->pt_magic = PT_MAGIC;
281 	t->pt_willpark = 0;
282 	t->pt_unpark = 0;
283 	t->pt_nwaiters = 0;
284 	t->pt_sleepobj = NULL;
285 	t->pt_signalled = 0;
286 	t->pt_havespecific = 0;
287 	t->pt_early = NULL;
288 	t->pt_lwpctl = &pthread__dummy_lwpctl;
289 	t->pt_blocking = 0;
290 	t->pt_droplock = NULL;
291 
292 	memcpy(&t->pt_lockops, pthread__lock_ops, sizeof(t->pt_lockops));
293 	pthread_mutex_init(&t->pt_lock, NULL);
294 	PTQ_INIT(&t->pt_cleanup_stack);
295 	pthread_cond_init(&t->pt_joiners, NULL);
296 	memset(&t->pt_specific, 0, sizeof(t->pt_specific));
297 }
298 
299 static void
300 pthread__scrubthread(pthread_t t, char *name, int flags)
301 {
302 
303 	t->pt_state = PT_STATE_RUNNING;
304 	t->pt_exitval = NULL;
305 	t->pt_flags = flags;
306 	t->pt_cancel = 0;
307 	t->pt_errno = 0;
308 	t->pt_name = name;
309 	t->pt_lid = 0;
310 }
311 
312 static int
313 pthread__getstack(pthread_t newthread, const pthread_attr_t *attr)
314 {
315 	void *stackbase, *stackbase2, *redzone;
316 	size_t stacksize, guardsize;
317 	bool allocated;
318 
319 	if (attr != NULL) {
320 		pthread_attr_getstack(attr, &stackbase, &stacksize);
321 	} else {
322 		stackbase = NULL;
323 		stacksize = 0;
324 	}
325 	if (stacksize == 0)
326 		stacksize = pthread__stacksize;
327 
328 	if (newthread->pt_stack_allocated) {
329 		if (stackbase == NULL &&
330 		    newthread->pt_stack.ss_size == stacksize)
331 			return 0;
332 		stackbase2 = newthread->pt_stack.ss_sp;
333 #ifndef __MACHINE_STACK_GROWS_UP
334 		stackbase2 = (char *)stackbase2 - newthread->pt_guardsize;
335 #endif
336 		munmap(stackbase2,
337 		    newthread->pt_stack.ss_size + newthread->pt_guardsize);
338 		newthread->pt_stack.ss_sp = NULL;
339 		newthread->pt_stack.ss_size = 0;
340 		newthread->pt_guardsize = 0;
341 		newthread->pt_stack_allocated = false;
342 	}
343 
344 	newthread->pt_stack_allocated = false;
345 
346 	if (stackbase == NULL) {
347 		stacksize = ((stacksize - 1) | (pthread__pagesize - 1)) + 1;
348 		guardsize = pthread__pagesize;
349 		stackbase = mmap(NULL, stacksize + guardsize,
350 		    PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, (off_t)0);
351 		if (stackbase == MAP_FAILED)
352 			return ENOMEM;
353 		allocated = true;
354 	} else {
355 		guardsize = 0;
356 		allocated = false;
357 	}
358 #ifdef __MACHINE_STACK_GROWS_UP
359 	redzone = (char *)stackbase + stacksize;
360 	stackbase2 = (char *)stackbase;
361 #else
362 	redzone = (char *)stackbase;
363 	stackbase2 = (char *)stackbase + guardsize;
364 #endif
365 	if (allocated && guardsize &&
366 	    mprotect(redzone, guardsize, PROT_NONE) == -1) {
367 		munmap(stackbase, stacksize + guardsize);
368 		return EPERM;
369 	}
370 	newthread->pt_stack.ss_size = stacksize;
371 	newthread->pt_stack.ss_sp = stackbase2;
372 	newthread->pt_guardsize = guardsize;
373 	newthread->pt_stack_allocated = allocated;
374 	return 0;
375 }
376 
377 int
378 pthread_create(pthread_t *thread, const pthread_attr_t *attr,
379 	    void *(*startfunc)(void *), void *arg)
380 {
381 	pthread_t newthread;
382 	pthread_attr_t nattr;
383 	struct pthread_attr_private *p;
384 	char * volatile name;
385 	unsigned long flag;
386 	void *private_area;
387 	int ret;
388 
389 	/*
390 	 * It's okay to check this without a lock because there can
391 	 * only be one thread before it becomes true.
392 	 */
393 	if (pthread__started == 0) {
394 		pthread__start();
395 		pthread__started = 1;
396 	}
397 
398 	if (attr == NULL)
399 		nattr = pthread_default_attr;
400 	else if (attr->pta_magic == PT_ATTR_MAGIC)
401 		nattr = *attr;
402 	else
403 		return EINVAL;
404 
405 	/* Fetch misc. attributes from the attr structure. */
406 	name = NULL;
407 	if ((p = nattr.pta_private) != NULL)
408 		if (p->ptap_name[0] != '\0')
409 			if ((name = strdup(p->ptap_name)) == NULL)
410 				return ENOMEM;
411 
412 	newthread = NULL;
413 
414 	/*
415 	 * Try to reclaim a dead thread.
416 	 */
417 	if (!PTQ_EMPTY(&pthread__deadqueue)) {
418 		pthread_mutex_lock(&pthread__deadqueue_lock);
419 		PTQ_FOREACH(newthread, &pthread__deadqueue, pt_deadq) {
420 			/* Still running? */
421 			if (newthread->pt_lwpctl->lc_curcpu ==
422 			    LWPCTL_CPU_EXITED ||
423 			    (_lwp_kill(newthread->pt_lid, 0) == -1 &&
424 			    errno == ESRCH))
425 				break;
426 		}
427 		if (newthread)
428 			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
429 		pthread_mutex_unlock(&pthread__deadqueue_lock);
430 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
431 		if (newthread && newthread->pt_tls) {
432 			_rtld_tls_free(newthread->pt_tls);
433 			newthread->pt_tls = NULL;
434 		}
435 #endif
436 	}
437 
438 	/*
439 	 * If necessary set up a stack, allocate space for a pthread_st,
440 	 * and initialize it.
441 	 */
442 	if (newthread == NULL) {
443 		newthread = malloc(sizeof(*newthread));
444 		if (newthread == NULL) {
445 			free(name);
446 			return ENOMEM;
447 		}
448 		newthread->pt_stack_allocated = false;
449 
450 		if (pthread__getstack(newthread, attr)) {
451 			free(newthread);
452 			free(name);
453 			return ENOMEM;
454 		}
455 
456 		/* This is used only when creating the thread. */
457 		_INITCONTEXT_U(&newthread->pt_uc);
458 		newthread->pt_uc.uc_stack = newthread->pt_stack;
459 		newthread->pt_uc.uc_link = NULL;
460 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
461 		newthread->pt_tls = NULL;
462 #endif
463 
464 		/* Add to list of all threads. */
465 		pthread_rwlock_wrlock(&pthread__alltree_lock);
466 		PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
467 		(void)rb_tree_insert_node(&pthread__alltree, newthread);
468 		pthread_rwlock_unlock(&pthread__alltree_lock);
469 
470 		/* Will be reset by the thread upon exit. */
471 		pthread__initthread(newthread);
472 	} else {
473 		if (pthread__getstack(newthread, attr)) {
474 			pthread_mutex_lock(&pthread__deadqueue_lock);
475 			PTQ_INSERT_TAIL(&pthread__deadqueue, newthread, pt_deadq);
476 			pthread_mutex_unlock(&pthread__deadqueue_lock);
477 			return ENOMEM;
478 		}
479 		_INITCONTEXT_U(&newthread->pt_uc);
480 		newthread->pt_uc.uc_stack = newthread->pt_stack;
481 		newthread->pt_uc.uc_link = NULL;
482 	}
483 
484 	/*
485 	 * Create the new LWP.
486 	 */
487 	pthread__scrubthread(newthread, name, nattr.pta_flags);
488 	newthread->pt_func = startfunc;
489 	newthread->pt_arg = arg;
490 #if defined(__HAVE_TLS_VARIANT_I) || defined(__HAVE_TLS_VARIANT_II)
491 	private_area = newthread->pt_tls = _rtld_tls_allocate();
492 	newthread->pt_tls->tcb_pthread = newthread;
493 #else
494 	private_area = newthread;
495 #endif
496 
497 	_lwp_makecontext(&newthread->pt_uc, pthread__create_tramp,
498 	    newthread, private_area, newthread->pt_stack.ss_sp,
499 	    newthread->pt_stack.ss_size);
500 
501 	flag = LWP_DETACHED;
502 	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0 ||
503 	    (nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0)
504 		flag |= LWP_SUSPENDED;
505 	ret = _lwp_create(&newthread->pt_uc, flag, &newthread->pt_lid);
506 	if (ret != 0) {
507 		ret = errno;
508 		pthread_mutex_lock(&newthread->pt_lock);
509 		/* Will unlock and free name. */
510 		pthread__reap(newthread);
511 		return ret;
512 	}
513 
514 	if ((nattr.pta_flags & PT_FLAG_EXPLICIT_SCHED) != 0) {
515 		if (p != NULL) {
516 			(void)pthread_setschedparam(newthread, p->ptap_policy,
517 			    &p->ptap_sp);
518 		}
519 		if ((newthread->pt_flags & PT_FLAG_SUSPENDED) == 0) {
520 			(void)_lwp_continue(newthread->pt_lid);
521 		}
522 	}
523 
524 	*thread = newthread;
525 
526 	return 0;
527 }
528 
529 
530 __dead static void
531 pthread__create_tramp(void *cookie)
532 {
533 	pthread_t self;
534 	void *retval;
535 
536 	self = cookie;
537 
538 	/*
539 	 * Throw away some stack in a feeble attempt to reduce cache
540 	 * thrash.  May help for SMT processors.  XXX We should not
541 	 * be allocating stacks on fixed 2MB boundaries.  Needs a
542 	 * thread register or decent thread local storage.
543 	 *
544 	 * Note that we may race with the kernel in _lwp_create(),
545 	 * and so pt_lid can be unset at this point, but we don't
546 	 * care.
547 	 */
548 	(void)alloca(((unsigned)self->pt_lid & 7) << 8);
549 
550 	if (self->pt_name != NULL) {
551 		pthread_mutex_lock(&self->pt_lock);
552 		if (self->pt_name != NULL)
553 			(void)_lwp_setname(0, self->pt_name);
554 		pthread_mutex_unlock(&self->pt_lock);
555 	}
556 
557 	if (_lwp_ctl(LWPCTL_FEATURE_CURCPU, &self->pt_lwpctl)) {
558 		err(1, "_lwp_ctl");
559 	}
560 
561 	retval = (*self->pt_func)(self->pt_arg);
562 
563 	pthread_exit(retval);
564 
565 	/*NOTREACHED*/
566 	pthread__abort();
567 }
568 
569 int
570 pthread_suspend_np(pthread_t thread)
571 {
572 	pthread_t self;
573 
574 	self = pthread__self();
575 	if (self == thread) {
576 		return EDEADLK;
577 	}
578 	if (pthread__find(thread) != 0)
579 		return ESRCH;
580 	if (_lwp_suspend(thread->pt_lid) == 0)
581 		return 0;
582 	return errno;
583 }
584 
585 int
586 pthread_resume_np(pthread_t thread)
587 {
588 
589 	if (pthread__find(thread) != 0)
590 		return ESRCH;
591 	if (_lwp_continue(thread->pt_lid) == 0)
592 		return 0;
593 	return errno;
594 }
595 
596 void
597 pthread_exit(void *retval)
598 {
599 	pthread_t self;
600 	struct pt_clean_t *cleanup;
601 	char *name;
602 
603 	self = pthread__self();
604 
605 	/* Disable cancellability. */
606 	pthread_mutex_lock(&self->pt_lock);
607 	self->pt_flags |= PT_FLAG_CS_DISABLED;
608 	self->pt_cancel = 0;
609 
610 	/* Call any cancellation cleanup handlers */
611 	if (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
612 		pthread_mutex_unlock(&self->pt_lock);
613 		while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
614 			cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
615 			PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
616 			(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
617 		}
618 		pthread_mutex_lock(&self->pt_lock);
619 	}
620 
621 	/* Perform cleanup of thread-specific data */
622 	pthread__destroy_tsd(self);
623 
624 	/* Signal our exit. */
625 	self->pt_exitval = retval;
626 	if (self->pt_flags & PT_FLAG_DETACHED) {
627 		self->pt_state = PT_STATE_DEAD;
628 		name = self->pt_name;
629 		self->pt_name = NULL;
630 		pthread_mutex_unlock(&self->pt_lock);
631 		if (name != NULL)
632 			free(name);
633 		pthread_mutex_lock(&pthread__deadqueue_lock);
634 		PTQ_INSERT_TAIL(&pthread__deadqueue, self, pt_deadq);
635 		pthread_mutex_unlock(&pthread__deadqueue_lock);
636 		_lwp_exit();
637 	} else {
638 		self->pt_state = PT_STATE_ZOMBIE;
639 		pthread_cond_broadcast(&self->pt_joiners);
640 		pthread_mutex_unlock(&self->pt_lock);
641 		/* Note: name will be freed by the joiner. */
642 		_lwp_exit();
643 	}
644 
645 	/*NOTREACHED*/
646 	pthread__abort();
647 	exit(1);
648 }
649 
650 
651 int
652 pthread_join(pthread_t thread, void **valptr)
653 {
654 	pthread_t self;
655 	int error;
656 
657 	self = pthread__self();
658 
659 	if (pthread__find(thread) != 0)
660 		return ESRCH;
661 
662 	if (thread->pt_magic != PT_MAGIC)
663 		return EINVAL;
664 
665 	if (thread == self)
666 		return EDEADLK;
667 
668 	self->pt_droplock = &thread->pt_lock;
669 	pthread_mutex_lock(&thread->pt_lock);
670 	for (;;) {
671 		if (thread->pt_state == PT_STATE_ZOMBIE)
672 			break;
673 		if (thread->pt_state == PT_STATE_DEAD) {
674 			pthread_mutex_unlock(&thread->pt_lock);
675 			self->pt_droplock = NULL;
676 			return ESRCH;
677 		}
678 		if ((thread->pt_flags & PT_FLAG_DETACHED) != 0) {
679 			pthread_mutex_unlock(&thread->pt_lock);
680 			self->pt_droplock = NULL;
681 			return EINVAL;
682 		}
683 		error = pthread_cond_wait(&thread->pt_joiners,
684 		    &thread->pt_lock);
685 		if (error != 0) {
686 			pthread__errorfunc(__FILE__, __LINE__,
687 			    __func__, "unexpected return from cond_wait()");
688 		}
689 
690 	}
691 	pthread__testcancel(self);
692 	if (valptr != NULL)
693 		*valptr = thread->pt_exitval;
694 	/* pthread__reap() will drop the lock. */
695 	pthread__reap(thread);
696 	self->pt_droplock = NULL;
697 
698 	return 0;
699 }
700 
701 static void
702 pthread__reap(pthread_t thread)
703 {
704 	char *name;
705 
706 	name = thread->pt_name;
707 	thread->pt_name = NULL;
708 	thread->pt_state = PT_STATE_DEAD;
709 	pthread_mutex_unlock(&thread->pt_lock);
710 
711 	pthread_mutex_lock(&pthread__deadqueue_lock);
712 	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
713 	pthread_mutex_unlock(&pthread__deadqueue_lock);
714 
715 	if (name != NULL)
716 		free(name);
717 }
718 
/*
 * Compare two thread handles.  Returns 1 when both refer to the same
 * thread and 0 otherwise; a plain value comparison is all it takes.
 */
int
pthread_equal(pthread_t t1, pthread_t t2)
{

	return (t1 == t2) ? 1 : 0;
}
726 
727 
728 int
729 pthread_detach(pthread_t thread)
730 {
731 
732 	if (pthread__find(thread) != 0)
733 		return ESRCH;
734 
735 	if (thread->pt_magic != PT_MAGIC)
736 		return EINVAL;
737 
738 	pthread_mutex_lock(&thread->pt_lock);
739 	thread->pt_flags |= PT_FLAG_DETACHED;
740 	if (thread->pt_state == PT_STATE_ZOMBIE) {
741 		/* pthread__reap() will drop the lock. */
742 		pthread__reap(thread);
743 	} else {
744 		/*
745 		 * Not valid for threads to be waiting in
746 		 * pthread_join() (there are intractable
747 		 * sync issues from the application
748 		 * perspective), but give those threads
749 		 * a chance anyway.
750 		 */
751 		pthread_cond_broadcast(&thread->pt_joiners);
752 		pthread_mutex_unlock(&thread->pt_lock);
753 	}
754 
755 	return 0;
756 }
757 
758 
759 int
760 pthread_getname_np(pthread_t thread, char *name, size_t len)
761 {
762 
763 	if (pthread__find(thread) != 0)
764 		return ESRCH;
765 
766 	if (thread->pt_magic != PT_MAGIC)
767 		return EINVAL;
768 
769 	pthread_mutex_lock(&thread->pt_lock);
770 	if (thread->pt_name == NULL)
771 		name[0] = '\0';
772 	else
773 		strlcpy(name, thread->pt_name, len);
774 	pthread_mutex_unlock(&thread->pt_lock);
775 
776 	return 0;
777 }
778 
779 
780 int
781 pthread_setname_np(pthread_t thread, const char *name, void *arg)
782 {
783 	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
784 	int namelen;
785 
786 	if (pthread__find(thread) != 0)
787 		return ESRCH;
788 
789 	if (thread->pt_magic != PT_MAGIC)
790 		return EINVAL;
791 
792 	namelen = snprintf(newname, sizeof(newname), name, arg);
793 	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
794 		return EINVAL;
795 
796 	cp = strdup(newname);
797 	if (cp == NULL)
798 		return ENOMEM;
799 
800 	pthread_mutex_lock(&thread->pt_lock);
801 	oldname = thread->pt_name;
802 	thread->pt_name = cp;
803 	(void)_lwp_setname(thread->pt_lid, cp);
804 	pthread_mutex_unlock(&thread->pt_lock);
805 
806 	if (oldname != NULL)
807 		free(oldname);
808 
809 	return 0;
810 }
811 
812 
813 
814 /*
815  * XXX There should be a way for applications to use the efficent
816  *  inline version, but there are opacity/namespace issues.
817  */
818 pthread_t
819 pthread_self(void)
820 {
821 
822 	return pthread__self();
823 }
824 
825 
826 int
827 pthread_cancel(pthread_t thread)
828 {
829 
830 	if (pthread__find(thread) != 0)
831 		return ESRCH;
832 	pthread_mutex_lock(&thread->pt_lock);
833 	thread->pt_flags |= PT_FLAG_CS_PENDING;
834 	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
835 		thread->pt_cancel = 1;
836 		pthread_mutex_unlock(&thread->pt_lock);
837 		_lwp_wakeup(thread->pt_lid);
838 	} else
839 		pthread_mutex_unlock(&thread->pt_lock);
840 
841 	return 0;
842 }
843 
844 
845 int
846 pthread_setcancelstate(int state, int *oldstate)
847 {
848 	pthread_t self;
849 	int retval;
850 
851 	self = pthread__self();
852 	retval = 0;
853 
854 	pthread_mutex_lock(&self->pt_lock);
855 
856 	if (oldstate != NULL) {
857 		if (self->pt_flags & PT_FLAG_CS_DISABLED)
858 			*oldstate = PTHREAD_CANCEL_DISABLE;
859 		else
860 			*oldstate = PTHREAD_CANCEL_ENABLE;
861 	}
862 
863 	if (state == PTHREAD_CANCEL_DISABLE) {
864 		self->pt_flags |= PT_FLAG_CS_DISABLED;
865 		if (self->pt_cancel) {
866 			self->pt_flags |= PT_FLAG_CS_PENDING;
867 			self->pt_cancel = 0;
868 		}
869 	} else if (state == PTHREAD_CANCEL_ENABLE) {
870 		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
871 		/*
872 		 * If a cancellation was requested while cancellation
873 		 * was disabled, note that fact for future
874 		 * cancellation tests.
875 		 */
876 		if (self->pt_flags & PT_FLAG_CS_PENDING) {
877 			self->pt_cancel = 1;
878 			/* This is not a deferred cancellation point. */
879 			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
880 				pthread_mutex_unlock(&self->pt_lock);
881 				pthread__cancelled();
882 			}
883 		}
884 	} else
885 		retval = EINVAL;
886 
887 	pthread_mutex_unlock(&self->pt_lock);
888 
889 	return retval;
890 }
891 
892 
893 int
894 pthread_setcanceltype(int type, int *oldtype)
895 {
896 	pthread_t self;
897 	int retval;
898 
899 	self = pthread__self();
900 	retval = 0;
901 
902 	pthread_mutex_lock(&self->pt_lock);
903 
904 	if (oldtype != NULL) {
905 		if (self->pt_flags & PT_FLAG_CS_ASYNC)
906 			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
907 		else
908 			*oldtype = PTHREAD_CANCEL_DEFERRED;
909 	}
910 
911 	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
912 		self->pt_flags |= PT_FLAG_CS_ASYNC;
913 		if (self->pt_cancel) {
914 			pthread_mutex_unlock(&self->pt_lock);
915 			pthread__cancelled();
916 		}
917 	} else if (type == PTHREAD_CANCEL_DEFERRED)
918 		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
919 	else
920 		retval = EINVAL;
921 
922 	pthread_mutex_unlock(&self->pt_lock);
923 
924 	return retval;
925 }
926 
927 
928 void
929 pthread_testcancel(void)
930 {
931 	pthread_t self;
932 
933 	self = pthread__self();
934 	if (self->pt_cancel)
935 		pthread__cancelled();
936 }
937 
938 
939 /*
940  * POSIX requires that certain functions return an error rather than
941  * invoking undefined behavior even when handed completely bogus
942  * pthread_t values, e.g. stack garbage.
943  */
944 int
945 pthread__find(pthread_t id)
946 {
947 	pthread_t target;
948 	int error;
949 
950 	pthread_rwlock_rdlock(&pthread__alltree_lock);
951 	target = rb_tree_find_node(&pthread__alltree, id);
952 	error = (target && target->pt_state != PT_STATE_DEAD) ? 0 : ESRCH;
953 	pthread_rwlock_unlock(&pthread__alltree_lock);
954 
955 	return error;
956 }
957 
958 
959 void
960 pthread__testcancel(pthread_t self)
961 {
962 
963 	if (self->pt_cancel)
964 		pthread__cancelled();
965 }
966 
967 
968 void
969 pthread__cancelled(void)
970 {
971 	pthread_mutex_t *droplock;
972 	pthread_t self;
973 
974 	self = pthread__self();
975 	droplock = self->pt_droplock;
976 	self->pt_droplock = NULL;
977 
978 	if (droplock != NULL && pthread_mutex_held_np(droplock))
979 		pthread_mutex_unlock(droplock);
980 
981 	pthread_exit(PTHREAD_CANCELED);
982 }
983 
984 
985 void
986 pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
987 {
988 	pthread_t self;
989 	struct pt_clean_t *entry;
990 
991 	self = pthread__self();
992 	entry = store;
993 	entry->ptc_cleanup = cleanup;
994 	entry->ptc_arg = arg;
995 	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
996 }
997 
998 
999 void
1000 pthread__cleanup_pop(int ex, void *store)
1001 {
1002 	pthread_t self;
1003 	struct pt_clean_t *entry;
1004 
1005 	self = pthread__self();
1006 	entry = store;
1007 
1008 	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
1009 	if (ex)
1010 		(*entry->ptc_cleanup)(entry->ptc_arg);
1011 }
1012 
1013 
1014 int *
1015 pthread__errno(void)
1016 {
1017 	pthread_t self;
1018 
1019 	self = pthread__self();
1020 
1021 	return &(self->pt_errno);
1022 }
1023 
ssize_t	_sys_write(int, const void *, size_t);

/*
 * Report a failed assertion on standard error and abort the process.
 *
 * file     - source file of the assertion
 * line     - source line of the assertion
 * function - enclosing function name, or NULL to omit it
 * expr     - text of the failed expression
 *
 * Does not return: SIGABRT is sent to the process and, should that not
 * terminate it, _exit(1) is called.
 */
void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, sizeof(buf),
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	/*
	 * snprintf() returns the length the full message would have had,
	 * or a negative value on error.  Clamp it to what is really in
	 * buf so the write can never run past the end of the buffer.
	 */
	if (len < 0)
		len = 0;
	else if ((size_t)len >= sizeof(buf))
		len = (int)sizeof(buf) - 1;

	(void)_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)kill(getpid(), SIGABRT);

	_exit(1);
}
1049 
1050 
1051 void
1052 pthread__errorfunc(const char *file, int line, const char *function,
1053 		   const char *msg)
1054 {
1055 	char buf[1024];
1056 	size_t len;
1057 
1058 	if (pthread__diagassert == 0)
1059 		return;
1060 
1061 	/*
1062 	 * snprintf should not acquire any locks, or we could
1063 	 * end up deadlocked if the assert caller held locks.
1064 	 */
1065 	len = snprintf(buf, 1024,
1066 	    "%s: Error detected by libpthread: %s.\n"
1067 	    "Detected by file \"%s\", line %d%s%s%s.\n"
1068 	    "See pthread(3) for information.\n",
1069 	    getprogname(), msg, file, line,
1070 	    function ? ", function \"" : "",
1071 	    function ? function : "",
1072 	    function ? "\"" : "");
1073 
1074 	if (pthread__diagassert & DIAGASSERT_STDERR)
1075 		_sys_write(STDERR_FILENO, buf, len);
1076 
1077 	if (pthread__diagassert & DIAGASSERT_SYSLOG)
1078 		syslog(LOG_DEBUG | LOG_USER, "%s", buf);
1079 
1080 	if (pthread__diagassert & DIAGASSERT_ABORT) {
1081 		(void)kill(getpid(), SIGABRT);
1082 		_exit(1);
1083 	}
1084 }
1085 
1086 /*
1087  * Thread park/unpark operations.  The kernel operations are
1088  * modelled after a brief description from "Multithreading in
1089  * the Solaris Operating Environment":
1090  *
1091  * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
1092  */
1093 
1094 #define	OOPS(msg)			\
1095     pthread__errorfunc(__FILE__, __LINE__, __func__, msg)
1096 
1097 int
1098 pthread__park(pthread_t self, pthread_mutex_t *lock,
1099 	      pthread_queue_t *queue, const struct timespec *abstime,
1100 	      int cancelpt, const void *hint)
1101 {
1102 	int rv, error;
1103 	void *obj;
1104 
1105 	/*
1106 	 * For non-interlocked release of mutexes we need a store
1107 	 * barrier before incrementing pt_blocking away from zero.
1108 	 * This is provided by pthread_mutex_unlock().
1109 	 */
1110 	self->pt_willpark = 1;
1111 	pthread_mutex_unlock(lock);
1112 	self->pt_willpark = 0;
1113 	self->pt_blocking++;
1114 
1115 	/*
1116 	 * Wait until we are awoken by a pending unpark operation,
1117 	 * a signal, an unpark posted after we have gone asleep,
1118 	 * or an expired timeout.
1119 	 *
1120 	 * It is fine to test the value of pt_sleepobj without
1121 	 * holding any locks, because:
1122 	 *
1123 	 * o Only the blocking thread (this thread) ever sets them
1124 	 *   to a non-NULL value.
1125 	 *
1126 	 * o Other threads may set them NULL, but if they do so they
1127 	 *   must also make this thread return from _lwp_park.
1128 	 *
1129 	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
1130 	 *   calls and all make use of spinlocks in the kernel.  So
1131 	 *   these system calls act as full memory barriers, and will
1132 	 *   ensure that the calling CPU's store buffers are drained.
1133 	 *   In combination with the spinlock release before unpark,
1134 	 *   this means that modification of pt_sleepobj/onq by another
1135 	 *   thread will become globally visible before that thread
1136 	 *   schedules an unpark operation on this thread.
1137 	 *
1138 	 * Note: the test in the while() statement dodges the park op if
1139 	 * we have already been awoken, unless there is another thread to
1140 	 * awaken.  This saves a syscall - if we were already awakened,
1141 	 * the next call to _lwp_park() would need to return early in order
1142 	 * to eat the previous wakeup.
1143 	 */
1144 	rv = 0;
1145 	do {
1146 		/*
1147 		 * If we deferred unparking a thread, arrange to
1148 		 * have _lwp_park() restart it before blocking.
1149 		 */
1150 		error = _lwp_park(abstime, self->pt_unpark, hint, hint);
1151 		self->pt_unpark = 0;
1152 		if (error != 0) {
1153 			switch (rv = errno) {
1154 			case EINTR:
1155 			case EALREADY:
1156 				rv = 0;
1157 				break;
1158 			case ETIMEDOUT:
1159 				break;
1160 			default:
1161 				OOPS("_lwp_park failed");
1162 				break;
1163 			}
1164 		}
1165 		/* Check for cancellation. */
1166 		if (cancelpt && self->pt_cancel)
1167 			rv = EINTR;
1168 	} while (self->pt_sleepobj != NULL && rv == 0);
1169 
1170 	/*
1171 	 * If we have been awoken early but are still on the queue,
1172 	 * then remove ourself.  Again, it's safe to do the test
1173 	 * without holding any locks.
1174 	 */
1175 	if (__predict_false(self->pt_sleepobj != NULL)) {
1176 		pthread_mutex_lock(lock);
1177 		if ((obj = self->pt_sleepobj) != NULL) {
1178 			PTQ_REMOVE(queue, self, pt_sleep);
1179 			self->pt_sleepobj = NULL;
1180 			if (obj != NULL && self->pt_early != NULL)
1181 				(*self->pt_early)(obj);
1182 		}
1183 		pthread_mutex_unlock(lock);
1184 	}
1185 	self->pt_early = NULL;
1186 	self->pt_blocking--;
1187 	membar_sync();
1188 
1189 	return rv;
1190 }
1191 
1192 void
1193 pthread__unpark(pthread_queue_t *queue, pthread_t self,
1194 		pthread_mutex_t *interlock)
1195 {
1196 	pthread_t target;
1197 	u_int max;
1198 	size_t nwaiters;
1199 
1200 	max = pthread__unpark_max;
1201 	nwaiters = self->pt_nwaiters;
1202 	target = PTQ_FIRST(queue);
1203 	if (nwaiters == max) {
1204 		/* Overflow. */
1205 		(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1206 		    __UNVOLATILE(&interlock->ptm_waiters));
1207 		nwaiters = 0;
1208 	}
1209 	target->pt_sleepobj = NULL;
1210 	self->pt_waiters[nwaiters++] = target->pt_lid;
1211 	PTQ_REMOVE(queue, target, pt_sleep);
1212 	self->pt_nwaiters = nwaiters;
1213 	pthread__mutex_deferwake(self, interlock);
1214 }
1215 
1216 void
1217 pthread__unpark_all(pthread_queue_t *queue, pthread_t self,
1218 		    pthread_mutex_t *interlock)
1219 {
1220 	pthread_t target;
1221 	u_int max;
1222 	size_t nwaiters;
1223 
1224 	max = pthread__unpark_max;
1225 	nwaiters = self->pt_nwaiters;
1226 	PTQ_FOREACH(target, queue, pt_sleep) {
1227 		if (nwaiters == max) {
1228 			/* Overflow. */
1229 			(void)_lwp_unpark_all(self->pt_waiters, nwaiters,
1230 			    __UNVOLATILE(&interlock->ptm_waiters));
1231 			nwaiters = 0;
1232 		}
1233 		target->pt_sleepobj = NULL;
1234 		self->pt_waiters[nwaiters++] = target->pt_lid;
1235 	}
1236 	self->pt_nwaiters = nwaiters;
1237 	PTQ_INIT(queue);
1238 	pthread__mutex_deferwake(self, interlock);
1239 }
1240 
1241 #undef	OOPS
1242 
1243 static void
1244 pthread__initmainstack(void)
1245 {
1246 	struct rlimit slimit;
1247 	const AuxInfo *aux;
1248 	size_t size;
1249 
1250 	_DIAGASSERT(_dlauxinfo() != NULL);
1251 
1252 	if (getrlimit(RLIMIT_STACK, &slimit) == -1)
1253 		err(1, "Couldn't get stack resource consumption limits");
1254 	size = slimit.rlim_cur;
1255 	pthread__main.pt_stack.ss_size = size;
1256 
1257 	for (aux = _dlauxinfo(); aux->a_type != AT_NULL; ++aux) {
1258 		if (aux->a_type == AT_STACKBASE) {
1259 			pthread__main.pt_stack.ss_sp = (void *)aux->a_v;
1260 #ifdef __MACHINE_STACK_GROWS_UP
1261 			pthread__main.pt_stack.ss_sp = (void *)aux->a_v;
1262 #else
1263 			pthread__main.pt_stack.ss_sp = (char *)aux->a_v - size;
1264 #endif
1265 			break;
1266 		}
1267 	}
1268 }
1269 
1270 /*
1271  * Set up the slightly special stack for the "initial" thread, which
1272  * runs on the normal system stack, and thus gets slightly different
1273  * treatment.
1274  */
1275 static void
1276 pthread__initmain(pthread_t *newt)
1277 {
1278 	char *value;
1279 
1280 	pthread__initmainstack();
1281 
1282 	value = pthread__getenv("PTHREAD_STACKSIZE");
1283 	if (value != NULL) {
1284 		pthread__stacksize = atoi(value) * 1024;
1285 		if (pthread__stacksize > pthread__main.pt_stack.ss_size)
1286 			pthread__stacksize = pthread__main.pt_stack.ss_size;
1287 	}
1288 	if (pthread__stacksize == 0)
1289 		pthread__stacksize = pthread__main.pt_stack.ss_size;
1290 	pthread__stacksize += pthread__pagesize - 1;
1291 	pthread__stacksize &= ~(pthread__pagesize - 1);
1292 	if (pthread__stacksize < 4 * pthread__pagesize)
1293 		errx(1, "Stacksize limit is too low, minimum %zd kbyte.",
1294 		    4 * pthread__pagesize / 1024);
1295 
1296 	*newt = &pthread__main;
1297 #ifdef __HAVE___LWP_GETTCB_FAST
1298 	pthread__main.pt_tls = __lwp_gettcb_fast();
1299 #else
1300 	pthread__main.pt_tls = _lwp_getprivate();
1301 #endif
1302 	pthread__main.pt_tls->tcb_pthread = &pthread__main;
1303 }
1304 
/*
 * Three-way comparison of two keys by their numeric address value.
 *
 *	ctx	unused.
 *	n1, n2	the keys; only the pointer values themselves are
 *		compared, nothing is dereferenced.
 *
 * Returns -1 if n1 sorts before n2, 1 if it sorts after, and 0 if
 * both are the same address.
 */
static signed int
pthread__cmp(void *ctx, const void *n1, const void *n2)
{
	const uintptr_t p1 = (uintptr_t)n1;
	const uintptr_t p2 = (uintptr_t)n2;

	if (p1 < p2)
		return -1;
	if (p1 > p2)
		return 1;
	return 0;
}
1317 
/*
 * Lock-free stand-in for getenv(), needed because getenv() wants to
 * use locks.
 *
 * Walks the environ array looking for an entry of the form
 * "name=value" and returns a pointer to the value part inside that
 * entry (not a copy).  Returns NULL if no entry matches.
 */
char *
pthread__getenv(const char *name)
{
	extern char **environ;
	size_t namelen, i;
	char *entry;

	namelen = strlen(name);
	for (i = 0; (entry = environ[i]) != NULL; i++) {
		/* The entry must start with the name ... */
		if (strncmp(name, entry, namelen) != 0)
			continue;
		/* ... and the name must be followed directly by '='. */
		if (entry[namelen] == '=')
			return entry + namelen + 1;
	}

	return NULL;
}
1335 
1336 pthread_mutex_t *
1337 pthread__hashlock(volatile const void *p)
1338 {
1339 	uintptr_t v;
1340 
1341 	v = (uintptr_t)p;
1342 	return &hashlocks[((v >> 9) ^ (v >> 3)) & (NHASHLOCK - 1)].mutex;
1343 }
1344 
1345 int
1346 pthread__checkpri(int pri)
1347 {
1348 	static int havepri;
1349 	static long min, max;
1350 
1351 	if (!havepri) {
1352 		min = sysconf(_SC_SCHED_PRI_MIN);
1353 		max = sysconf(_SC_SCHED_PRI_MAX);
1354 		havepri = 1;
1355 	}
1356 	return (pri < min || pri > max) ? EINVAL : 0;
1357 }
1358