xref: /netbsd-src/lib/libpthread/pthread.c (revision 7fa608457b817eca6e0977b37f758ae064f3c99c)
1 /*	$NetBSD: pthread.c,v 1.86 2007/11/07 00:55:22 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 2001, 2002, 2003, 2006, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Nathan J. Williams and Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __RCSID("$NetBSD: pthread.c,v 1.86 2007/11/07 00:55:22 ad Exp $");
41 
42 #define	__EXPOSE_STACK	1
43 
44 #include <sys/param.h>
45 #include <sys/mman.h>
46 #include <sys/sysctl.h>
47 
48 #include <err.h>
49 #include <errno.h>
50 #include <lwp.h>
51 #include <signal.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <syslog.h>
56 #include <ucontext.h>
57 #include <unistd.h>
58 #include <sched.h>
59 
60 #include "pthread.h"
61 #include "pthread_int.h"
62 
63 #ifdef PTHREAD_MAIN_DEBUG
64 #define SDPRINTF(x) DPRINTF(x)
65 #else
66 #define SDPRINTF(x)
67 #endif
68 
69 
70 pthread_rwlock_t pthread__alltree_lock = PTHREAD_RWLOCK_INITIALIZER;
71 RB_HEAD(__pthread__alltree, __pthread_st) pthread__alltree;
72 
73 #ifndef lint
74 static int	pthread__cmp(struct __pthread_st *, struct __pthread_st *);
75 RB_PROTOTYPE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
76 #endif
77 
78 static void	pthread__create_tramp(void *(*)(void *), void *);
79 static void	pthread__initthread(pthread_t);
80 static void	pthread__scrubthread(pthread_t, char *, int);
81 static int	pthread__stackid_setup(void *, size_t, pthread_t *);
82 static int	pthread__stackalloc(pthread_t *);
83 static void	pthread__initmain(pthread_t *);
84 
85 int pthread__started;
86 
87 pthread_mutex_t pthread__deadqueue_lock = PTHREAD_MUTEX_INITIALIZER;
88 pthread_queue_t pthread__deadqueue;
89 pthread_queue_t pthread__allqueue;
90 
91 static pthread_attr_t pthread_default_attr;
92 
93 enum {
94 	DIAGASSERT_ABORT =	1<<0,
95 	DIAGASSERT_STDERR =	1<<1,
96 	DIAGASSERT_SYSLOG =	1<<2
97 };
98 
99 static int pthread__diagassert = DIAGASSERT_ABORT | DIAGASSERT_STDERR;
100 
101 int pthread__concurrency;
102 int pthread__nspins;
103 int pthread__unpark_max = PTHREAD__UNPARK_MAX;
104 int pthread__osrev;
105 
106 /*
107  * We have to initialize the pthread_stack* variables here because
108  * mutexes are used before pthread_init() and thus pthread__initmain()
109  * are called.  Since mutexes only save the stack pointer and not a
110  * pointer to the thread data, it is safe to change the mapping from
111  * stack pointer to thread data afterwards.
112  */
113 #define	_STACKSIZE_LG 18
114 int	pthread__stacksize_lg = _STACKSIZE_LG;
115 size_t	pthread__stacksize = 1 << _STACKSIZE_LG;
116 vaddr_t	pthread__stackmask = (1 << _STACKSIZE_LG) - 1;
117 vaddr_t pthread__threadmask = (vaddr_t)~((1 << _STACKSIZE_LG) - 1);
118 #undef	_STACKSIZE_LG
119 
120 int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);
121 
122 __strong_alias(__libc_thr_self,pthread_self)
123 __strong_alias(__libc_thr_create,pthread_create)
124 __strong_alias(__libc_thr_exit,pthread_exit)
125 __strong_alias(__libc_thr_errno,pthread__errno)
126 __strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)
127 
128 /*
129  * Static library kludge.  Place a reference to a symbol from any library
130  * file which does not already have a reference here.
131  */
132 extern int pthread__cancel_stub_binder;
133 
134 void *pthread__static_lib_binder[] = {
135 	&pthread__cancel_stub_binder,
136 	pthread_cond_init,
137 	pthread_mutex_init,
138 	pthread_rwlock_init,
139 	pthread_barrier_init,
140 	pthread_key_create,
141 	pthread_setspecific,
142 };
143 
144 /*
145  * This needs to be started by the library loading code, before main()
146  * gets to run, for various things that use the state of the initial thread
147  * to work properly (thread-specific data is an application-visible example;
148  * spinlock counts for mutexes is an internal example).
149  */
150 void
151 pthread_init(void)
152 {
153 	pthread_t first;
154 	char *p;
155 	int i, mib[2];
156 	size_t len;
157 	extern int __isthreaded;
158 
159 	mib[0] = CTL_HW;
160 	mib[1] = HW_NCPU;
161 
162 	len = sizeof(pthread__concurrency);
163 	if (sysctl(mib, 2, &pthread__concurrency, &len, NULL, 0) == -1)
164 		err(1, "sysctl(hw.ncpu");
165 
166 	mib[0] = CTL_KERN;
167 	mib[1] = KERN_OSREV;
168 
169 	len = sizeof(pthread__osrev);
170 	if (sysctl(mib, 2, &pthread__osrev, &len, NULL, 0) == -1)
171 		err(1, "sysctl(hw.osrevision");
172 
173 	/* Initialize locks first; they're needed elsewhere. */
174 	pthread__lockprim_init();
175 
176 	/* Fetch parameters. */
177 	i = (int)_lwp_unpark_all(NULL, 0, NULL);
178 	if (i == -1)
179 		err(1, "_lwp_unpark_all");
180 	if (i < pthread__unpark_max)
181 		pthread__unpark_max = i;
182 
183 	/* Basic data structure setup */
184 	pthread_attr_init(&pthread_default_attr);
185 	PTQ_INIT(&pthread__allqueue);
186 	PTQ_INIT(&pthread__deadqueue);
187 	RB_INIT(&pthread__alltree);
188 
189 	/* Create the thread structure corresponding to main() */
190 	pthread__initmain(&first);
191 	pthread__initthread(first);
192 	pthread__scrubthread(first, NULL, 0);
193 
194 	first->pt_lid = _lwp_self();
195 	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);
196 	RB_INSERT(__pthread__alltree, &pthread__alltree, first);
197 
198 	/* Start subsystems */
199 	PTHREAD_MD_INIT
200 	pthread__debug_init();
201 
202 	for (p = getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
203 		switch (*p) {
204 		case 'a':
205 			pthread__diagassert |= DIAGASSERT_ABORT;
206 			break;
207 		case 'A':
208 			pthread__diagassert &= ~DIAGASSERT_ABORT;
209 			break;
210 		case 'e':
211 			pthread__diagassert |= DIAGASSERT_STDERR;
212 			break;
213 		case 'E':
214 			pthread__diagassert &= ~DIAGASSERT_STDERR;
215 			break;
216 		case 'l':
217 			pthread__diagassert |= DIAGASSERT_SYSLOG;
218 			break;
219 		case 'L':
220 			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
221 			break;
222 		}
223 	}
224 
225 
226 	/* Tell libc that we're here and it should role-play accordingly. */
227 	__isthreaded = 1;
228 }
229 
230 static void
231 pthread__child_callback(void)
232 {
233 	/*
234 	 * Clean up data structures that a forked child process might
235 	 * trip over. Note that if threads have been created (causing
236 	 * this handler to be registered) the standards say that the
237 	 * child will trigger undefined behavior if it makes any
238 	 * pthread_* calls (or any other calls that aren't
239 	 * async-signal-safe), so we don't really have to clean up
240 	 * much. Anything that permits some pthread_* calls to work is
241 	 * merely being polite.
242 	 */
243 	pthread__started = 0;
244 }
245 
246 static void
247 pthread__start(void)
248 {
249 
250 	/*
251 	 * Per-process timers are cleared by fork(); despite the
252 	 * various restrictions on fork() and threads, it's legal to
253 	 * fork() before creating any threads.
254 	 */
255 	pthread_atfork(NULL, NULL, pthread__child_callback);
256 	SDPRINTF(("(pthread__start %p) Started.\n", pthread__self()));
257 }
258 
259 
260 /* General-purpose thread data structure sanitization. */
261 /* ARGSUSED */
262 static void
263 pthread__initthread(pthread_t t)
264 {
265 
266 	t->pt_magic = PT_MAGIC;
267 	t->pt_spinlocks = 0;
268 	t->pt_willpark = 0;
269 	t->pt_unpark = 0;
270 	t->pt_sleeponq = 0;
271 	t->pt_nwaiters = 0;
272 	t->pt_sleepobj = NULL;
273 	t->pt_signalled = 0;
274 	t->pt_havespecific = 0;
275 	t->pt_early = NULL;
276 
277 	pthread_mutex_init(&t->pt_lock, NULL);
278 	PTQ_INIT(&t->pt_cleanup_stack);
279 	PTQ_INIT(&t->pt_joiners);
280 	memset(&t->pt_specific, 0, sizeof(int) * PTHREAD_KEYS_MAX);
281 }
282 
283 static void
284 pthread__scrubthread(pthread_t t, char *name, int flags)
285 {
286 
287 	t->pt_state = PT_STATE_RUNNING;
288 	t->pt_exitval = NULL;
289 	t->pt_flags = flags;
290 	t->pt_cancel = 0;
291 	t->pt_errno = 0;
292 	t->pt_name = name;
293 	t->pt_lid = 0;
294 }
295 
296 
297 int
298 pthread_create(pthread_t *thread, const pthread_attr_t *attr,
299 	    void *(*startfunc)(void *), void *arg)
300 {
301 	pthread_t newthread;
302 	pthread_attr_t nattr;
303 	struct pthread_attr_private *p;
304 	char * volatile name;
305 	unsigned long flag;
306 	int ret;
307 
308 	PTHREADD_ADD(PTHREADD_CREATE);
309 
310 	/*
311 	 * It's okay to check this without a lock because there can
312 	 * only be one thread before it becomes true.
313 	 */
314 	if (pthread__started == 0) {
315 		pthread__start();
316 		pthread__started = 1;
317 	}
318 
319 	if (attr == NULL)
320 		nattr = pthread_default_attr;
321 	else if (attr->pta_magic == PT_ATTR_MAGIC)
322 		nattr = *attr;
323 	else
324 		return EINVAL;
325 
326 	/* Fetch misc. attributes from the attr structure. */
327 	name = NULL;
328 	if ((p = nattr.pta_private) != NULL)
329 		if (p->ptap_name[0] != '\0')
330 			if ((name = strdup(p->ptap_name)) == NULL)
331 				return ENOMEM;
332 
333 	newthread = NULL;
334 
335 	/*
336 	 * Try to reclaim a dead thread.
337 	 */
338 	if (!PTQ_EMPTY(&pthread__deadqueue)) {
339 		pthread_mutex_lock(&pthread__deadqueue_lock);
340 		newthread = PTQ_FIRST(&pthread__deadqueue);
341 		if (newthread != NULL) {
342 			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
343 			pthread_mutex_unlock(&pthread__deadqueue_lock);
344 			if ((newthread->pt_flags & PT_FLAG_DETACHED) != 0) {
345 				/* Still running? */
346 				if (_lwp_kill(newthread->pt_lid, 0) == 0 ||
347 				    errno != ESRCH) {
348 					pthread_mutex_lock(
349 					    &pthread__deadqueue_lock);
350 					PTQ_INSERT_TAIL(&pthread__deadqueue,
351 					    newthread, pt_deadq);
352 					pthread_mutex_unlock(
353 					    &pthread__deadqueue_lock);
354 					newthread = NULL;
355 				}
356 			}
357 		} else
358 			pthread_mutex_unlock(&pthread__deadqueue_lock);
359 	}
360 
361 	/*
362 	 * If necessary set up a stack, allocate space for a pthread_st,
363 	 * and initialize it.
364 	 */
365 	if (newthread == NULL) {
366 		ret = pthread__stackalloc(&newthread);
367 		if (ret != 0) {
368 			if (name)
369 				free(name);
370 			return ret;
371 		}
372 
373 		/* This is used only when creating the thread. */
374 		_INITCONTEXT_U(&newthread->pt_uc);
375 #ifdef PTHREAD_MACHINE_HAS_ID_REGISTER
376 		pthread__uc_id(&newthread->pt_uc) = newthread;
377 #endif
378 		newthread->pt_uc.uc_stack = newthread->pt_stack;
379 		newthread->pt_uc.uc_link = NULL;
380 
381 		/* Add to list of all threads. */
382 		pthread_rwlock_wrlock(&pthread__alltree_lock);
383 		PTQ_INSERT_TAIL(&pthread__allqueue, newthread, pt_allq);
384 		RB_INSERT(__pthread__alltree, &pthread__alltree, newthread);
385 		pthread_rwlock_unlock(&pthread__alltree_lock);
386 
387 		/* Will be reset by the thread upon exit. */
388 		pthread__initthread(newthread);
389 	}
390 
391 	/*
392 	 * Create the new LWP.
393 	 */
394 	pthread__scrubthread(newthread, name, nattr.pta_flags);
395 	makecontext(&newthread->pt_uc, pthread__create_tramp, 2,
396 	    startfunc, arg);
397 
398 	flag = 0;
399 	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0)
400 		flag |= LWP_SUSPENDED;
401 	if ((newthread->pt_flags & PT_FLAG_DETACHED) != 0)
402 		flag |= LWP_DETACHED;
403 	ret = _lwp_create(&newthread->pt_uc, flag, &newthread->pt_lid);
404 	if (ret != 0) {
405 		SDPRINTF(("(pthread_create %p) _lwp_create: %s\n",
406 		    strerror(errno)));
407 		free(name);
408 		newthread->pt_state = PT_STATE_DEAD;
409 		pthread_mutex_lock(&pthread__deadqueue_lock);
410 		PTQ_INSERT_HEAD(&pthread__deadqueue, newthread, pt_deadq);
411 		pthread_mutex_unlock(&pthread__deadqueue_lock);
412 		return ret;
413 	}
414 
415 	/* XXX must die */
416 	newthread->pt_num = newthread->pt_lid;
417 
418 	SDPRINTF(("(pthread_create %p) new thread %p (name %p, lid %d).\n",
419 		  pthread__self(), newthread, newthread->pt_name,
420 		  (int)newthread->pt_lid));
421 
422 	*thread = newthread;
423 
424 	return 0;
425 }
426 
427 
428 static void
429 pthread__create_tramp(void *(*start)(void *), void *arg)
430 {
431 	pthread_t self;
432 	void *retval;
433 
434 	/*
435 	 * Throw away some stack in a feeble attempt to reduce cache
436 	 * thrash.  May help for SMT processors.  XXX We should not
437 	 * be allocating stacks on fixed 2MB boundaries.  Needs a
438 	 * thread register or decent thread local storage.  Note
439 	 * that pt_lid may not be set by this point, but we don't
440 	 * care.
441 	 */
442 	self = pthread__self();
443 	(void)alloca(((unsigned)self->pt_lid & 7) << 8);
444 
445 	if (self->pt_name != NULL) {
446 		pthread_mutex_lock(&self->pt_lock);
447 		if (self->pt_name != NULL)
448 			(void)_lwp_setname(_lwp_self(), self->pt_name);
449 		pthread_mutex_unlock(&self->pt_lock);
450 	}
451 
452 	retval = (*start)(arg);
453 
454 	pthread_exit(retval);
455 
456 	/*NOTREACHED*/
457 	pthread__abort();
458 }
459 
460 int
461 pthread_suspend_np(pthread_t thread)
462 {
463 	pthread_t self;
464 
465 	self = pthread__self();
466 	if (self == thread) {
467 		return EDEADLK;
468 	}
469 #ifdef ERRORCHECK
470 	if (pthread__find(thread) != 0)
471 		return ESRCH;
472 #endif
473 	SDPRINTF(("(pthread_suspend_np %p) Suspend thread %p.\n",
474 	    pthread__self(), thread));
475 
476 	if (_lwp_suspend(thread->pt_lid) == 0)
477 		return 0;
478 	return errno;
479 }
480 
481 int
482 pthread_resume_np(pthread_t thread)
483 {
484 
485 #ifdef ERRORCHECK
486 	if (pthread__find(thread) != 0)
487 		return ESRCH;
488 #endif
489 	SDPRINTF(("(pthread_resume_np %p) Resume thread %p.\n",
490 	    pthread__self(), thread));
491 
492 	if (_lwp_continue(thread->pt_lid) == 0)
493 		return 0;
494 	return errno;
495 }
496 
497 void
498 pthread_exit(void *retval)
499 {
500 	pthread_t self;
501 	struct pt_clean_t *cleanup;
502 	char *name;
503 
504 	self = pthread__self();
505 	SDPRINTF(("(pthread_exit %p) status %p, flags %x, cancel %d\n",
506 		  self, retval, self->pt_flags, self->pt_cancel));
507 
508 	/* Disable cancellability. */
509 	pthread_mutex_lock(&self->pt_lock);
510 	self->pt_flags |= PT_FLAG_CS_DISABLED;
511 	self->pt_cancel = 0;
512 	pthread_mutex_unlock(&self->pt_lock);
513 
514 	/* Call any cancellation cleanup handlers */
515 	while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
516 		cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
517 		PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
518 		(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
519 	}
520 
521 	/* Perform cleanup of thread-specific data */
522 	pthread__destroy_tsd(self);
523 
524 	self->pt_exitval = retval;
525 
526 	pthread_mutex_lock(&self->pt_lock);
527 	if (self->pt_flags & PT_FLAG_DETACHED) {
528 		self->pt_state = PT_STATE_DEAD;
529 		name = self->pt_name;
530 		self->pt_name = NULL;
531 		pthread_mutex_lock(&pthread__deadqueue_lock);
532 		PTQ_INSERT_TAIL(&pthread__deadqueue, self, pt_deadq);
533 		pthread_mutex_unlock(&pthread__deadqueue_lock);
534 		pthread_mutex_unlock(&self->pt_lock);
535 		if (name != NULL)
536 			free(name);
537 		_lwp_exit();
538 	} else {
539 		self->pt_state = PT_STATE_ZOMBIE;
540 		pthread_mutex_unlock(&self->pt_lock);
541 		/* Note: name will be freed by the joiner. */
542 		_lwp_exit();
543 	}
544 
545 	/*NOTREACHED*/
546 	pthread__abort();
547 	exit(1);
548 }
549 
550 
551 int
552 pthread_join(pthread_t thread, void **valptr)
553 {
554 	pthread_t self;
555 	char *name;
556 
557 	self = pthread__self();
558 	SDPRINTF(("(pthread_join %p) Joining %p.\n", self, thread));
559 
560 	if (pthread__find(thread) != 0)
561 		return ESRCH;
562 
563 	if (thread->pt_magic != PT_MAGIC)
564 		return EINVAL;
565 
566 	if (thread == self)
567 		return EDEADLK;
568 
569 	/*
570 	 * IEEE Std 1003.1, 2004 Edition:
571 	 *
572 	 * "The pthread_join() function shall not return an
573 	 * error code of [EINTR]."
574 	 */
575 	while (_lwp_wait(thread->pt_lid, NULL) != 0) {
576 		if (errno != EINTR)
577 			return errno;
578 	}
579 
580 	/*
581 	 * No need to lock - nothing else should (legally) be
582 	 * interested in the thread's state at this point.
583 	 *
584 	 * _lwp_wait() provides a barrier, so the user level
585 	 * thread state will be visible to us at this point.
586 	 */
587 	if (thread->pt_state != PT_STATE_ZOMBIE) {
588 		pthread__errorfunc(__FILE__, __LINE__, __func__,
589 		    "not a zombie");
590 	}
591 	if (valptr != NULL)
592 		*valptr = thread->pt_exitval;
593 	name = thread->pt_name;
594 	thread->pt_name = NULL;
595 	thread->pt_state = PT_STATE_DEAD;
596 	pthread_mutex_lock(&pthread__deadqueue_lock);
597 	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
598 	pthread_mutex_unlock(&pthread__deadqueue_lock);
599 	SDPRINTF(("(pthread_join %p) Joined %p.\n", self, thread));
600 	if (name != NULL)
601 		free(name);
602 	return 0;
603 }
604 
605 
606 int
607 pthread_equal(pthread_t t1, pthread_t t2)
608 {
609 
610 	/* Nothing special here. */
611 	return (t1 == t2);
612 }
613 
614 
615 int
616 pthread_detach(pthread_t thread)
617 {
618 	int rv;
619 
620 	if (pthread__find(thread) != 0)
621 		return ESRCH;
622 
623 	if (thread->pt_magic != PT_MAGIC)
624 		return EINVAL;
625 
626 	pthread_mutex_lock(&thread->pt_lock);
627 	thread->pt_flags |= PT_FLAG_DETACHED;
628 	rv = _lwp_detach(thread->pt_lid);
629 	pthread_mutex_unlock(&thread->pt_lock);
630 
631 	if (rv == 0)
632 		return 0;
633 	return errno;
634 }
635 
636 
637 int
638 pthread_getname_np(pthread_t thread, char *name, size_t len)
639 {
640 
641 	if (pthread__find(thread) != 0)
642 		return ESRCH;
643 
644 	if (thread->pt_magic != PT_MAGIC)
645 		return EINVAL;
646 
647 	pthread_mutex_lock(&thread->pt_lock);
648 	if (thread->pt_name == NULL)
649 		name[0] = '\0';
650 	else
651 		strlcpy(name, thread->pt_name, len);
652 	pthread_mutex_unlock(&thread->pt_lock);
653 
654 	return 0;
655 }
656 
657 
658 int
659 pthread_setname_np(pthread_t thread, const char *name, void *arg)
660 {
661 	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
662 	int namelen;
663 
664 	if (pthread__find(thread) != 0)
665 		return ESRCH;
666 
667 	if (thread->pt_magic != PT_MAGIC)
668 		return EINVAL;
669 
670 	namelen = snprintf(newname, sizeof(newname), name, arg);
671 	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
672 		return EINVAL;
673 
674 	cp = strdup(newname);
675 	if (cp == NULL)
676 		return ENOMEM;
677 
678 	pthread_mutex_lock(&thread->pt_lock);
679 	oldname = thread->pt_name;
680 	thread->pt_name = cp;
681 	(void)_lwp_setname(thread->pt_lid, cp);
682 	pthread_mutex_unlock(&thread->pt_lock);
683 
684 	if (oldname != NULL)
685 		free(oldname);
686 
687 	return 0;
688 }
689 
690 
691 
692 /*
693  * XXX There should be a way for applications to use the efficent
694  *  inline version, but there are opacity/namespace issues.
695  */
696 pthread_t
697 pthread_self(void)
698 {
699 
700 	return pthread__self();
701 }
702 
703 
704 int
705 pthread_cancel(pthread_t thread)
706 {
707 
708 	if (pthread__find(thread) != 0)
709 		return ESRCH;
710 	pthread_mutex_lock(&thread->pt_lock);
711 	thread->pt_flags |= PT_FLAG_CS_PENDING;
712 	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
713 		thread->pt_cancel = 1;
714 		pthread_mutex_unlock(&thread->pt_lock);
715 		_lwp_wakeup(thread->pt_lid);
716 	} else
717 		pthread_mutex_unlock(&thread->pt_lock);
718 
719 	return 0;
720 }
721 
722 
723 int
724 pthread_setcancelstate(int state, int *oldstate)
725 {
726 	pthread_t self;
727 	int retval;
728 
729 	self = pthread__self();
730 	retval = 0;
731 
732 	pthread_mutex_lock(&self->pt_lock);
733 
734 	if (oldstate != NULL) {
735 		if (self->pt_flags & PT_FLAG_CS_DISABLED)
736 			*oldstate = PTHREAD_CANCEL_DISABLE;
737 		else
738 			*oldstate = PTHREAD_CANCEL_ENABLE;
739 	}
740 
741 	if (state == PTHREAD_CANCEL_DISABLE) {
742 		self->pt_flags |= PT_FLAG_CS_DISABLED;
743 		if (self->pt_cancel) {
744 			self->pt_flags |= PT_FLAG_CS_PENDING;
745 			self->pt_cancel = 0;
746 		}
747 	} else if (state == PTHREAD_CANCEL_ENABLE) {
748 		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
749 		/*
750 		 * If a cancellation was requested while cancellation
751 		 * was disabled, note that fact for future
752 		 * cancellation tests.
753 		 */
754 		if (self->pt_flags & PT_FLAG_CS_PENDING) {
755 			self->pt_cancel = 1;
756 			/* This is not a deferred cancellation point. */
757 			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
758 				pthread_mutex_unlock(&self->pt_lock);
759 				pthread_exit(PTHREAD_CANCELED);
760 			}
761 		}
762 	} else
763 		retval = EINVAL;
764 
765 	pthread_mutex_unlock(&self->pt_lock);
766 
767 	return retval;
768 }
769 
770 
771 int
772 pthread_setcanceltype(int type, int *oldtype)
773 {
774 	pthread_t self;
775 	int retval;
776 
777 	self = pthread__self();
778 	retval = 0;
779 
780 	pthread_mutex_lock(&self->pt_lock);
781 
782 	if (oldtype != NULL) {
783 		if (self->pt_flags & PT_FLAG_CS_ASYNC)
784 			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
785 		else
786 			*oldtype = PTHREAD_CANCEL_DEFERRED;
787 	}
788 
789 	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
790 		self->pt_flags |= PT_FLAG_CS_ASYNC;
791 		if (self->pt_cancel) {
792 			pthread_mutex_unlock(&self->pt_lock);
793 			pthread_exit(PTHREAD_CANCELED);
794 		}
795 	} else if (type == PTHREAD_CANCEL_DEFERRED)
796 		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
797 	else
798 		retval = EINVAL;
799 
800 	pthread_mutex_unlock(&self->pt_lock);
801 
802 	return retval;
803 }
804 
805 
806 void
807 pthread_testcancel()
808 {
809 	pthread_t self;
810 
811 	self = pthread__self();
812 	if (self->pt_cancel)
813 		pthread_exit(PTHREAD_CANCELED);
814 }
815 
816 
817 /*
818  * POSIX requires that certain functions return an error rather than
819  * invoking undefined behavior even when handed completely bogus
820  * pthread_t values, e.g. stack garbage or (pthread_t)666. This
821  * utility routine searches the list of threads for the pthread_t
822  * value without dereferencing it.
823  */
824 int
825 pthread__find(pthread_t id)
826 {
827 	pthread_t target;
828 
829 	pthread_rwlock_rdlock(&pthread__alltree_lock);
830 	/* LINTED */
831 	target = RB_FIND(__pthread__alltree, &pthread__alltree, id);
832 	pthread_rwlock_unlock(&pthread__alltree_lock);
833 
834 	if (target == NULL || target->pt_state == PT_STATE_DEAD)
835 		return ESRCH;
836 
837 	return 0;
838 }
839 
840 
841 void
842 pthread__testcancel(pthread_t self)
843 {
844 
845 	if (self->pt_cancel)
846 		pthread_exit(PTHREAD_CANCELED);
847 }
848 
849 
850 void
851 pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
852 {
853 	pthread_t self;
854 	struct pt_clean_t *entry;
855 
856 	self = pthread__self();
857 	entry = store;
858 	entry->ptc_cleanup = cleanup;
859 	entry->ptc_arg = arg;
860 	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
861 }
862 
863 
864 void
865 pthread__cleanup_pop(int ex, void *store)
866 {
867 	pthread_t self;
868 	struct pt_clean_t *entry;
869 
870 	self = pthread__self();
871 	entry = store;
872 
873 	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
874 	if (ex)
875 		(*entry->ptc_cleanup)(entry->ptc_arg);
876 }
877 
878 
879 int *
880 pthread__errno(void)
881 {
882 	pthread_t self;
883 
884 	self = pthread__self();
885 
886 	return &(self->pt_errno);
887 }
888 
889 ssize_t	_sys_write(int, const void *, size_t);
890 
/*
 * Report a failed assertion on standard error and terminate the
 * process: SIGABRT is sent to the process, with _exit(1) as a fallback.
 *
 * file, line:	location of the assertion.
 * function:	enclosing function name, or NULL if unknown.
 * expr:	text of the failed expression.
 */
void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	SDPRINTF(("(af)\n"));

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, sizeof(buf),
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	/*
	 * snprintf returns the length the full message would have had,
	 * which can exceed the buffer, or a negative value on error.
	 * Clamp it so that we never write bytes from outside buf.
	 */
	if (len < 0)
		len = 0;
	else if ((size_t)len >= sizeof(buf))
		len = (int)sizeof(buf) - 1;

	_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)kill(getpid(), SIGABRT);

	_exit(1);
}
916 
917 
918 void
919 pthread__errorfunc(const char *file, int line, const char *function,
920 		   const char *msg)
921 {
922 	char buf[1024];
923 	size_t len;
924 
925 	if (pthread__diagassert == 0)
926 		return;
927 
928 	/*
929 	 * snprintf should not acquire any locks, or we could
930 	 * end up deadlocked if the assert caller held locks.
931 	 */
932 	len = snprintf(buf, 1024,
933 	    "%s: Error detected by libpthread: %s.\n"
934 	    "Detected by file \"%s\", line %d%s%s%s.\n"
935 	    "See pthread(3) for information.\n",
936 	    getprogname(), msg, file, line,
937 	    function ? ", function \"" : "",
938 	    function ? function : "",
939 	    function ? "\"" : "");
940 
941 	if (pthread__diagassert & DIAGASSERT_STDERR)
942 		_sys_write(STDERR_FILENO, buf, len);
943 
944 	if (pthread__diagassert & DIAGASSERT_SYSLOG)
945 		syslog(LOG_DEBUG | LOG_USER, "%s", buf);
946 
947 	if (pthread__diagassert & DIAGASSERT_ABORT) {
948 		(void)kill(getpid(), SIGABRT);
949 		_exit(1);
950 	}
951 }
952 
953 /*
954  * Thread park/unpark operations.  The kernel operations are
955  * modelled after a brief description from "Multithreading in
956  * the Solaris Operating Environment":
957  *
958  * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
959  */
960 
961 #define	OOPS(msg)			\
962     pthread__errorfunc(__FILE__, __LINE__, __func__, msg)
963 
964 int
965 pthread__park(pthread_t self, pthread_spin_t *lock,
966 	      pthread_queue_t *queue, const struct timespec *abstime,
967 	      int cancelpt, const void *hint)
968 {
969 	int rv, error;
970 	void *obj;
971 
972 	SDPRINTF(("(pthread__park %p) queue %p enter\n", self, queue));
973 
974 	/* Clear the willpark flag, since we're about to block. */
975 	self->pt_willpark = 0;
976 
977 	/*
978 	 * Kernels before 4.99.27 can't park and unpark in one step,
979 	 * so take care of it now if on an old kernel.
980 	 *
981 	 * XXX Remove this check before NetBSD 5.0 is released.
982 	 * It's for compatibility with recent -current only.
983 	 */
984 	if (__predict_false(pthread__osrev < 499002700) &&
985 	    self->pt_unpark != 0) {
986 		_lwp_unpark(self->pt_unpark, self->pt_unparkhint);
987 		self->pt_unpark = 0;
988 	}
989 
990 	/*
991 	 * Wait until we are awoken by a pending unpark operation,
992 	 * a signal, an unpark posted after we have gone asleep,
993 	 * or an expired timeout.
994 	 *
995 	 * It is fine to test the value of both pt_sleepobj and
996 	 * pt_sleeponq without holding any locks, because:
997 	 *
998 	 * o Only the blocking thread (this thread) ever sets them
999 	 *   to a non-NULL value.
1000 	 *
1001 	 * o Other threads may set them NULL, but if they do so they
1002 	 *   must also make this thread return from _lwp_park.
1003 	 *
1004 	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
1005 	 *   calls and all make use of spinlocks in the kernel.  So
1006 	 *   these system calls act as full memory barriers, and will
1007 	 *   ensure that the calling CPU's store buffers are drained.
1008 	 *   In combination with the spinlock release before unpark,
1009 	 *   this means that modification of pt_sleepobj/onq by another
1010 	 *   thread will become globally visible before that thread
1011 	 *   schedules an unpark operation on this thread.
1012 	 *
1013 	 * Note: the test in the while() statement dodges the park op if
1014 	 * we have already been awoken, unless there is another thread to
1015 	 * awaken.  This saves a syscall - if we were already awakened,
1016 	 * the next call to _lwp_park() would need to return early in order
1017 	 * to eat the previous wakeup.
1018 	 */
1019 	rv = 0;
1020 	while ((self->pt_sleepobj != NULL || self->pt_unpark != 0) && rv == 0) {
1021 		/*
1022 		 * If we deferred unparking a thread, arrange to
1023 		 * have _lwp_park() restart it before blocking.
1024 		 */
1025 		error = _lwp_park(abstime, self->pt_unpark, hint,
1026 		    self->pt_unparkhint);
1027 		self->pt_unpark = 0;
1028 		if (error != 0) {
1029 			switch (rv = errno) {
1030 			case EINTR:
1031 			case EALREADY:
1032 				rv = 0;
1033 				break;
1034 			case ETIMEDOUT:
1035 				break;
1036 			default:
1037 				OOPS("_lwp_park failed");
1038 				SDPRINTF(("(pthread__park %p) syscall rv=%d\n",
1039 				    self, rv));
1040 				break;
1041 			}
1042 		}
1043 		/* Check for cancellation. */
1044 		if (cancelpt && self->pt_cancel)
1045 			rv = EINTR;
1046 	}
1047 
1048 	/*
1049 	 * If we have been awoken early but are still on the queue,
1050 	 * then remove ourself.  Again, it's safe to do the test
1051 	 * without holding any locks.
1052 	 */
1053 	if (__predict_false(self->pt_sleeponq)) {
1054 		pthread_spinlock(lock);
1055 		if (self->pt_sleeponq) {
1056 			PTQ_REMOVE(queue, self, pt_sleep);
1057 			obj = self->pt_sleepobj;
1058 			self->pt_sleepobj = NULL;
1059 			self->pt_sleeponq = 0;
1060 			if (obj != NULL && self->pt_early != NULL)
1061 				(*self->pt_early)(obj);
1062 		}
1063 		pthread_spinunlock(lock);
1064 	}
1065 	self->pt_early = NULL;
1066 
1067 	SDPRINTF(("(pthread__park %p) queue %p exit\n", self, queue));
1068 
1069 	return rv;
1070 }
1071 
1072 void
1073 pthread__unpark(pthread_t self, pthread_spin_t *lock,
1074 		pthread_queue_t *queue, pthread_t target)
1075 {
1076 	int rv;
1077 
1078 	if (target == NULL) {
1079 		pthread_spinunlock(lock);
1080 		return;
1081 	}
1082 
1083 	SDPRINTF(("(pthread__unpark %p) queue %p target %p\n",
1084 	    self, queue, target));
1085 
1086 	/*
1087 	 * Easy: the thread has already been removed from
1088 	 * the queue, so just awaken it.
1089 	 */
1090 	target->pt_sleepobj = NULL;
1091 	target->pt_sleeponq = 0;
1092 
1093 	/*
1094 	 * Releasing the spinlock serves as a store barrier,
1095 	 * which ensures that all our modifications are visible
1096 	 * to the thread in pthread__park() before the unpark
1097 	 * operation is set in motion.
1098 	 */
1099 	pthread_spinunlock(lock);
1100 
1101 	/*
1102 	 * If the calling thread is about to block, defer
1103 	 * unparking the target until _lwp_park() is called.
1104 	 */
1105 	if (self->pt_willpark && self->pt_unpark == 0) {
1106 		self->pt_unpark = target->pt_lid;
1107 		self->pt_unparkhint = queue;
1108 	} else {
1109 		rv = _lwp_unpark(target->pt_lid, queue);
1110 		if (rv != 0 && errno != EALREADY && errno != EINTR) {
1111 			SDPRINTF(("(pthread__unpark %p) syscall rv=%d\n",
1112 			    self, rv));
1113 			OOPS("_lwp_unpark failed");
1114 		}
1115 	}
1116 }
1117 
1118 void
1119 pthread__unpark_all(pthread_t self, pthread_spin_t *lock,
1120 		    pthread_queue_t *queue)
1121 {
1122 	ssize_t n, rv;
1123 	pthread_t thread, next;
1124 	void *wakeobj;
1125 
1126 	if (PTQ_EMPTY(queue) && self->pt_nwaiters == 0) {
1127 		pthread_spinunlock(lock);
1128 		return;
1129 	}
1130 
1131 	wakeobj = queue;
1132 
1133 	for (;; n = 0) {
1134 		/*
1135 		 * Pull waiters from the queue and add to this
1136 		 * thread's waiters list.
1137 		 */
1138 		thread = PTQ_FIRST(queue);
1139 		for (n = self->pt_nwaiters, self->pt_nwaiters = 0;
1140 		    n < pthread__unpark_max && thread != NULL;
1141 		    thread = next) {
1142 			/*
1143 			 * If the sleepobj pointer is non-NULL, it
1144 			 * means one of two things:
1145 			 *
1146 			 * o The thread has awoken early, spun
1147 			 *   through application code and is
1148 			 *   once more asleep on this object.
1149 			 *
1150 			 * o This is a new thread that has blocked
1151 			 *   on the object after we have released
1152 			 *   the interlock in this loop.
1153 			 *
1154 			 * In both cases we shouldn't remove the
1155 			 * thread from the queue.
1156 			 */
1157 			next = PTQ_NEXT(thread, pt_sleep);
1158 			if (thread->pt_sleepobj != wakeobj)
1159 				continue;
1160 			thread->pt_sleepobj = NULL;
1161 			thread->pt_sleeponq = 0;
1162 			self->pt_waiters[n++] = thread->pt_lid;
1163 			PTQ_REMOVE(queue, thread, pt_sleep);
1164 			SDPRINTF(("(pthread__unpark_all %p) queue %p "
1165 			    "unpark %p\n", self, queue, thread));
1166 		}
1167 
1168 		/*
1169 		 * Releasing the spinlock serves as a store barrier,
1170 		 * which ensures that all our modifications are visible
1171 		 * to the thread in pthread__park() before the unpark
1172 		 * operation is set in motion.
1173 		 */
1174 		switch (n) {
1175 		case 0:
1176 			pthread_spinunlock(lock);
1177 			return;
1178 		case 1:
1179 			/*
1180 			 * If the calling thread is about to block,
1181 			 * defer unparking the target until _lwp_park()
1182 			 * is called.
1183 			 */
1184 			pthread_spinunlock(lock);
1185 			if (self->pt_willpark && self->pt_unpark == 0) {
1186 				self->pt_unpark = self->pt_waiters[0];
1187 				self->pt_unparkhint = queue;
1188 				return;
1189 			}
1190 			rv = (ssize_t)_lwp_unpark(self->pt_waiters[0], queue);
1191 			if (rv != 0 && errno != EALREADY && errno != EINTR) {
1192 				OOPS("_lwp_unpark failed");
1193 				SDPRINTF(("(pthread__unpark_all %p) "
1194 				    "syscall rv=%d\n", self, rv));
1195 			}
1196 			return;
1197 		default:
1198 			/*
1199 			 * Clear all sleepobj pointers, since we
1200 			 * release the spin lock before awkening
1201 			 * everybody, and must synchronise with
1202 			 * pthread__park().
1203 			 */
1204 			while (thread != NULL) {
1205 				thread->pt_sleepobj = NULL;
1206 				thread = PTQ_NEXT(thread, pt_sleep);
1207 			}
1208 			/*
1209 			 * Now only interested in waking threads
1210 			 * marked to be woken (sleepobj == NULL).
1211 			 */
1212 			wakeobj = NULL;
1213 			pthread_spinunlock(lock);
1214 			rv = _lwp_unpark_all(self->pt_waiters, (size_t)n,
1215 			    queue);
1216 			if (rv != 0 && errno != EINTR) {
1217 				OOPS("_lwp_unpark_all failed");
1218 				SDPRINTF(("(pthread__unpark_all %p) "
1219 				    "syscall rv=%d\n", self, rv));
1220 			}
1221 			break;
1222 		}
1223 		pthread_spinlock(lock);
1224 	}
1225 }
1226 
1227 #undef	OOPS
1228 
1229 /*
1230  * Allocate a stack for a thread, and set it up. It needs to be aligned, so
1231  * that a thread can find itself by its stack pointer.
1232  */
1233 static int
1234 pthread__stackalloc(pthread_t *newt)
1235 {
1236 	void *addr;
1237 
1238 	addr = mmap(NULL, pthread__stacksize, PROT_READ|PROT_WRITE,
1239 	    MAP_ANON|MAP_PRIVATE | MAP_ALIGNED(pthread__stacksize_lg),
1240 	    -1, (off_t)0);
1241 
1242 	if (addr == MAP_FAILED)
1243 		return ENOMEM;
1244 
1245 	pthread__assert(((intptr_t)addr & pthread__stackmask) == 0);
1246 
1247 	return pthread__stackid_setup(addr, pthread__stacksize, newt);
1248 }
1249 
1250 
1251 /*
1252  * Set up the slightly special stack for the "initial" thread, which
1253  * runs on the normal system stack, and thus gets slightly different
1254  * treatment.
1255  */
1256 static void
1257 pthread__initmain(pthread_t *newt)
1258 {
1259 	struct rlimit slimit;
1260 	size_t pagesize;
1261 	pthread_t t;
1262 	void *base;
1263 	size_t size;
1264 	int error, ret;
1265 	char *value;
1266 
1267 	pagesize = (size_t)sysconf(_SC_PAGESIZE);
1268 	pthread__stacksize = 0;
1269 	ret = getrlimit(RLIMIT_STACK, &slimit);
1270 	if (ret == -1)
1271 		err(1, "Couldn't get stack resource consumption limits");
1272 	value = getenv("PTHREAD_STACKSIZE");
1273 	if (value) {
1274 		pthread__stacksize = atoi(value) * 1024;
1275 		if (pthread__stacksize > slimit.rlim_cur)
1276 			pthread__stacksize = (size_t)slimit.rlim_cur;
1277 	}
1278 	if (pthread__stacksize == 0)
1279 		pthread__stacksize = (size_t)slimit.rlim_cur;
1280 	if (pthread__stacksize < 4 * pagesize)
1281 		errx(1, "Stacksize limit is too low, minimum %zd kbyte.",
1282 		    4 * pagesize / 1024);
1283 
1284 	pthread__stacksize_lg = -1;
1285 	while (pthread__stacksize) {
1286 		pthread__stacksize >>= 1;
1287 		pthread__stacksize_lg++;
1288 	}
1289 
1290 	pthread__stacksize = (1 << pthread__stacksize_lg);
1291 	pthread__stackmask = pthread__stacksize - 1;
1292 	pthread__threadmask = ~pthread__stackmask;
1293 
1294 	base = (void *)(pthread__sp() & pthread__threadmask);
1295 	size = pthread__stacksize;
1296 
1297 	error = pthread__stackid_setup(base, size, &t);
1298 	if (error) {
1299 		/* XXX */
1300 		errx(2, "failed to setup main thread: error=%d", error);
1301 	}
1302 
1303 	*newt = t;
1304 }
1305 
1306 static int
1307 /*ARGSUSED*/
1308 pthread__stackid_setup(void *base, size_t size, pthread_t *tp)
1309 {
1310 	pthread_t t;
1311 	void *redaddr;
1312 	size_t pagesize;
1313 	int ret;
1314 
1315 	t = base;
1316 	pagesize = (size_t)sysconf(_SC_PAGESIZE);
1317 
1318 	/*
1319 	 * Put a pointer to the pthread in the bottom (but
1320          * redzone-protected section) of the stack.
1321 	 */
1322 	redaddr = STACK_SHRINK(STACK_MAX(base, size), pagesize);
1323 	t->pt_stack.ss_size = size - 2 * pagesize;
1324 #ifdef __MACHINE_STACK_GROWS_UP
1325 	t->pt_stack.ss_sp = (char *)(void *)base + pagesize;
1326 #else
1327 	t->pt_stack.ss_sp = (char *)(void *)base + 2 * pagesize;
1328 #endif
1329 
1330 	/* Protect the next-to-bottom stack page as a red zone. */
1331 	ret = mprotect(redaddr, pagesize, PROT_NONE);
1332 	if (ret == -1) {
1333 		return errno;
1334 	}
1335 	*tp = t;
1336 	return 0;
1337 }
1338 
1339 #ifndef lint
/*
 * Comparison function handed to RB_GENERATE_STATIC for
 * __pthread__alltree: orders thread structures by address.
 *
 * Returns a positive value if a lies below b, a negative value if a
 * lies above b, and 0 if they are the same node (the same direction as
 * the former "b - a").
 *
 * The addresses are compared explicitly instead of being subtracted:
 * the nodes are unrelated objects, and a pointer difference narrowed to
 * int can lose its sign or collapse to 0 for nodes that are far apart.
 */
static int
pthread__cmp(struct __pthread_st *a, struct __pthread_st *b)
{
	const uintptr_t ua = (uintptr_t)a;
	const uintptr_t ub = (uintptr_t)b;

	if (ua < ub)
		return 1;
	if (ua > ub)
		return -1;
	return 0;
}
1345 RB_GENERATE_STATIC(__pthread__alltree, __pthread_st, pt_alltree, pthread__cmp)
1346 #endif
1347 
1348