/*	$NetBSD: pthread.c,v 1.80 2007/08/23 19:13:23 ad Exp $	*/

/*-
 * Copyright (c) 2001, 2002, 2003, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Nathan J. Williams and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: pthread.c,v 1.80 2007/08/23 19:13:23 ad Exp $");

#define	__EXPOSE_STACK	1

#include <sys/param.h>
#include <sys/mman.h>
#include <sys/sysctl.h>

#include <err.h>
#include <errno.h>
#include <lwp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <ucontext.h>
#include <unistd.h>
#include <sched.h>

#include "pthread.h"
#include "pthread_int.h"

#ifdef PTHREAD_MAIN_DEBUG
#define SDPRINTF(x) DPRINTF(x)
#else
#define SDPRINTF(x)
#endif

/* Maximum number of LWPs to unpark in one operation. */
#define	PTHREAD__UNPARK_MAX	128

static void	pthread__create_tramp(void *(*)(void *), void *);
static void	pthread__initthread(pthread_t);
static void	pthread__scrubthread(pthread_t, char *, int);
static int	pthread__stackid_setup(void *, size_t, pthread_t *);
static int	pthread__stackalloc(pthread_t *);
static void	pthread__initmain(pthread_t *);

int pthread__started;

pthread_spin_t pthread__allqueue_lock = __SIMPLELOCK_UNLOCKED;
pthread_spin_t pthread__deadqueue_lock = __SIMPLELOCK_UNLOCKED;
pthread_queue_t pthread__allqueue;
pthread_queue_t pthread__deadqueue;

static pthread_attr_t pthread_default_attr;

enum {
	DIAGASSERT_ABORT =	1<<0,
	DIAGASSERT_STDERR =	1<<1,
	DIAGASSERT_SYSLOG =	1<<2
};

static int pthread__diagassert = DIAGASSERT_ABORT | DIAGASSERT_STDERR;

int pthread__concurrency;
int pthread__nspins;
int pthread__unpark_max = PTHREAD__UNPARK_MAX;
int pthread__osrev;

/*
 * We have to initialize the pthread_stack* variables here because
 * mutexes are used before pthread_init() and thus pthread__initmain()
 * are called.  Since mutexes only save the stack pointer and not a
 * pointer to the thread data, it is safe to change the mapping from
 * stack pointer to thread data afterwards.
 */
#define	_STACKSIZE_LG 18
int	pthread__stacksize_lg = _STACKSIZE_LG;
size_t	pthread__stacksize = 1 << _STACKSIZE_LG;
vaddr_t	pthread__stackmask = (1 << _STACKSIZE_LG) - 1;
#undef	_STACKSIZE_LG
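
/*
 * Illustration only (not compiled): because every stack is aligned to
 * its own size, a thread can recover its pthread_t from any address on
 * its stack by masking off the low bits.  The real lookup lives in
 * pthread_int.h and the MD code; the helper name below is made up.
 */
#if 0
static inline pthread_t
pthread__sketch_self(void)
{

	/*
	 * The stack base doubles as the thread structure (see
	 * pthread__stackid_setup() below), so masking the stack
	 * pointer with ~pthread__stackmask yields our own pthread_t.
	 */
	return (pthread_t)(pthread__sp() & ~pthread__stackmask);
}
#endif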

int _sys___sigprocmask14(int, const sigset_t *, sigset_t *);

__strong_alias(__libc_thr_self,pthread_self)
__strong_alias(__libc_thr_create,pthread_create)
__strong_alias(__libc_thr_exit,pthread_exit)
__strong_alias(__libc_thr_errno,pthread__errno)
__strong_alias(__libc_thr_setcancelstate,pthread_setcancelstate)

/*
 * Static library kludge.  Place a reference to a symbol from each
 * library file which does not already have a reference here.
 */
extern int pthread__cancel_stub_binder;

void *pthread__static_lib_binder[] = {
	&pthread__cancel_stub_binder,
	pthread_cond_init,
	pthread_mutex_init,
	pthread_rwlock_init,
	pthread_barrier_init,
	pthread_key_create,
	pthread_setspecific,
};

/*
 * This needs to be started by the library loading code, before main()
 * gets to run, for various things that use the state of the initial thread
 * to work properly (thread-specific data is an application-visible example;
 * spinlock counts for mutexes are an internal example).
 */
void
pthread_init(void)
{
	pthread_t first;
	char *p;
	int i, mib[2];
	size_t len;
	extern int __isthreaded;

	mib[0] = CTL_HW;
	mib[1] = HW_NCPU;

	len = sizeof(pthread__concurrency);
	if (sysctl(mib, 2, &pthread__concurrency, &len, NULL, 0) == -1)
		err(1, "sysctl(hw.ncpu)");

	mib[0] = CTL_KERN;
	mib[1] = KERN_OSREV;

	len = sizeof(pthread__osrev);
	if (sysctl(mib, 2, &pthread__osrev, &len, NULL, 0) == -1)
		err(1, "sysctl(kern.osrevision)");

	/* Initialize locks first; they're needed elsewhere. */
	pthread__lockprim_init();

	/* Fetch parameters. */
	i = (int)_lwp_unpark_all(NULL, 0, NULL);
	if (i == -1)
		err(1, "_lwp_unpark_all");
	if (i < pthread__unpark_max)
		pthread__unpark_max = i;

	/* Basic data structure setup */
	pthread_attr_init(&pthread_default_attr);
	PTQ_INIT(&pthread__allqueue);
	PTQ_INIT(&pthread__deadqueue);

	/* Create the thread structure corresponding to main() */
	pthread__initmain(&first);
	pthread__initthread(first);
	pthread__scrubthread(first, NULL, 0);

	first->pt_lid = _lwp_self();
	PTQ_INSERT_HEAD(&pthread__allqueue, first, pt_allq);

	/* Start subsystems */
	PTHREAD_MD_INIT
	pthread__debug_init();

	for (p = getenv("PTHREAD_DIAGASSERT"); p && *p; p++) {
		switch (*p) {
		case 'a':
			pthread__diagassert |= DIAGASSERT_ABORT;
			break;
		case 'A':
			pthread__diagassert &= ~DIAGASSERT_ABORT;
			break;
		case 'e':
			pthread__diagassert |= DIAGASSERT_STDERR;
			break;
		case 'E':
			pthread__diagassert &= ~DIAGASSERT_STDERR;
			break;
		case 'l':
			pthread__diagassert |= DIAGASSERT_SYSLOG;
			break;
		case 'L':
			pthread__diagassert &= ~DIAGASSERT_SYSLOG;
			break;
		}
	}
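
	/*
	 * Example (illustrative): to log libpthread diagnostics to both
	 * stderr and syslog without aborting the process, one could run:
	 *
	 *	PTHREAD_DIAGASSERT=Ael ./prog
	 *
	 * 'A' clears the abort flag, 'e' enables stderr, 'l' enables
	 * syslog; the lower-case/upper-case pairs set and clear each
	 * flag respectively.
	 */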


	/* Tell libc that we're here and it should role-play accordingly. */
	__isthreaded = 1;
}

static void
pthread__child_callback(void)
{
	/*
	 * Clean up data structures that a forked child process might
	 * trip over. Note that if threads have been created (causing
	 * this handler to be registered) the standards say that the
	 * child will trigger undefined behavior if it makes any
	 * pthread_* calls (or any other calls that aren't
	 * async-signal-safe), so we don't really have to clean up
	 * much. Anything that permits some pthread_* calls to work is
	 * merely being polite.
	 */
	pthread__started = 0;
}

static void
pthread__start(void)
{

	/*
	 * Per-process timers are cleared by fork(); despite the
	 * various restrictions on fork() and threads, it's legal to
	 * fork() before creating any threads.
	 */
	pthread_atfork(NULL, NULL, pthread__child_callback);
	SDPRINTF(("(pthread__start %p) Started.\n", pthread__self()));
}


/* General-purpose thread data structure sanitization. */
/* ARGSUSED */
static void
pthread__initthread(pthread_t t)
{

	t->pt_magic = PT_MAGIC;
	t->pt_spinlocks = 0;
	t->pt_willpark = 0;
	t->pt_unpark = 0;
	t->pt_sleeponq = 0;
	t->pt_sleepobj = NULL;
	t->pt_signalled = 0;
	t->pt_havespecific = 0;

	pthread_lockinit(&t->pt_lock);
	PTQ_INIT(&t->pt_cleanup_stack);
	PTQ_INIT(&t->pt_joiners);
	memset(&t->pt_specific, 0, sizeof(int) * PTHREAD_KEYS_MAX);
}

static void
pthread__scrubthread(pthread_t t, char *name, int flags)
{

	t->pt_state = PT_STATE_RUNNING;
	t->pt_exitval = NULL;
	t->pt_flags = flags;
	t->pt_cancel = 0;
	t->pt_errno = 0;
	t->pt_name = name;
	t->pt_lid = 0;
}


int
pthread_create(pthread_t *thread, const pthread_attr_t *attr,
	    void *(*startfunc)(void *), void *arg)
{
	pthread_t newthread;
	pthread_attr_t nattr;
	struct pthread_attr_private *p;
	char * volatile name;
	unsigned long flag;
	int ret;

	PTHREADD_ADD(PTHREADD_CREATE);

	/*
	 * It's okay to check this without a lock because there can
	 * only be one thread before it becomes true.
	 */
	if (pthread__started == 0) {
		pthread__start();
		pthread__started = 1;
	}

	if (attr == NULL)
		nattr = pthread_default_attr;
	else if (attr->pta_magic == PT_ATTR_MAGIC)
		nattr = *attr;
	else
		return EINVAL;

	/* Fetch misc. attributes from the attr structure. */
	name = NULL;
	if ((p = nattr.pta_private) != NULL)
		if (p->ptap_name[0] != '\0')
			if ((name = strdup(p->ptap_name)) == NULL)
				return ENOMEM;

	newthread = NULL;

	/*
	 * Try to reclaim a dead thread.
	 */
	if (!PTQ_EMPTY(&pthread__deadqueue)) {
		pthread_spinlock(&pthread__deadqueue_lock);
		newthread = PTQ_FIRST(&pthread__deadqueue);
		if (newthread != NULL) {
			PTQ_REMOVE(&pthread__deadqueue, newthread, pt_deadq);
			pthread_spinunlock(&pthread__deadqueue_lock);
			if ((newthread->pt_flags & PT_FLAG_DETACHED) != 0) {
				/* Still running? */
				if (_lwp_kill(newthread->pt_lid, 0) == 0 ||
				    errno != ESRCH) {
					pthread_spinlock(
					    &pthread__deadqueue_lock);
					PTQ_INSERT_TAIL(&pthread__deadqueue,
					    newthread, pt_deadq);
					pthread_spinunlock(
					    &pthread__deadqueue_lock);
					newthread = NULL;
				}
			}
		} else
			pthread_spinunlock(&pthread__deadqueue_lock);
	}

	/*
	 * If necessary set up a stack, allocate space for a pthread_st,
	 * and initialize it.
	 */
	if (newthread == NULL) {
		ret = pthread__stackalloc(&newthread);
		if (ret != 0) {
			if (name)
				free(name);
			return ret;
		}

		/* This is used only when creating the thread. */
		_INITCONTEXT_U(&newthread->pt_uc);
#ifdef PTHREAD_MACHINE_HAS_ID_REGISTER
		pthread__uc_id(&newthread->pt_uc) = newthread;
#endif
		newthread->pt_uc.uc_stack = newthread->pt_stack;
		newthread->pt_uc.uc_link = NULL;

		/* Add to list of all threads. */
		pthread_spinlock(&pthread__allqueue_lock);
		PTQ_INSERT_HEAD(&pthread__allqueue, newthread, pt_allq);
		pthread_spinunlock(&pthread__allqueue_lock);

		/* Will be reset by the thread upon exit. */
		pthread__initthread(newthread);
	}

	/*
	 * Create the new LWP.
	 */
	pthread__scrubthread(newthread, name, nattr.pta_flags);
	makecontext(&newthread->pt_uc, pthread__create_tramp, 2,
	    startfunc, arg);

	flag = 0;
	if ((newthread->pt_flags & PT_FLAG_SUSPENDED) != 0)
		flag |= LWP_SUSPENDED;
	if ((newthread->pt_flags & PT_FLAG_DETACHED) != 0)
		flag |= LWP_DETACHED;
	ret = _lwp_create(&newthread->pt_uc, flag, &newthread->pt_lid);
	if (ret != 0) {
		SDPRINTF(("(pthread_create %p) _lwp_create: %s\n",
		    pthread__self(), strerror(errno)));
		free(name);
		newthread->pt_state = PT_STATE_DEAD;
		pthread_spinlock(&pthread__deadqueue_lock);
		PTQ_INSERT_HEAD(&pthread__deadqueue, newthread, pt_deadq);
		pthread_spinunlock(&pthread__deadqueue_lock);
		return ret;
	}

	/* XXX must die */
	newthread->pt_num = newthread->pt_lid;

	SDPRINTF(("(pthread_create %p) new thread %p (name %p, lid %d).\n",
		  pthread__self(), newthread, newthread->pt_name,
		  (int)newthread->pt_lid));

	*thread = newthread;

	return 0;
}
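
/*
 * Usage sketch (illustrative, not part of the library): create a
 * detached thread through the standard attribute interface.  Error
 * handling is elided for brevity; all names below except the POSIX
 * calls are hypothetical.
 */
#if 0
static void *
worker(void *cookie)
{
	return cookie;
}

static void
spawn_detached(void)
{
	pthread_attr_t attr;
	pthread_t t;

	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	pthread_create(&t, &attr, worker, NULL);
	pthread_attr_destroy(&attr);
}
#endif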


static void
pthread__create_tramp(void *(*start)(void *), void *arg)
{
	void *retval;

	/*
	 * Throw away some stack in a feeble attempt to reduce cache
	 * thrash.  May help for SMT processors.  XXX We should not
	 * be allocating stacks on fixed 2MB boundaries.  Needs a
	 * thread register or decent thread local storage.
	 */
	(void)alloca(((unsigned)pthread__self()->pt_lid & 7) << 8);

	retval = (*start)(arg);

	pthread_exit(retval);

	/*NOTREACHED*/
	pthread__abort();
}
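
/*
 * For reference: the alloca() above discards ((pt_lid & 7) << 8) bytes,
 * i.e. one of eight 256-byte steps (0, 256, ..., 1792 bytes), so threads
 * whose LIDs differ in the low three bits start at different offsets
 * within their identically-aligned stacks.
 */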

int
pthread_suspend_np(pthread_t thread)
{
	pthread_t self;

	self = pthread__self();
	if (self == thread) {
		return EDEADLK;
	}
#ifdef ERRORCHECK
	if (pthread__find(thread) != 0)
		return ESRCH;
#endif
	SDPRINTF(("(pthread_suspend_np %p) Suspend thread %p.\n",
	    pthread__self(), thread));

	if (_lwp_suspend(thread->pt_lid) == 0)
		return 0;
	return errno;
}

int
pthread_resume_np(pthread_t thread)
{

#ifdef ERRORCHECK
	if (pthread__find(thread) != 0)
		return ESRCH;
#endif
	SDPRINTF(("(pthread_resume_np %p) Resume thread %p.\n",
	    pthread__self(), thread));

	if (_lwp_continue(thread->pt_lid) == 0)
		return 0;
	return errno;
}

void
pthread_exit(void *retval)
{
	pthread_t self;
	struct pt_clean_t *cleanup;
	char *name;

	self = pthread__self();
	SDPRINTF(("(pthread_exit %p) status %p, flags %x, cancel %d\n",
		  self, retval, self->pt_flags, self->pt_cancel));

	/* Disable cancellability. */
	pthread_spinlock(&self->pt_lock);
	self->pt_flags |= PT_FLAG_CS_DISABLED;
	self->pt_cancel = 0;
	pthread_spinunlock(&self->pt_lock);

	/* Call any cancellation cleanup handlers */
	while (!PTQ_EMPTY(&self->pt_cleanup_stack)) {
		cleanup = PTQ_FIRST(&self->pt_cleanup_stack);
		PTQ_REMOVE(&self->pt_cleanup_stack, cleanup, ptc_next);
		(*cleanup->ptc_cleanup)(cleanup->ptc_arg);
	}

	/* Perform cleanup of thread-specific data */
	pthread__destroy_tsd(self);

	self->pt_exitval = retval;

	pthread_spinlock(&self->pt_lock);
	if (self->pt_flags & PT_FLAG_DETACHED) {
		self->pt_state = PT_STATE_DEAD;
		name = self->pt_name;
		self->pt_name = NULL;
		pthread_spinlock(&pthread__deadqueue_lock);
		PTQ_INSERT_TAIL(&pthread__deadqueue, self, pt_deadq);
		pthread_spinunlock(&pthread__deadqueue_lock);
		pthread_spinunlock(&self->pt_lock);
		if (name != NULL)
			free(name);
		_lwp_exit();
	} else {
		self->pt_state = PT_STATE_ZOMBIE;
		pthread_spinunlock(&self->pt_lock);
		/* Note: name will be freed by the joiner. */
		_lwp_exit();
	}

	/*NOTREACHED*/
	pthread__abort();
	exit(1);
}


int
pthread_join(pthread_t thread, void **valptr)
{
	pthread_t self;
	char *name;

	self = pthread__self();
	SDPRINTF(("(pthread_join %p) Joining %p.\n", self, thread));

	if (pthread__find(thread) != 0)
		return ESRCH;

	if (thread->pt_magic != PT_MAGIC)
		return EINVAL;

	if (thread == self)
		return EDEADLK;

	/*
	 * IEEE Std 1003.1, 2004 Edition:
	 *
	 * "The pthread_join() function shall not return an
	 * error code of [EINTR]."
	 */
	while (_lwp_wait(thread->pt_lid, NULL) != 0) {
		if (errno != EINTR)
			return errno;
	}

	/*
	 * No need to lock - nothing else should (legally) be
	 * interested in the thread's state at this point.
	 *
	 * _lwp_wait() provides a barrier, so the user level
	 * thread state will be visible to us at this point.
	 */
	if (thread->pt_state != PT_STATE_ZOMBIE) {
		pthread__errorfunc(__FILE__, __LINE__, __func__,
		    "not a zombie");
	}
	if (valptr != NULL)
		*valptr = thread->pt_exitval;
	name = thread->pt_name;
	thread->pt_name = NULL;
	thread->pt_state = PT_STATE_DEAD;
	pthread_spinlock(&pthread__deadqueue_lock);
	PTQ_INSERT_HEAD(&pthread__deadqueue, thread, pt_deadq);
	pthread_spinunlock(&pthread__deadqueue_lock);
	SDPRINTF(("(pthread_join %p) Joined %p.\n", self, thread));
	if (name != NULL)
		free(name);
	return 0;
}


int
pthread_equal(pthread_t t1, pthread_t t2)
{

	/* Nothing special here. */
	return (t1 == t2);
}


int
pthread_detach(pthread_t thread)
{
	pthread_t self;

	self = pthread__self();

	if (pthread__find(thread) != 0)
		return ESRCH;

	if (thread->pt_magic != PT_MAGIC)
		return EINVAL;

	pthread_spinlock(&thread->pt_lock);
	thread->pt_flags |= PT_FLAG_DETACHED;
	pthread_spinunlock(&thread->pt_lock);

	if (_lwp_detach(thread->pt_lid) == 0)
		return 0;
	return errno;
}


int
pthread_getname_np(pthread_t thread, char *name, size_t len)
{

	if (pthread__find(thread) != 0)
		return ESRCH;

	if (thread->pt_magic != PT_MAGIC)
		return EINVAL;

	pthread_spinlock(&thread->pt_lock);
	if (thread->pt_name == NULL)
		name[0] = '\0';
	else
		strlcpy(name, thread->pt_name, len);
	pthread_spinunlock(&thread->pt_lock);

	return 0;
}


int
pthread_setname_np(pthread_t thread, const char *name, void *arg)
{
	char *oldname, *cp, newname[PTHREAD_MAX_NAMELEN_NP];
	int namelen;

	if (pthread__find(thread) != 0)
		return ESRCH;

	if (thread->pt_magic != PT_MAGIC)
		return EINVAL;

	namelen = snprintf(newname, sizeof(newname), name, arg);
	if (namelen >= PTHREAD_MAX_NAMELEN_NP)
		return EINVAL;

	cp = strdup(newname);
	if (cp == NULL)
		return ENOMEM;

	pthread_spinlock(&thread->pt_lock);
	oldname = thread->pt_name;
	thread->pt_name = cp;
	pthread_spinunlock(&thread->pt_lock);

	if (oldname != NULL)
		free(oldname);

	return 0;
}
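
/*
 * Note that "name" is used as a printf(3) format string with "arg" as
 * its single argument, so a caller can (illustrative example) do:
 *
 *	pthread_setname_np(t, "worker/%p", cookie);
 *
 * to bake a per-thread value into the name.
 */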



/*
 * XXX There should be a way for applications to use the efficient
 * inline version, but there are opacity/namespace issues.
 */
pthread_t
pthread_self(void)
{

	return pthread__self();
}


int
pthread_cancel(pthread_t thread)
{

	if (pthread__find(thread) != 0)
		return ESRCH;
	pthread_spinlock(&thread->pt_lock);
	thread->pt_flags |= PT_FLAG_CS_PENDING;
	if ((thread->pt_flags & PT_FLAG_CS_DISABLED) == 0) {
		thread->pt_cancel = 1;
		pthread_spinunlock(&thread->pt_lock);
		_lwp_wakeup(thread->pt_lid);
	} else
		pthread_spinunlock(&thread->pt_lock);

	return 0;
}


int
pthread_setcancelstate(int state, int *oldstate)
{
	pthread_t self;
	int retval;

	self = pthread__self();
	retval = 0;

	pthread_spinlock(&self->pt_lock);

	if (oldstate != NULL) {
		if (self->pt_flags & PT_FLAG_CS_DISABLED)
			*oldstate = PTHREAD_CANCEL_DISABLE;
		else
			*oldstate = PTHREAD_CANCEL_ENABLE;
	}

	if (state == PTHREAD_CANCEL_DISABLE) {
		self->pt_flags |= PT_FLAG_CS_DISABLED;
		if (self->pt_cancel) {
			self->pt_flags |= PT_FLAG_CS_PENDING;
			self->pt_cancel = 0;
		}
	} else if (state == PTHREAD_CANCEL_ENABLE) {
		self->pt_flags &= ~PT_FLAG_CS_DISABLED;
		/*
		 * If a cancellation was requested while cancellation
		 * was disabled, note that fact for future
		 * cancellation tests.
		 */
		if (self->pt_flags & PT_FLAG_CS_PENDING) {
			self->pt_cancel = 1;
			/* This is not a deferred cancellation point. */
			if (self->pt_flags & PT_FLAG_CS_ASYNC) {
				pthread_spinunlock(&self->pt_lock);
				pthread_exit(PTHREAD_CANCELED);
			}
		}
	} else
		retval = EINVAL;

	pthread_spinunlock(&self->pt_lock);

	return retval;
}


int
pthread_setcanceltype(int type, int *oldtype)
{
	pthread_t self;
	int retval;

	self = pthread__self();
	retval = 0;

	pthread_spinlock(&self->pt_lock);

	if (oldtype != NULL) {
		if (self->pt_flags & PT_FLAG_CS_ASYNC)
			*oldtype = PTHREAD_CANCEL_ASYNCHRONOUS;
		else
			*oldtype = PTHREAD_CANCEL_DEFERRED;
	}

	if (type == PTHREAD_CANCEL_ASYNCHRONOUS) {
		self->pt_flags |= PT_FLAG_CS_ASYNC;
		if (self->pt_cancel) {
			pthread_spinunlock(&self->pt_lock);
			pthread_exit(PTHREAD_CANCELED);
		}
	} else if (type == PTHREAD_CANCEL_DEFERRED)
		self->pt_flags &= ~PT_FLAG_CS_ASYNC;
	else
		retval = EINVAL;

	pthread_spinunlock(&self->pt_lock);

	return retval;
}


void
pthread_testcancel(void)
{
	pthread_t self;

	self = pthread__self();
	if (self->pt_cancel)
		pthread_exit(PTHREAD_CANCELED);
}


/*
 * POSIX requires that certain functions return an error rather than
 * invoking undefined behavior even when handed completely bogus
 * pthread_t values, e.g. stack garbage or (pthread_t)666. This
 * utility routine searches the list of threads for the pthread_t
 * value without dereferencing it.
 */
int
pthread__find(pthread_t id)
{
	pthread_t target;

	pthread_spinlock(&pthread__allqueue_lock);
	PTQ_FOREACH(target, &pthread__allqueue, pt_allq)
		if (target == id)
			break;
	pthread_spinunlock(&pthread__allqueue_lock);

	if (target == NULL || target->pt_state == PT_STATE_DEAD)
		return ESRCH;

	return 0;
}
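
/*
 * For example, pthread_join((pthread_t)666, NULL) must return ESRCH
 * rather than crash; the pointer comparison above is what makes that
 * safe.
 */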


void
pthread__testcancel(pthread_t self)
{

	if (self->pt_cancel)
		pthread_exit(PTHREAD_CANCELED);
}


void
pthread__cleanup_push(void (*cleanup)(void *), void *arg, void *store)
{
	pthread_t self;
	struct pt_clean_t *entry;

	self = pthread__self();
	entry = store;
	entry->ptc_cleanup = cleanup;
	entry->ptc_arg = arg;
	PTQ_INSERT_HEAD(&self->pt_cleanup_stack, entry, ptc_next);
}


void
pthread__cleanup_pop(int ex, void *store)
{
	pthread_t self;
	struct pt_clean_t *entry;

	self = pthread__self();
	entry = store;

	PTQ_REMOVE(&self->pt_cleanup_stack, entry, ptc_next);
	if (ex)
		(*entry->ptc_cleanup)(entry->ptc_arg);
}


int *
pthread__errno(void)
{
	pthread_t self;

	self = pthread__self();

	return &(self->pt_errno);
}

ssize_t	_sys_write(int, const void *, size_t);

void
pthread__assertfunc(const char *file, int line, const char *function,
		    const char *expr)
{
	char buf[1024];
	int len;

	SDPRINTF(("(af)\n"));

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, 1024,
	    "assertion \"%s\" failed: file \"%s\", line %d%s%s%s\n",
	    expr, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	_sys_write(STDERR_FILENO, buf, (size_t)len);
	(void)kill(getpid(), SIGABRT);

	_exit(1);
}


void
pthread__errorfunc(const char *file, int line, const char *function,
		   const char *msg)
{
	char buf[1024];
	size_t len;

	if (pthread__diagassert == 0)
		return;

	/*
	 * snprintf should not acquire any locks, or we could
	 * end up deadlocked if the assert caller held locks.
	 */
	len = snprintf(buf, 1024,
	    "%s: Error detected by libpthread: %s.\n"
	    "Detected by file \"%s\", line %d%s%s%s.\n"
	    "See pthread(3) for information.\n",
	    getprogname(), msg, file, line,
	    function ? ", function \"" : "",
	    function ? function : "",
	    function ? "\"" : "");

	if (pthread__diagassert & DIAGASSERT_STDERR)
		_sys_write(STDERR_FILENO, buf, len);

	if (pthread__diagassert & DIAGASSERT_SYSLOG)
		syslog(LOG_DEBUG | LOG_USER, "%s", buf);

	if (pthread__diagassert & DIAGASSERT_ABORT) {
		(void)kill(getpid(), SIGABRT);
		_exit(1);
	}
}

/*
 * Thread park/unpark operations.  The kernel operations are
 * modelled after a brief description from "Multithreading in
 * the Solaris Operating Environment":
 *
 * http://www.sun.com/software/whitepapers/solaris9/multithread.pdf
 */
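
/*
 * The usual calling pattern, sketched below with made-up helper names,
 * is: take the interlock, queue the thread, record what it sleeps on,
 * drop the interlock and call pthread__park(); the waking side removes
 * the thread under the same interlock and hands it to pthread__unpark(),
 * which releases the interlock itself.  A minimal sketch, assuming the
 * conventions visible in the functions below:
 */
#if 0
static void
sketch_wait(pthread_spin_t *lock, pthread_queue_t *queue, void *obj)
{
	pthread_t self = pthread__self();

	pthread_spinlock(lock);
	PTQ_INSERT_TAIL(queue, self, pt_sleep);
	self->pt_sleepobj = obj;
	self->pt_sleeponq = 1;
	pthread_spinunlock(lock);

	/* Blocks until sketch_wake() clears pt_sleepobj and unparks us. */
	(void)pthread__park(self, lock, queue, NULL, 0, obj);
}

static void
sketch_wake(pthread_spin_t *lock, pthread_queue_t *queue)
{
	pthread_t self = pthread__self();
	pthread_t target;

	pthread_spinlock(lock);
	target = PTQ_FIRST(queue);
	if (target != NULL)
		PTQ_REMOVE(queue, target, pt_sleep);
	/* pthread__unpark() releases the interlock in both cases. */
	pthread__unpark(self, lock, queue, target);
}
#endif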

#define	OOPS(msg)			\
    pthread__errorfunc(__FILE__, __LINE__, __func__, msg)

int
pthread__park(pthread_t self, pthread_spin_t *lock,
	      pthread_queue_t *queue, const struct timespec *abstime,
	      int cancelpt, const void *hint)
{
	int rv, error;

	SDPRINTF(("(pthread__park %p) queue %p enter\n", self, queue));

	/* Clear the willpark flag, since we're about to block. */
	self->pt_willpark = 0;

	/*
	 * Kernels before 4.99.27 can't park and unpark in one step,
	 * so take care of it now if on an old kernel.
	 *
	 * XXX Remove this check before NetBSD 5.0 is released.
	 * It's for compatibility with recent -current only.
	 */
	if (__predict_false(pthread__osrev < 499002700) &&
	    self->pt_unpark != 0) {
		_lwp_unpark(self->pt_unpark, self->pt_unparkhint);
		self->pt_unpark = 0;
	}

	/*
	 * Wait until we are awoken by a pending unpark operation,
	 * a signal, an unpark posted after we have gone to sleep,
	 * or an expired timeout.
	 *
	 * It is fine to test the value of both pt_sleepobj and
	 * pt_sleeponq without holding any locks, because:
	 *
	 * o Only the blocking thread (this thread) ever sets them
	 *   to a non-NULL value.
	 *
	 * o Other threads may set them NULL, but if they do so they
	 *   must also make this thread return from _lwp_park.
	 *
	 * o _lwp_park, _lwp_unpark and _lwp_unpark_all are system
	 *   calls and all make use of spinlocks in the kernel.  So
	 *   these system calls act as full memory barriers, and will
	 *   ensure that the calling CPU's store buffers are drained.
	 *   In combination with the spinlock release before unpark,
	 *   this means that modification of pt_sleepobj/onq by another
	 *   thread will become globally visible before that thread
	 *   schedules an unpark operation on this thread.
	 *
	 * Note: the test in the while() statement dodges the park op if
	 * we have already been awoken, unless there is another thread to
	 * awaken.  This saves a syscall - if we were already awakened,
	 * the next call to _lwp_park() would need to return early in order
	 * to eat the previous wakeup.
	 */
	rv = 0;
	while ((self->pt_sleepobj != NULL || self->pt_unpark != 0) && rv == 0) {
		/*
		 * If we deferred unparking a thread, arrange to
		 * have _lwp_park() restart it before blocking.
		 */
		error = _lwp_park(abstime, self->pt_unpark, hint,
		    self->pt_unparkhint);
		self->pt_unpark = 0;
		if (error != 0) {
			switch (rv = errno) {
			case EINTR:
			case EALREADY:
				rv = 0;
				break;
			case ETIMEDOUT:
				break;
			default:
				OOPS("_lwp_park failed");
				SDPRINTF(("(pthread__park %p) syscall rv=%d\n",
				    self, rv));
				break;
			}
		}
		/* Check for cancellation. */
		if (cancelpt && self->pt_cancel)
			rv = EINTR;
	}

	/*
	 * If we have been awoken early but are still on the queue,
	 * then remove ourself.  Again, it's safe to do the test
	 * without holding any locks.
	 */
	if (self->pt_sleeponq) {
		pthread_spinlock(lock);
		if (self->pt_sleeponq) {
			PTQ_REMOVE(queue, self, pt_sleep);
			self->pt_sleepobj = NULL;
			self->pt_sleeponq = 0;
		}
		pthread_spinunlock(lock);
	}

	SDPRINTF(("(pthread__park %p) queue %p exit\n", self, queue));

	return rv;
}

void
pthread__unpark(pthread_t self, pthread_spin_t *lock,
		pthread_queue_t *queue, pthread_t target)
{
	int rv;

	if (target == NULL) {
		pthread_spinunlock(lock);
		return;
	}

	SDPRINTF(("(pthread__unpark %p) queue %p target %p\n",
	    self, queue, target));

	/*
	 * Easy: the thread has already been removed from
	 * the queue, so just awaken it.
	 */
	target->pt_sleepobj = NULL;
	target->pt_sleeponq = 0;

	/*
	 * Releasing the spinlock serves as a store barrier,
	 * which ensures that all our modifications are visible
	 * to the thread in pthread__park() before the unpark
	 * operation is set in motion.
	 */
	pthread_spinunlock(lock);

	/*
	 * If the calling thread is about to block, defer
	 * unparking the target until _lwp_park() is called.
	 */
	if (self->pt_willpark && self->pt_unpark == 0) {
		self->pt_unpark = target->pt_lid;
		self->pt_unparkhint = queue;
	} else {
		rv = _lwp_unpark(target->pt_lid, queue);
		if (rv != 0 && errno != EALREADY && errno != EINTR) {
			SDPRINTF(("(pthread__unpark %p) syscall rv=%d\n",
			    self, rv));
			OOPS("_lwp_unpark failed");
		}
	}
}
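
/*
 * The deferral above matters for patterns like condition variable
 * handoff, where the waker itself is about to sleep: instead of issuing
 * _lwp_unpark() now and _lwp_park() moments later, the target LID is
 * stashed in pt_unpark and both operations ride on the single
 * _lwp_park() call in pthread__park(), saving one system call.
 */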

void
pthread__unpark_all(pthread_t self, pthread_spin_t *lock,
		    pthread_queue_t *queue)
{
	lwpid_t waiters[PTHREAD__UNPARK_MAX];
	ssize_t n, rv;
	pthread_t thread, next;

	if (PTQ_EMPTY(queue)) {
		pthread_spinunlock(lock);
		return;
	}

	/*
	 * First, clear all sleepobj pointers, since we can release the
	 * spin lock before awakening everybody, and must synchronise with
	 * pthread__park().
	 */
	PTQ_FOREACH(thread, queue, pt_sleep) {
		thread->pt_sleepobj = NULL;
		if (thread == PTQ_NEXT(thread, pt_sleep))
			OOPS("unpark: thread linked to self");
	}

	for (;;) {
		thread = PTQ_FIRST(queue);
		for (n = 0; n < pthread__unpark_max && thread != NULL;
		    thread = next) {
			/*
			 * If the sleepobj pointer is non-NULL, it
			 * means one of two things:
			 *
			 * o The thread has awoken early, spun
			 *   through application code and is
			 *   once more asleep on this object.
			 *
			 * o This is a new thread that has blocked
			 *   on the object after we have released
			 *   the interlock in this loop.
			 *
			 * In both cases we shouldn't remove the
			 * thread from the queue.
			 */
			next = PTQ_NEXT(thread, pt_sleep);
			if (thread->pt_sleepobj != NULL)
				continue;
			thread->pt_sleeponq = 0;
			waiters[n++] = thread->pt_lid;
			PTQ_REMOVE(queue, thread, pt_sleep);
			SDPRINTF(("(pthread__unpark_all %p) queue %p "
			    "unpark %p\n", self, queue, thread));
		}

		/*
		 * Releasing the spinlock serves as a store barrier,
		 * which ensures that all our modifications are visible
		 * to the thread in pthread__park() before the unpark
		 * operation is set in motion.
		 */
		pthread_spinunlock(lock);
		switch (n) {
		case 0:
			return;
		case 1:
			/*
			 * If the calling thread is about to block,
			 * defer unparking the target until _lwp_park()
			 * is called.
			 */
			if (self->pt_willpark && self->pt_unpark == 0) {
				self->pt_unpark = waiters[0];
				self->pt_unparkhint = queue;
				return;
			}
			rv = (ssize_t)_lwp_unpark(waiters[0], queue);
			if (rv != 0 && errno != EALREADY && errno != EINTR) {
				OOPS("_lwp_unpark failed");
				SDPRINTF(("(pthread__unpark_all %p) "
				    "syscall rv=%d\n", self, rv));
			}
			return;
		default:
			rv = _lwp_unpark_all(waiters, (size_t)n, queue);
			if (rv != 0 && errno != EINTR) {
				OOPS("_lwp_unpark_all failed");
				SDPRINTF(("(pthread__unpark_all %p) "
				    "syscall rv=%d\n", self, rv));
			}
			break;
		}
		pthread_spinlock(lock);
	}
}

#undef	OOPS

/*
 * Allocate a stack for a thread, and set it up. It needs to be aligned, so
 * that a thread can find itself by its stack pointer.
 */
static int
pthread__stackalloc(pthread_t *newt)
{
	void *addr;

	addr = mmap(NULL, pthread__stacksize, PROT_READ|PROT_WRITE,
	    MAP_ANON|MAP_PRIVATE | MAP_ALIGNED(pthread__stacksize_lg),
	    -1, (off_t)0);

	if (addr == MAP_FAILED)
		return ENOMEM;

	pthread__assert(((intptr_t)addr & pthread__stackmask) == 0);

	return pthread__stackid_setup(addr, pthread__stacksize, newt);
}
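
/*
 * For example, with the default 2^18 byte stacks, MAP_ALIGNED() makes
 * mmap() return a base whose low 18 bits are clear, so for any sp
 * within the stack (sp & ~pthread__stackmask) == base, which is exactly
 * the lookup that pthread__self() depends on.
 */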


/*
 * Set up the slightly special stack for the "initial" thread, which
 * runs on the normal system stack, and thus gets slightly different
 * treatment.
 */
static void
pthread__initmain(pthread_t *newt)
{
	struct rlimit slimit;
	size_t pagesize;
	pthread_t t;
	void *base;
	size_t size;
	int error, ret;
	char *value;

	pagesize = (size_t)sysconf(_SC_PAGESIZE);
	pthread__stacksize = 0;
	ret = getrlimit(RLIMIT_STACK, &slimit);
	if (ret == -1)
		err(1, "Couldn't get stack resource consumption limits");
	value = getenv("PTHREAD_STACKSIZE");
	if (value) {
		pthread__stacksize = atoi(value) * 1024;
		if (pthread__stacksize > slimit.rlim_cur)
			pthread__stacksize = (size_t)slimit.rlim_cur;
	}
	if (pthread__stacksize == 0)
		pthread__stacksize = (size_t)slimit.rlim_cur;
	if (pthread__stacksize < 4 * pagesize)
		errx(1, "Stacksize limit is too low, minimum %zu kbyte.",
		    4 * pagesize / 1024);

	pthread__stacksize_lg = -1;
	while (pthread__stacksize) {
		pthread__stacksize >>= 1;
		pthread__stacksize_lg++;
	}

	pthread__stacksize = (1 << pthread__stacksize_lg);
	pthread__stackmask = pthread__stacksize - 1;

	base = (void *)(pthread__sp() & ~pthread__stackmask);
	size = pthread__stacksize;

	error = pthread__stackid_setup(base, size, &t);
	if (error) {
		/* XXX */
		errx(2, "failed to setup main thread: error=%d", error);
	}

	*newt = t;
}
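
/*
 * Example (illustrative): running PTHREAD_STACKSIZE=512 ./prog requests
 * 512 kbyte stacks; the value is clamped to the RLIMIT_STACK soft limit
 * and then rounded down to a power of two by the loop above.
 */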

static int
/*ARGSUSED*/
pthread__stackid_setup(void *base, size_t size, pthread_t *tp)
{
	pthread_t t;
	void *redaddr;
	size_t pagesize;
	int ret;

	t = base;
	pagesize = (size_t)sysconf(_SC_PAGESIZE);

	/*
	 * Put a pointer to the pthread in the bottom (but
	 * redzone-protected section) of the stack.
	 */
	redaddr = STACK_SHRINK(STACK_MAX(base, size), pagesize);
	t->pt_stack.ss_size = size - 2 * pagesize;
#ifdef __MACHINE_STACK_GROWS_UP
	t->pt_stack.ss_sp = (char *)(void *)base + pagesize;
#else
	t->pt_stack.ss_sp = (char *)(void *)base + 2 * pagesize;
#endif

	/* Protect the next-to-bottom stack page as a red zone. */
	ret = mprotect(redaddr, pagesize, PROT_NONE);
	if (ret == -1) {
		return errno;
	}
	*tp = t;
	return 0;
}
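
/*
 * Resulting layout for the common stack-grows-down case, with one page
 * for the thread structure and one for the red zone (a sketch; the
 * grows-up case mirrors it):
 *
 *	base          base+pgsz      base+2*pgsz           base+size
 *	| pthread_st  | red zone     | usable stack ...    |
 *	               (PROT_NONE)
 *
 * hence ss_sp = base + 2 * pagesize and ss_size = size - 2 * pagesize,
 * as set above.
 */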