xref: /onnv-gate/usr/src/uts/common/os/schedctl.c (revision 6036:c98c367c32cb)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53247Sgjelinek  * Common Development and Distribution License (the "License").
63247Sgjelinek  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
215891Sraf 
220Sstevel@tonic-gate /*
235891Sraf  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include <sys/types.h>
300Sstevel@tonic-gate #include <sys/systm.h>
310Sstevel@tonic-gate #include <sys/schedctl.h>
320Sstevel@tonic-gate #include <sys/proc.h>
330Sstevel@tonic-gate #include <sys/thread.h>
340Sstevel@tonic-gate #include <sys/class.h>
350Sstevel@tonic-gate #include <sys/cred.h>
360Sstevel@tonic-gate #include <sys/kmem.h>
370Sstevel@tonic-gate #include <sys/cmn_err.h>
380Sstevel@tonic-gate #include <sys/stack.h>
390Sstevel@tonic-gate #include <sys/debug.h>
400Sstevel@tonic-gate #include <sys/cpuvar.h>
410Sstevel@tonic-gate #include <sys/sobject.h>
420Sstevel@tonic-gate #include <sys/door.h>
430Sstevel@tonic-gate #include <sys/modctl.h>
440Sstevel@tonic-gate #include <sys/syscall.h>
450Sstevel@tonic-gate #include <sys/sysmacros.h>
460Sstevel@tonic-gate #include <sys/vmsystm.h>
470Sstevel@tonic-gate #include <sys/mman.h>
480Sstevel@tonic-gate #include <sys/vnode.h>
490Sstevel@tonic-gate #include <sys/swap.h>
500Sstevel@tonic-gate #include <sys/lwp.h>
510Sstevel@tonic-gate #include <sys/bitmap.h>
520Sstevel@tonic-gate #include <sys/atomic.h>
530Sstevel@tonic-gate #include <sys/fcntl.h>
540Sstevel@tonic-gate #include <vm/seg_kp.h>
550Sstevel@tonic-gate #include <vm/seg_vn.h>
560Sstevel@tonic-gate #include <vm/as.h>
570Sstevel@tonic-gate #include <fs/fs_subr.h>
580Sstevel@tonic-gate 
590Sstevel@tonic-gate /*
600Sstevel@tonic-gate  * Page handling structures.  This is set up as a list of per-page
610Sstevel@tonic-gate  * control structures (sc_page_ctl), with p->p_pagep pointing to
620Sstevel@tonic-gate  * the first.  The per-page structures point to the actual pages
630Sstevel@tonic-gate  * and contain pointers to the user address for each mapped page.
640Sstevel@tonic-gate  *
650Sstevel@tonic-gate  * All data is protected by p->p_sc_lock.  Since this lock is
660Sstevel@tonic-gate  * held while waiting for memory, schedctl_shared_alloc() should
670Sstevel@tonic-gate  * not be called while holding p_lock.
680Sstevel@tonic-gate  */
690Sstevel@tonic-gate 
700Sstevel@tonic-gate typedef struct sc_page_ctl {
710Sstevel@tonic-gate 	struct sc_page_ctl *spc_next;
720Sstevel@tonic-gate 	sc_shared_t	*spc_base;	/* base of kernel page */
730Sstevel@tonic-gate 	sc_shared_t	*spc_end;	/* end of usable space */
740Sstevel@tonic-gate 	ulong_t		*spc_map;	/* bitmap of allocated space on page */
750Sstevel@tonic-gate 	size_t		spc_space;	/* amount of space on page */
760Sstevel@tonic-gate 	caddr_t		spc_uaddr;	/* user-level address of the page */
770Sstevel@tonic-gate 	struct anon_map	*spc_amp;	/* anonymous memory structure */
780Sstevel@tonic-gate } sc_page_ctl_t;
790Sstevel@tonic-gate 
800Sstevel@tonic-gate static size_t	sc_pagesize;		/* size of usable space on page */
810Sstevel@tonic-gate static size_t	sc_bitmap_len;		/* # of bits in allocation bitmap */
820Sstevel@tonic-gate static size_t	sc_bitmap_words;	/* # of words in allocation bitmap */
830Sstevel@tonic-gate 
840Sstevel@tonic-gate /* Context ops */
850Sstevel@tonic-gate static void	schedctl_save(sc_shared_t *);
860Sstevel@tonic-gate static void	schedctl_restore(sc_shared_t *);
870Sstevel@tonic-gate static void	schedctl_fork(kthread_t *, kthread_t *);
880Sstevel@tonic-gate 
890Sstevel@tonic-gate /* Functions for handling shared pages */
900Sstevel@tonic-gate static int	schedctl_shared_alloc(sc_shared_t **, uintptr_t *);
910Sstevel@tonic-gate static sc_page_ctl_t *schedctl_page_lookup(sc_shared_t *);
920Sstevel@tonic-gate static int	schedctl_map(struct anon_map *, caddr_t *, caddr_t);
930Sstevel@tonic-gate static int	schedctl_getpage(struct anon_map **, caddr_t *);
940Sstevel@tonic-gate static void	schedctl_freepage(struct anon_map *, caddr_t);
950Sstevel@tonic-gate 
960Sstevel@tonic-gate /*
970Sstevel@tonic-gate  * System call interface to scheduler activations.
980Sstevel@tonic-gate  * This always operates on the current lwp.
990Sstevel@tonic-gate  */
1000Sstevel@tonic-gate caddr_t
1010Sstevel@tonic-gate schedctl(void)
1020Sstevel@tonic-gate {
1030Sstevel@tonic-gate 	kthread_t	*t = curthread;
1040Sstevel@tonic-gate 	sc_shared_t	*ssp;
1050Sstevel@tonic-gate 	uintptr_t	uaddr;
1060Sstevel@tonic-gate 	int		error;
1070Sstevel@tonic-gate 
1080Sstevel@tonic-gate 	if (t->t_schedctl == NULL) {
1090Sstevel@tonic-gate 		/*
1100Sstevel@tonic-gate 		 * Allocate and initialize the shared structure.
1110Sstevel@tonic-gate 		 */
1120Sstevel@tonic-gate 		if ((error = schedctl_shared_alloc(&ssp, &uaddr)) != 0)
1130Sstevel@tonic-gate 			return ((caddr_t)(uintptr_t)set_errno(error));
1140Sstevel@tonic-gate 		bzero(ssp, sizeof (*ssp));
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate 		installctx(t, ssp, schedctl_save, schedctl_restore,
1170Sstevel@tonic-gate 		    schedctl_fork, NULL, NULL, NULL);
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate 		thread_lock(t);	/* protect against ts_tick and ts_update */
1200Sstevel@tonic-gate 		t->t_schedctl = ssp;
1210Sstevel@tonic-gate 		t->t_sc_uaddr = uaddr;
1220Sstevel@tonic-gate 		thread_unlock(t);
1230Sstevel@tonic-gate 	}
1240Sstevel@tonic-gate 
1250Sstevel@tonic-gate 	return ((caddr_t)t->t_sc_uaddr);
1260Sstevel@tonic-gate }
1270Sstevel@tonic-gate 
1280Sstevel@tonic-gate 
1290Sstevel@tonic-gate /*
1300Sstevel@tonic-gate  * Clean up scheduler activations state associated with an exiting
1310Sstevel@tonic-gate  * (or execing) lwp.  t is always the current thread.
1320Sstevel@tonic-gate  */
1330Sstevel@tonic-gate void
1340Sstevel@tonic-gate schedctl_lwp_cleanup(kthread_t *t)
1350Sstevel@tonic-gate {
1360Sstevel@tonic-gate 	sc_shared_t	*ssp = t->t_schedctl;
1370Sstevel@tonic-gate 	proc_t		*p = ttoproc(t);
1380Sstevel@tonic-gate 	sc_page_ctl_t	*pagep;
1390Sstevel@tonic-gate 	index_t		index;
1400Sstevel@tonic-gate 
1410Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate 	thread_lock(t);		/* protect against ts_tick and ts_update */
1440Sstevel@tonic-gate 	t->t_schedctl = NULL;
1450Sstevel@tonic-gate 	t->t_sc_uaddr = 0;
1460Sstevel@tonic-gate 	thread_unlock(t);
1470Sstevel@tonic-gate 
1480Sstevel@tonic-gate 	/*
1490Sstevel@tonic-gate 	 * Remove the context op to avoid the final call to
1500Sstevel@tonic-gate 	 * schedctl_save when switching away from this lwp.
1510Sstevel@tonic-gate 	 */
1520Sstevel@tonic-gate 	(void) removectx(t, ssp, schedctl_save, schedctl_restore,
1530Sstevel@tonic-gate 	    schedctl_fork, NULL, NULL, NULL);
1540Sstevel@tonic-gate 
1550Sstevel@tonic-gate 	/*
1560Sstevel@tonic-gate 	 * Do not unmap the shared page until the process exits.
1570Sstevel@tonic-gate 	 * User-level library code relies on this for adaptive mutex locking.
1580Sstevel@tonic-gate 	 */
1590Sstevel@tonic-gate 	mutex_enter(&p->p_sc_lock);
1600Sstevel@tonic-gate 	ssp->sc_state = SC_FREE;
1610Sstevel@tonic-gate 	pagep = schedctl_page_lookup(ssp);
1620Sstevel@tonic-gate 	index = (index_t)(ssp - pagep->spc_base);
1630Sstevel@tonic-gate 	BT_CLEAR(pagep->spc_map, index);
1640Sstevel@tonic-gate 	pagep->spc_space += sizeof (sc_shared_t);
1650Sstevel@tonic-gate 	mutex_exit(&p->p_sc_lock);
1660Sstevel@tonic-gate }
1670Sstevel@tonic-gate 
1685891Sraf 
1690Sstevel@tonic-gate /*
1700Sstevel@tonic-gate  * Cleanup the list of schedctl shared pages for the process.
1710Sstevel@tonic-gate  * Called from exec() and exit() system calls.
1720Sstevel@tonic-gate  */
1730Sstevel@tonic-gate void
1745891Sraf schedctl_proc_cleanup(void)
1750Sstevel@tonic-gate {
1760Sstevel@tonic-gate 	proc_t *p = curproc;
1770Sstevel@tonic-gate 	sc_page_ctl_t *pagep;
1780Sstevel@tonic-gate 	sc_page_ctl_t *next;
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate 	ASSERT(p->p_lwpcnt == 1);	/* we are single-threaded now */
1810Sstevel@tonic-gate 	ASSERT(curthread->t_schedctl == NULL);
1820Sstevel@tonic-gate 
1830Sstevel@tonic-gate 	/*
1840Sstevel@tonic-gate 	 * Since we are single-threaded, we don't have to hold p->p_sc_lock.
1850Sstevel@tonic-gate 	 */
1860Sstevel@tonic-gate 	pagep = p->p_pagep;
1870Sstevel@tonic-gate 	p->p_pagep = NULL;
1880Sstevel@tonic-gate 	while (pagep != NULL) {
1890Sstevel@tonic-gate 		ASSERT(pagep->spc_space == sc_pagesize);
1900Sstevel@tonic-gate 		next = pagep->spc_next;
1910Sstevel@tonic-gate 		/*
1920Sstevel@tonic-gate 		 * Unmap the user space and free the mapping structure.
1930Sstevel@tonic-gate 		 */
1940Sstevel@tonic-gate 		(void) as_unmap(p->p_as, pagep->spc_uaddr, PAGESIZE);
1950Sstevel@tonic-gate 		schedctl_freepage(pagep->spc_amp, (caddr_t)(pagep->spc_base));
1960Sstevel@tonic-gate 		kmem_free(pagep->spc_map, sizeof (ulong_t) * sc_bitmap_words);
1970Sstevel@tonic-gate 		kmem_free(pagep, sizeof (sc_page_ctl_t));
1980Sstevel@tonic-gate 		pagep = next;
1990Sstevel@tonic-gate 	}
2000Sstevel@tonic-gate }
2010Sstevel@tonic-gate 
2025891Sraf 
2030Sstevel@tonic-gate /*
2040Sstevel@tonic-gate  * Called by resume just before switching away from the current thread.
2050Sstevel@tonic-gate  * Save new thread state.
2060Sstevel@tonic-gate  */
2070Sstevel@tonic-gate void
2080Sstevel@tonic-gate schedctl_save(sc_shared_t *ssp)
2090Sstevel@tonic-gate {
2100Sstevel@tonic-gate 	ssp->sc_state = curthread->t_state;
2110Sstevel@tonic-gate }
2120Sstevel@tonic-gate 
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate /*
2150Sstevel@tonic-gate  * Called by resume after switching to the current thread.
2160Sstevel@tonic-gate  * Save new thread state and CPU.
2170Sstevel@tonic-gate  */
2180Sstevel@tonic-gate void
2190Sstevel@tonic-gate schedctl_restore(sc_shared_t *ssp)
2200Sstevel@tonic-gate {
2210Sstevel@tonic-gate 	ssp->sc_state = SC_ONPROC;
2220Sstevel@tonic-gate 	ssp->sc_cpu = CPU->cpu_id;
2230Sstevel@tonic-gate }
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate /*
2270Sstevel@tonic-gate  * On fork, remove inherited mappings from the child's address space.
2280Sstevel@tonic-gate  * The child's threads must call schedctl() to get new shared mappings.
2290Sstevel@tonic-gate  */
2300Sstevel@tonic-gate void
2310Sstevel@tonic-gate schedctl_fork(kthread_t *pt, kthread_t *ct)
2320Sstevel@tonic-gate {
2330Sstevel@tonic-gate 	proc_t *pp = ttoproc(pt);
2340Sstevel@tonic-gate 	proc_t *cp = ttoproc(ct);
2350Sstevel@tonic-gate 	sc_page_ctl_t *pagep;
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	ASSERT(ct->t_schedctl == NULL);
2380Sstevel@tonic-gate 
2390Sstevel@tonic-gate 	/*
2400Sstevel@tonic-gate 	 * Do this only once, whether we are doing fork1() or forkall().
2410Sstevel@tonic-gate 	 * Don't do it at all if the child process is a child of vfork()
2420Sstevel@tonic-gate 	 * because a child of vfork() borrows the parent's address space.
2430Sstevel@tonic-gate 	 */
2440Sstevel@tonic-gate 	if (pt != curthread || (cp->p_flag & SVFORK))
2450Sstevel@tonic-gate 		return;
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate 	mutex_enter(&pp->p_sc_lock);
2480Sstevel@tonic-gate 	for (pagep = pp->p_pagep; pagep != NULL; pagep = pagep->spc_next)
2490Sstevel@tonic-gate 		(void) as_unmap(cp->p_as, pagep->spc_uaddr, PAGESIZE);
2500Sstevel@tonic-gate 	mutex_exit(&pp->p_sc_lock);
2510Sstevel@tonic-gate }
2520Sstevel@tonic-gate 
2535891Sraf 
2540Sstevel@tonic-gate /*
2550Sstevel@tonic-gate  * Returns non-zero if the specified thread shouldn't be preempted at this time.
2560Sstevel@tonic-gate  * Called by ts_preempt, ts_tick, and ts_update.
2570Sstevel@tonic-gate  */
2580Sstevel@tonic-gate int
2590Sstevel@tonic-gate schedctl_get_nopreempt(kthread_t *t)
2600Sstevel@tonic-gate {
2610Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
2620Sstevel@tonic-gate 	return (t->t_schedctl->sc_preemptctl.sc_nopreempt);
2630Sstevel@tonic-gate }
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate 
2660Sstevel@tonic-gate /*
2670Sstevel@tonic-gate  * Sets the value of the nopreempt field for the specified thread.
2680Sstevel@tonic-gate  * Called by ts_preempt to clear the field on preemption.
2690Sstevel@tonic-gate  */
2700Sstevel@tonic-gate void
2710Sstevel@tonic-gate schedctl_set_nopreempt(kthread_t *t, short val)
2720Sstevel@tonic-gate {
2730Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
2740Sstevel@tonic-gate 	t->t_schedctl->sc_preemptctl.sc_nopreempt = val;
2750Sstevel@tonic-gate }
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate /*
2790Sstevel@tonic-gate  * Sets the value of the yield field for the specified thread.  Called by
2800Sstevel@tonic-gate  * ts_preempt and ts_tick to set the field, and ts_yield to clear it.
2810Sstevel@tonic-gate  * The kernel never looks at this field so we don't need a schedctl_get_yield
2820Sstevel@tonic-gate  * function.
2830Sstevel@tonic-gate  */
2840Sstevel@tonic-gate void
2850Sstevel@tonic-gate schedctl_set_yield(kthread_t *t, short val)
2860Sstevel@tonic-gate {
2870Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
2880Sstevel@tonic-gate 	t->t_schedctl->sc_preemptctl.sc_yield = val;
2890Sstevel@tonic-gate }
2900Sstevel@tonic-gate 
2910Sstevel@tonic-gate 
2920Sstevel@tonic-gate /*
2930Sstevel@tonic-gate  * Returns non-zero if the specified thread has requested that all
2940Sstevel@tonic-gate  * signals be blocked.  Called by signal-related code that tests
2950Sstevel@tonic-gate  * the signal mask of a thread that may not be the current thread
2960Sstevel@tonic-gate  * and where the process's p_lock cannot be acquired.
2970Sstevel@tonic-gate  */
2980Sstevel@tonic-gate int
2990Sstevel@tonic-gate schedctl_sigblock(kthread_t *t)
3000Sstevel@tonic-gate {
3010Sstevel@tonic-gate 	sc_shared_t *tdp = t->t_schedctl;
3020Sstevel@tonic-gate 
3034389Ssl108498 	if (tdp != NULL)
3040Sstevel@tonic-gate 		return (tdp->sc_sigblock);
3050Sstevel@tonic-gate 	return (0);
3060Sstevel@tonic-gate }
3070Sstevel@tonic-gate 
3080Sstevel@tonic-gate 
3090Sstevel@tonic-gate /*
3100Sstevel@tonic-gate  * If the sc_sigblock field is set for the specified thread, set
3110Sstevel@tonic-gate  * its signal mask to block all maskable signals, then clear the
3120Sstevel@tonic-gate  * sc_sigblock field.  This finishes what user-level code requested
3130Sstevel@tonic-gate  * to be done when it set tdp->sc_shared->sc_sigblock non-zero.
3140Sstevel@tonic-gate  * Called by signal-related code that holds the process's p_lock.
3150Sstevel@tonic-gate  */
3160Sstevel@tonic-gate void
3170Sstevel@tonic-gate schedctl_finish_sigblock(kthread_t *t)
3180Sstevel@tonic-gate {
3190Sstevel@tonic-gate 	sc_shared_t *tdp = t->t_schedctl;
3200Sstevel@tonic-gate 
3210Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3220Sstevel@tonic-gate 
3234389Ssl108498 	if (tdp != NULL && tdp->sc_sigblock) {
3240Sstevel@tonic-gate 		t->t_hold.__sigbits[0] = FILLSET0 & ~CANTMASK0;
3250Sstevel@tonic-gate 		t->t_hold.__sigbits[1] = FILLSET1 & ~CANTMASK1;
3260Sstevel@tonic-gate 		tdp->sc_sigblock = 0;
3270Sstevel@tonic-gate 	}
3280Sstevel@tonic-gate }
3290Sstevel@tonic-gate 
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate /*
3325891Sraf  * Return non-zero if the current thread has declared that it has
3335891Sraf  * a cancellation pending and that cancellation is not disabled.
3345891Sraf  * If SIGCANCEL is blocked, we must be going over the wire in an
3355891Sraf  * NFS transaction (sigintr() was called); return zero in this case.
3365891Sraf  */
3375891Sraf int
3385891Sraf schedctl_cancel_pending(void)
3395891Sraf {
3405891Sraf 	sc_shared_t *tdp = curthread->t_schedctl;
3415891Sraf 
3425891Sraf 	if (tdp != NULL &&
3435891Sraf 	    (tdp->sc_flgs & SC_CANCEL_FLG) &&
3445891Sraf 	    !tdp->sc_sigblock &&
3455891Sraf 	    !sigismember(&curthread->t_hold, SIGCANCEL))
3465891Sraf 		return (1);
3475891Sraf 	return (0);
3485891Sraf }
3495891Sraf 
3505891Sraf 
3515891Sraf /*
3525891Sraf  * Inform libc that the kernel returned EINTR from some system call
3535891Sraf  * due to there being a cancellation pending (SC_CANCEL_FLG set or
3545891Sraf  * we received an SI_LWP SIGCANCEL while in a system call), rather
3555891Sraf  * than because of some other signal.  User-level code can try to
3565891Sraf  * recover from receiving other signals, but it can't recover from
3575891Sraf  * being cancelled.
3585891Sraf  */
3595891Sraf void
3605891Sraf schedctl_cancel_eintr(void)
3615891Sraf {
3625891Sraf 	sc_shared_t *tdp = curthread->t_schedctl;
3635891Sraf 
3645891Sraf 	if (tdp != NULL)
3655891Sraf 		tdp->sc_flgs |= SC_EINTR_FLG;
3665891Sraf }
3675891Sraf 
3685891Sraf 
3695891Sraf /*
3700Sstevel@tonic-gate  * Return non-zero if the current thread has declared that
3710Sstevel@tonic-gate  * it is calling into the kernel to park, else return zero.
3720Sstevel@tonic-gate  */
3730Sstevel@tonic-gate int
3745891Sraf schedctl_is_park(void)
3750Sstevel@tonic-gate {
3760Sstevel@tonic-gate 	sc_shared_t *tdp = curthread->t_schedctl;
3770Sstevel@tonic-gate 
3784389Ssl108498 	if (tdp != NULL)
3795891Sraf 		return ((tdp->sc_flgs & SC_PARK_FLG) != 0);
3800Sstevel@tonic-gate 	/*
3810Sstevel@tonic-gate 	 * If we're here and there is no shared memory (how could
3820Sstevel@tonic-gate 	 * that happen?) then just assume we really are here to park.
3830Sstevel@tonic-gate 	 */
3840Sstevel@tonic-gate 	return (1);
3850Sstevel@tonic-gate }
3860Sstevel@tonic-gate 
3875891Sraf 
3884389Ssl108498 /*
3894389Ssl108498  * Declare thread is parking.
3904389Ssl108498  *
3915891Sraf  * libc will set "sc_flgs |= SC_PARK_FLG" before calling lwpsys_park(0, tid)
3925891Sraf  * in order to declare that the thread is calling into the kernel to park.
3934389Ssl108498  *
3944389Ssl108498  * This interface exists ONLY to support older versions of libthread which
3955891Sraf  * are not aware of the SC_PARK_FLG flag.
3964389Ssl108498  *
3975891Sraf  * Older versions of libthread which are not aware of the SC_PARK_FLG flag
3985891Sraf  * need to be modified or emulated to call lwpsys_park(4, ...) instead of
3994389Ssl108498  * lwpsys_park(0, ...).  This will invoke schedctl_set_park() before
4004389Ssl108498  * lwp_park() to declare that the thread is parking.
4014389Ssl108498  */
4024389Ssl108498 void
4035891Sraf schedctl_set_park(void)
4045891Sraf {
4055891Sraf 	sc_shared_t *tdp = curthread->t_schedctl;
4065891Sraf 	if (tdp != NULL)
4075891Sraf 		tdp->sc_flgs |= SC_PARK_FLG;
4085891Sraf }
4095891Sraf 
4105891Sraf 
4115891Sraf /*
4125891Sraf  * Clear the parking flag on return from parking in the kernel.
4135891Sraf  */
4145891Sraf void
4155891Sraf schedctl_unpark(void)
4164389Ssl108498 {
4174389Ssl108498 	sc_shared_t *tdp = curthread->t_schedctl;
4184389Ssl108498 
4194389Ssl108498 	if (tdp != NULL)
4205891Sraf 		tdp->sc_flgs &= ~SC_PARK_FLG;
4210Sstevel@tonic-gate }
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 
4240Sstevel@tonic-gate /*
4250Sstevel@tonic-gate  * Page handling code.
4260Sstevel@tonic-gate  */
4270Sstevel@tonic-gate 
4280Sstevel@tonic-gate void
4295891Sraf schedctl_init(void)
4300Sstevel@tonic-gate {
4310Sstevel@tonic-gate 	/*
4320Sstevel@tonic-gate 	 * Amount of page that can hold sc_shared_t structures.  If
4330Sstevel@tonic-gate 	 * sizeof (sc_shared_t) is a power of 2, this should just be
4340Sstevel@tonic-gate 	 * PAGESIZE.
4350Sstevel@tonic-gate 	 */
4360Sstevel@tonic-gate 	sc_pagesize = PAGESIZE - (PAGESIZE % sizeof (sc_shared_t));
4370Sstevel@tonic-gate 
4380Sstevel@tonic-gate 	/*
4390Sstevel@tonic-gate 	 * Allocation bitmap is one bit per struct on a page.
4400Sstevel@tonic-gate 	 */
4410Sstevel@tonic-gate 	sc_bitmap_len = sc_pagesize / sizeof (sc_shared_t);
4420Sstevel@tonic-gate 	sc_bitmap_words = howmany(sc_bitmap_len, BT_NBIPUL);
4430Sstevel@tonic-gate }
4440Sstevel@tonic-gate 
4455891Sraf 
4460Sstevel@tonic-gate int
4470Sstevel@tonic-gate schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp)
4480Sstevel@tonic-gate {
4490Sstevel@tonic-gate 	proc_t		*p = curproc;
4500Sstevel@tonic-gate 	sc_page_ctl_t	*pagep;
4510Sstevel@tonic-gate 	sc_shared_t	*ssp;
4520Sstevel@tonic-gate 	caddr_t		base;
4530Sstevel@tonic-gate 	index_t		index;
4540Sstevel@tonic-gate 	int		error;
4550Sstevel@tonic-gate 
4560Sstevel@tonic-gate 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
4570Sstevel@tonic-gate 	mutex_enter(&p->p_sc_lock);
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate 	/*
4600Sstevel@tonic-gate 	 * Try to find space for the new data in existing pages
4610Sstevel@tonic-gate 	 * within the process's list of shared pages.
4620Sstevel@tonic-gate 	 */
4630Sstevel@tonic-gate 	for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next)
4640Sstevel@tonic-gate 		if (pagep->spc_space != 0)
4650Sstevel@tonic-gate 			break;
4660Sstevel@tonic-gate 
4670Sstevel@tonic-gate 	if (pagep != NULL)
4680Sstevel@tonic-gate 		base = pagep->spc_uaddr;
4690Sstevel@tonic-gate 	else {
4700Sstevel@tonic-gate 		struct anon_map *amp;
4710Sstevel@tonic-gate 		caddr_t kaddr;
4720Sstevel@tonic-gate 
4730Sstevel@tonic-gate 		/*
4740Sstevel@tonic-gate 		 * No room, need to allocate a new page.  Also set up
4750Sstevel@tonic-gate 		 * a mapping to the kernel address space for the new
4760Sstevel@tonic-gate 		 * page and lock it in memory.
4770Sstevel@tonic-gate 		 */
4780Sstevel@tonic-gate 		if ((error = schedctl_getpage(&amp, &kaddr)) != 0) {
4790Sstevel@tonic-gate 			mutex_exit(&p->p_sc_lock);
4800Sstevel@tonic-gate 			return (error);
4810Sstevel@tonic-gate 		}
4820Sstevel@tonic-gate 		if ((error = schedctl_map(amp, &base, kaddr)) != 0) {
4830Sstevel@tonic-gate 			schedctl_freepage(amp, kaddr);
4840Sstevel@tonic-gate 			mutex_exit(&p->p_sc_lock);
4850Sstevel@tonic-gate 			return (error);
4860Sstevel@tonic-gate 		}
4870Sstevel@tonic-gate 
4880Sstevel@tonic-gate 		/*
4890Sstevel@tonic-gate 		 * Allocate and initialize the page control structure.
4900Sstevel@tonic-gate 		 */
4910Sstevel@tonic-gate 		pagep = kmem_alloc(sizeof (sc_page_ctl_t), KM_SLEEP);
4920Sstevel@tonic-gate 		pagep->spc_amp = amp;
4930Sstevel@tonic-gate 		pagep->spc_base = (sc_shared_t *)kaddr;
4940Sstevel@tonic-gate 		pagep->spc_end = (sc_shared_t *)(kaddr + sc_pagesize);
4950Sstevel@tonic-gate 		pagep->spc_uaddr = base;
4960Sstevel@tonic-gate 
4970Sstevel@tonic-gate 		pagep->spc_map = kmem_zalloc(sizeof (ulong_t) * sc_bitmap_words,
4980Sstevel@tonic-gate 		    KM_SLEEP);
4990Sstevel@tonic-gate 		pagep->spc_space = sc_pagesize;
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate 		pagep->spc_next = p->p_pagep;
5020Sstevel@tonic-gate 		p->p_pagep = pagep;
5030Sstevel@tonic-gate 	}
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate 	/*
5060Sstevel@tonic-gate 	 * Got a page, now allocate space for the data.  There should
5070Sstevel@tonic-gate 	 * be space unless something's wrong.
5080Sstevel@tonic-gate 	 */
5090Sstevel@tonic-gate 	ASSERT(pagep != NULL && pagep->spc_space >= sizeof (sc_shared_t));
5100Sstevel@tonic-gate 	index = bt_availbit(pagep->spc_map, sc_bitmap_len);
5110Sstevel@tonic-gate 	ASSERT(index != -1);
5120Sstevel@tonic-gate 
5130Sstevel@tonic-gate 	/*
5140Sstevel@tonic-gate 	 * Get location with pointer arithmetic.  spc_base is of type
5150Sstevel@tonic-gate 	 * sc_shared_t *.  Mark as allocated.
5160Sstevel@tonic-gate 	 */
5170Sstevel@tonic-gate 	ssp = pagep->spc_base + index;
5180Sstevel@tonic-gate 	BT_SET(pagep->spc_map, index);
5190Sstevel@tonic-gate 	pagep->spc_space -= sizeof (sc_shared_t);
5200Sstevel@tonic-gate 
5210Sstevel@tonic-gate 	mutex_exit(&p->p_sc_lock);
5220Sstevel@tonic-gate 
5230Sstevel@tonic-gate 	/*
5240Sstevel@tonic-gate 	 * Return kernel and user addresses.
5250Sstevel@tonic-gate 	 */
5260Sstevel@tonic-gate 	*kaddrp = ssp;
5270Sstevel@tonic-gate 	*uaddrp = (uintptr_t)base + ((uintptr_t)ssp & PAGEOFFSET);
5280Sstevel@tonic-gate 	return (0);
5290Sstevel@tonic-gate }
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate 
5320Sstevel@tonic-gate /*
5330Sstevel@tonic-gate  * Find the page control structure corresponding to a kernel address.
5340Sstevel@tonic-gate  */
5350Sstevel@tonic-gate static sc_page_ctl_t *
5360Sstevel@tonic-gate schedctl_page_lookup(sc_shared_t *ssp)
5370Sstevel@tonic-gate {
5380Sstevel@tonic-gate 	proc_t *p = curproc;
5390Sstevel@tonic-gate 	sc_page_ctl_t *pagep;
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_sc_lock));
5420Sstevel@tonic-gate 	for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) {
5430Sstevel@tonic-gate 		if (ssp >= pagep->spc_base && ssp < pagep->spc_end)
5440Sstevel@tonic-gate 			return (pagep);
5450Sstevel@tonic-gate 	}
5460Sstevel@tonic-gate 	return (NULL);		/* This "can't happen".  Should we panic? */
5470Sstevel@tonic-gate }
5480Sstevel@tonic-gate 
5490Sstevel@tonic-gate 
5500Sstevel@tonic-gate /*
5510Sstevel@tonic-gate  * This function is called when a page needs to be mapped into a
5520Sstevel@tonic-gate  * process's address space.  Allocate the user address space and
5530Sstevel@tonic-gate  * set up the mapping to the page.  Assumes the page has already
5540Sstevel@tonic-gate  * been allocated and locked in memory via schedctl_getpage.
5550Sstevel@tonic-gate  */
5560Sstevel@tonic-gate static int
5570Sstevel@tonic-gate schedctl_map(struct anon_map *amp, caddr_t *uaddrp, caddr_t kaddr)
5580Sstevel@tonic-gate {
559*6036Smec 	caddr_t addr = NULL;
5600Sstevel@tonic-gate 	struct as *as = curproc->p_as;
5610Sstevel@tonic-gate 	struct segvn_crargs vn_a;
5620Sstevel@tonic-gate 	int error;
5630Sstevel@tonic-gate 
5640Sstevel@tonic-gate 	as_rangelock(as);
5650Sstevel@tonic-gate 	/* pass address of kernel mapping as offset to avoid VAC conflicts */
5660Sstevel@tonic-gate 	map_addr(&addr, PAGESIZE, (offset_t)(uintptr_t)kaddr, 1, 0);
5670Sstevel@tonic-gate 	if (addr == NULL) {
5680Sstevel@tonic-gate 		as_rangeunlock(as);
5690Sstevel@tonic-gate 		return (ENOMEM);
5700Sstevel@tonic-gate 	}
5710Sstevel@tonic-gate 
5720Sstevel@tonic-gate 	/*
5730Sstevel@tonic-gate 	 * Use segvn to set up the mapping to the page.
5740Sstevel@tonic-gate 	 */
5750Sstevel@tonic-gate 	vn_a.vp = NULL;
5760Sstevel@tonic-gate 	vn_a.offset = 0;
5770Sstevel@tonic-gate 	vn_a.cred = NULL;
5780Sstevel@tonic-gate 	vn_a.type = MAP_SHARED;
5790Sstevel@tonic-gate 	vn_a.prot = vn_a.maxprot = PROT_ALL;
5800Sstevel@tonic-gate 	vn_a.flags = 0;
5810Sstevel@tonic-gate 	vn_a.amp = amp;
5820Sstevel@tonic-gate 	vn_a.szc = 0;
5830Sstevel@tonic-gate 	vn_a.lgrp_mem_policy_flags = 0;
5840Sstevel@tonic-gate 	error = as_map(as, addr, PAGESIZE, segvn_create, &vn_a);
5850Sstevel@tonic-gate 	as_rangeunlock(as);
5860Sstevel@tonic-gate 
5870Sstevel@tonic-gate 	if (error)
5880Sstevel@tonic-gate 		return (error);
5890Sstevel@tonic-gate 
5900Sstevel@tonic-gate 	*uaddrp = addr;
5910Sstevel@tonic-gate 	return (0);
5920Sstevel@tonic-gate }
5930Sstevel@tonic-gate 
5940Sstevel@tonic-gate 
5950Sstevel@tonic-gate /*
5960Sstevel@tonic-gate  * Allocate a new page from anonymous memory.  Also, create a kernel
5970Sstevel@tonic-gate  * mapping to the page and lock the page in memory.
5980Sstevel@tonic-gate  */
5990Sstevel@tonic-gate static int
6000Sstevel@tonic-gate schedctl_getpage(struct anon_map **newamp, caddr_t *newaddr)
6010Sstevel@tonic-gate {
6020Sstevel@tonic-gate 	struct anon_map *amp;
6030Sstevel@tonic-gate 	caddr_t kaddr;
6040Sstevel@tonic-gate 
6050Sstevel@tonic-gate 	/*
6060Sstevel@tonic-gate 	 * Set up anonymous memory struct.  No swap reservation is
6070Sstevel@tonic-gate 	 * needed since the page will be locked into memory.
6080Sstevel@tonic-gate 	 */
6094426Saguzovsk 	amp = anonmap_alloc(PAGESIZE, 0, ANON_SLEEP);
6100Sstevel@tonic-gate 
6110Sstevel@tonic-gate 	/*
6120Sstevel@tonic-gate 	 * Allocate the page.
6130Sstevel@tonic-gate 	 */
6143247Sgjelinek 	kaddr = segkp_get_withanonmap(segkp, PAGESIZE,
6153247Sgjelinek 	    KPD_NO_ANON | KPD_LOCKED | KPD_ZERO, amp);
6160Sstevel@tonic-gate 	if (kaddr == NULL) {
6170Sstevel@tonic-gate 		amp->refcnt--;
6180Sstevel@tonic-gate 		anonmap_free(amp);
6190Sstevel@tonic-gate 		return (ENOMEM);
6200Sstevel@tonic-gate 	}
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 	/*
6230Sstevel@tonic-gate 	 * The page is left SE_SHARED locked so that it won't be
6240Sstevel@tonic-gate 	 * paged out or relocated (KPD_LOCKED above).
6250Sstevel@tonic-gate 	 */
6260Sstevel@tonic-gate 
6270Sstevel@tonic-gate 	*newamp = amp;
6280Sstevel@tonic-gate 	*newaddr = kaddr;
6290Sstevel@tonic-gate 	return (0);
6300Sstevel@tonic-gate }
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate 
6330Sstevel@tonic-gate /*
6340Sstevel@tonic-gate  * Take the necessary steps to allow a page to be released.
6350Sstevel@tonic-gate  * This is called when the process is doing exit() or exec().
6360Sstevel@tonic-gate  * There should be no accesses to the page after this.
6370Sstevel@tonic-gate  * The kernel mapping of the page is released and the page is unlocked.
6380Sstevel@tonic-gate  */
6390Sstevel@tonic-gate static void
6400Sstevel@tonic-gate schedctl_freepage(struct anon_map *amp, caddr_t kaddr)
6410Sstevel@tonic-gate {
6420Sstevel@tonic-gate 	/*
6430Sstevel@tonic-gate 	 * Release the lock on the page and remove the kernel mapping.
6440Sstevel@tonic-gate 	 */
6450Sstevel@tonic-gate 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
6460Sstevel@tonic-gate 	segkp_release(segkp, kaddr);
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	/*
6490Sstevel@tonic-gate 	 * Decrement the refcnt so the anon_map structure will be freed.
6500Sstevel@tonic-gate 	 */
6510Sstevel@tonic-gate 	if (--amp->refcnt == 0) {
6520Sstevel@tonic-gate 		/*
6530Sstevel@tonic-gate 		 * The current process no longer has the page mapped, so
6540Sstevel@tonic-gate 		 * we have to free everything rather than letting as_free
6550Sstevel@tonic-gate 		 * do the work.
6560Sstevel@tonic-gate 		 */
6570Sstevel@tonic-gate 		anon_free(amp->ahp, 0, PAGESIZE);
6580Sstevel@tonic-gate 		ANON_LOCK_EXIT(&amp->a_rwlock);
6590Sstevel@tonic-gate 		anonmap_free(amp);
6600Sstevel@tonic-gate 	} else {
6610Sstevel@tonic-gate 		ANON_LOCK_EXIT(&amp->a_rwlock);
6620Sstevel@tonic-gate 	}
6630Sstevel@tonic-gate }
664