/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/schedctl.h>
#include <sys/proc.h>
#include <sys/thread.h>
#include <sys/class.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/stack.h>
#include <sys/debug.h>
#include <sys/cpuvar.h>
#include <sys/sobject.h>
#include <sys/door.h>
#include <sys/modctl.h>
#include <sys/syscall.h>
#include <sys/sysmacros.h>
#include <sys/vmsystm.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/swap.h>
#include <sys/lwp.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/fcntl.h>
#include <vm/seg_kp.h>
#include <vm/seg_vn.h>
#include <vm/as.h>
#include <fs/fs_subr.h>

/*
 * Page handling structures.  This is set up as a list of per-page
 * control structures (sc_page_ctl), with p->p_pagep pointing to
 * the first.  The per-page structures point to the actual pages
 * and contain pointers to the user address for each mapped page.
 *
 * All data is protected by p->p_sc_lock.  Since this lock is
 * held while waiting for memory, schedctl_shared_alloc() should
 * not be called while holding p_lock.
 */

680Sstevel@tonic-gate typedef struct sc_page_ctl {
690Sstevel@tonic-gate struct sc_page_ctl *spc_next;
700Sstevel@tonic-gate sc_shared_t *spc_base; /* base of kernel page */
710Sstevel@tonic-gate sc_shared_t *spc_end; /* end of usable space */
720Sstevel@tonic-gate ulong_t *spc_map; /* bitmap of allocated space on page */
730Sstevel@tonic-gate size_t spc_space; /* amount of space on page */
740Sstevel@tonic-gate caddr_t spc_uaddr; /* user-level address of the page */
750Sstevel@tonic-gate struct anon_map *spc_amp; /* anonymous memory structure */
760Sstevel@tonic-gate } sc_page_ctl_t;
770Sstevel@tonic-gate
780Sstevel@tonic-gate static size_t sc_pagesize; /* size of usable space on page */
790Sstevel@tonic-gate static size_t sc_bitmap_len; /* # of bits in allocation bitmap */
800Sstevel@tonic-gate static size_t sc_bitmap_words; /* # of words in allocation bitmap */
810Sstevel@tonic-gate
820Sstevel@tonic-gate /* Context ops */
830Sstevel@tonic-gate static void schedctl_save(sc_shared_t *);
840Sstevel@tonic-gate static void schedctl_restore(sc_shared_t *);
850Sstevel@tonic-gate static void schedctl_fork(kthread_t *, kthread_t *);
860Sstevel@tonic-gate
870Sstevel@tonic-gate /* Functions for handling shared pages */
880Sstevel@tonic-gate static int schedctl_shared_alloc(sc_shared_t **, uintptr_t *);
890Sstevel@tonic-gate static sc_page_ctl_t *schedctl_page_lookup(sc_shared_t *);
900Sstevel@tonic-gate static int schedctl_map(struct anon_map *, caddr_t *, caddr_t);
910Sstevel@tonic-gate static int schedctl_getpage(struct anon_map **, caddr_t *);
920Sstevel@tonic-gate static void schedctl_freepage(struct anon_map *, caddr_t);
930Sstevel@tonic-gate
940Sstevel@tonic-gate /*
950Sstevel@tonic-gate * System call interface to scheduler activations.
960Sstevel@tonic-gate * This always operates on the current lwp.
970Sstevel@tonic-gate */
980Sstevel@tonic-gate caddr_t
schedctl(void)990Sstevel@tonic-gate schedctl(void)
1000Sstevel@tonic-gate {
1010Sstevel@tonic-gate kthread_t *t = curthread;
1020Sstevel@tonic-gate sc_shared_t *ssp;
1030Sstevel@tonic-gate uintptr_t uaddr;
1040Sstevel@tonic-gate int error;
1050Sstevel@tonic-gate
1060Sstevel@tonic-gate if (t->t_schedctl == NULL) {
1070Sstevel@tonic-gate /*
1080Sstevel@tonic-gate * Allocate and initialize the shared structure.
1090Sstevel@tonic-gate */
1100Sstevel@tonic-gate if ((error = schedctl_shared_alloc(&ssp, &uaddr)) != 0)
1110Sstevel@tonic-gate return ((caddr_t)(uintptr_t)set_errno(error));
1120Sstevel@tonic-gate bzero(ssp, sizeof (*ssp));
1130Sstevel@tonic-gate
1140Sstevel@tonic-gate installctx(t, ssp, schedctl_save, schedctl_restore,
1150Sstevel@tonic-gate schedctl_fork, NULL, NULL, NULL);
1160Sstevel@tonic-gate
1170Sstevel@tonic-gate thread_lock(t); /* protect against ts_tick and ts_update */
1180Sstevel@tonic-gate t->t_schedctl = ssp;
1190Sstevel@tonic-gate t->t_sc_uaddr = uaddr;
1206247Sraf ssp->sc_cid = t->t_cid;
1216247Sraf ssp->sc_cpri = t->t_cpri;
1226247Sraf ssp->sc_priority = DISP_PRIO(t);
1230Sstevel@tonic-gate thread_unlock(t);
1240Sstevel@tonic-gate }
1250Sstevel@tonic-gate
1260Sstevel@tonic-gate return ((caddr_t)t->t_sc_uaddr);
1270Sstevel@tonic-gate }
1280Sstevel@tonic-gate
1290Sstevel@tonic-gate
1300Sstevel@tonic-gate /*
1310Sstevel@tonic-gate * Clean up scheduler activations state associated with an exiting
1320Sstevel@tonic-gate * (or execing) lwp. t is always the current thread.
1330Sstevel@tonic-gate */
1340Sstevel@tonic-gate void
schedctl_lwp_cleanup(kthread_t * t)1350Sstevel@tonic-gate schedctl_lwp_cleanup(kthread_t *t)
1360Sstevel@tonic-gate {
1370Sstevel@tonic-gate sc_shared_t *ssp = t->t_schedctl;
1380Sstevel@tonic-gate proc_t *p = ttoproc(t);
1390Sstevel@tonic-gate sc_page_ctl_t *pagep;
1400Sstevel@tonic-gate index_t index;
1410Sstevel@tonic-gate
1420Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&p->p_lock));
1430Sstevel@tonic-gate
1440Sstevel@tonic-gate thread_lock(t); /* protect against ts_tick and ts_update */
1450Sstevel@tonic-gate t->t_schedctl = NULL;
1460Sstevel@tonic-gate t->t_sc_uaddr = 0;
1470Sstevel@tonic-gate thread_unlock(t);
1480Sstevel@tonic-gate
1490Sstevel@tonic-gate /*
1500Sstevel@tonic-gate * Remove the context op to avoid the final call to
1510Sstevel@tonic-gate * schedctl_save when switching away from this lwp.
1520Sstevel@tonic-gate */
1530Sstevel@tonic-gate (void) removectx(t, ssp, schedctl_save, schedctl_restore,
1540Sstevel@tonic-gate schedctl_fork, NULL, NULL, NULL);
1550Sstevel@tonic-gate
1560Sstevel@tonic-gate /*
1570Sstevel@tonic-gate * Do not unmap the shared page until the process exits.
1580Sstevel@tonic-gate * User-level library code relies on this for adaptive mutex locking.
1590Sstevel@tonic-gate */
1600Sstevel@tonic-gate mutex_enter(&p->p_sc_lock);
1610Sstevel@tonic-gate ssp->sc_state = SC_FREE;
1620Sstevel@tonic-gate pagep = schedctl_page_lookup(ssp);
1630Sstevel@tonic-gate index = (index_t)(ssp - pagep->spc_base);
1640Sstevel@tonic-gate BT_CLEAR(pagep->spc_map, index);
1650Sstevel@tonic-gate pagep->spc_space += sizeof (sc_shared_t);
1660Sstevel@tonic-gate mutex_exit(&p->p_sc_lock);
1670Sstevel@tonic-gate }
1680Sstevel@tonic-gate
1695891Sraf
1700Sstevel@tonic-gate /*
1710Sstevel@tonic-gate * Cleanup the list of schedctl shared pages for the process.
1720Sstevel@tonic-gate * Called from exec() and exit() system calls.
1730Sstevel@tonic-gate */
1740Sstevel@tonic-gate void
schedctl_proc_cleanup(void)1755891Sraf schedctl_proc_cleanup(void)
1760Sstevel@tonic-gate {
1770Sstevel@tonic-gate proc_t *p = curproc;
1780Sstevel@tonic-gate sc_page_ctl_t *pagep;
1790Sstevel@tonic-gate sc_page_ctl_t *next;
1800Sstevel@tonic-gate
1810Sstevel@tonic-gate ASSERT(p->p_lwpcnt == 1); /* we are single-threaded now */
1820Sstevel@tonic-gate ASSERT(curthread->t_schedctl == NULL);
1830Sstevel@tonic-gate
1840Sstevel@tonic-gate /*
1850Sstevel@tonic-gate * Since we are single-threaded, we don't have to hold p->p_sc_lock.
1860Sstevel@tonic-gate */
1870Sstevel@tonic-gate pagep = p->p_pagep;
1880Sstevel@tonic-gate p->p_pagep = NULL;
1890Sstevel@tonic-gate while (pagep != NULL) {
1900Sstevel@tonic-gate ASSERT(pagep->spc_space == sc_pagesize);
1910Sstevel@tonic-gate next = pagep->spc_next;
1920Sstevel@tonic-gate /*
1930Sstevel@tonic-gate * Unmap the user space and free the mapping structure.
1940Sstevel@tonic-gate */
1950Sstevel@tonic-gate (void) as_unmap(p->p_as, pagep->spc_uaddr, PAGESIZE);
1960Sstevel@tonic-gate schedctl_freepage(pagep->spc_amp, (caddr_t)(pagep->spc_base));
1970Sstevel@tonic-gate kmem_free(pagep->spc_map, sizeof (ulong_t) * sc_bitmap_words);
1980Sstevel@tonic-gate kmem_free(pagep, sizeof (sc_page_ctl_t));
1990Sstevel@tonic-gate pagep = next;
2000Sstevel@tonic-gate }
2010Sstevel@tonic-gate }
2020Sstevel@tonic-gate
2035891Sraf
2040Sstevel@tonic-gate /*
2050Sstevel@tonic-gate * Called by resume just before switching away from the current thread.
2060Sstevel@tonic-gate * Save new thread state.
2070Sstevel@tonic-gate */
2086247Sraf static void
schedctl_save(sc_shared_t * ssp)2090Sstevel@tonic-gate schedctl_save(sc_shared_t *ssp)
2100Sstevel@tonic-gate {
2110Sstevel@tonic-gate ssp->sc_state = curthread->t_state;
2120Sstevel@tonic-gate }
2130Sstevel@tonic-gate
2140Sstevel@tonic-gate
2150Sstevel@tonic-gate /*
2160Sstevel@tonic-gate * Called by resume after switching to the current thread.
2170Sstevel@tonic-gate * Save new thread state and CPU.
2180Sstevel@tonic-gate */
2196247Sraf static void
schedctl_restore(sc_shared_t * ssp)2200Sstevel@tonic-gate schedctl_restore(sc_shared_t *ssp)
2210Sstevel@tonic-gate {
2220Sstevel@tonic-gate ssp->sc_state = SC_ONPROC;
2230Sstevel@tonic-gate ssp->sc_cpu = CPU->cpu_id;
2240Sstevel@tonic-gate }
2250Sstevel@tonic-gate
2260Sstevel@tonic-gate
2270Sstevel@tonic-gate /*
2280Sstevel@tonic-gate * On fork, remove inherited mappings from the child's address space.
2290Sstevel@tonic-gate * The child's threads must call schedctl() to get new shared mappings.
2300Sstevel@tonic-gate */
2316247Sraf static void
schedctl_fork(kthread_t * pt,kthread_t * ct)2320Sstevel@tonic-gate schedctl_fork(kthread_t *pt, kthread_t *ct)
2330Sstevel@tonic-gate {
2340Sstevel@tonic-gate proc_t *pp = ttoproc(pt);
2350Sstevel@tonic-gate proc_t *cp = ttoproc(ct);
2360Sstevel@tonic-gate sc_page_ctl_t *pagep;
2370Sstevel@tonic-gate
2380Sstevel@tonic-gate ASSERT(ct->t_schedctl == NULL);
2390Sstevel@tonic-gate
2400Sstevel@tonic-gate /*
2410Sstevel@tonic-gate * Do this only once, whether we are doing fork1() or forkall().
2420Sstevel@tonic-gate * Don't do it at all if the child process is a child of vfork()
2430Sstevel@tonic-gate * because a child of vfork() borrows the parent's address space.
2440Sstevel@tonic-gate */
2450Sstevel@tonic-gate if (pt != curthread || (cp->p_flag & SVFORK))
2460Sstevel@tonic-gate return;
2470Sstevel@tonic-gate
2480Sstevel@tonic-gate mutex_enter(&pp->p_sc_lock);
2490Sstevel@tonic-gate for (pagep = pp->p_pagep; pagep != NULL; pagep = pagep->spc_next)
2500Sstevel@tonic-gate (void) as_unmap(cp->p_as, pagep->spc_uaddr, PAGESIZE);
2510Sstevel@tonic-gate mutex_exit(&pp->p_sc_lock);
2520Sstevel@tonic-gate }
2530Sstevel@tonic-gate
2545891Sraf
2550Sstevel@tonic-gate /*
2560Sstevel@tonic-gate * Returns non-zero if the specified thread shouldn't be preempted at this time.
2576247Sraf * Called by ts_preempt(), ts_tick(), and ts_update().
2580Sstevel@tonic-gate */
2590Sstevel@tonic-gate int
schedctl_get_nopreempt(kthread_t * t)2600Sstevel@tonic-gate schedctl_get_nopreempt(kthread_t *t)
2610Sstevel@tonic-gate {
2620Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(t));
2630Sstevel@tonic-gate return (t->t_schedctl->sc_preemptctl.sc_nopreempt);
2640Sstevel@tonic-gate }
2650Sstevel@tonic-gate
2660Sstevel@tonic-gate
2670Sstevel@tonic-gate /*
2680Sstevel@tonic-gate * Sets the value of the nopreempt field for the specified thread.
2696247Sraf * Called by ts_preempt() to clear the field on preemption.
2700Sstevel@tonic-gate */
2710Sstevel@tonic-gate void
schedctl_set_nopreempt(kthread_t * t,short val)2720Sstevel@tonic-gate schedctl_set_nopreempt(kthread_t *t, short val)
2730Sstevel@tonic-gate {
2740Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(t));
2750Sstevel@tonic-gate t->t_schedctl->sc_preemptctl.sc_nopreempt = val;
2760Sstevel@tonic-gate }
2770Sstevel@tonic-gate
2780Sstevel@tonic-gate
2790Sstevel@tonic-gate /*
2806247Sraf * Sets the value of the yield field for the specified thread.
2816247Sraf * Called by ts_preempt() and ts_tick() to set the field, and
2826247Sraf * ts_yield() to clear it.
2836247Sraf * The kernel never looks at this field so we don't need a
2846247Sraf * schedctl_get_yield() function.
2850Sstevel@tonic-gate */
2860Sstevel@tonic-gate void
schedctl_set_yield(kthread_t * t,short val)2870Sstevel@tonic-gate schedctl_set_yield(kthread_t *t, short val)
2880Sstevel@tonic-gate {
2890Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(t));
2900Sstevel@tonic-gate t->t_schedctl->sc_preemptctl.sc_yield = val;
2910Sstevel@tonic-gate }
2920Sstevel@tonic-gate
2930Sstevel@tonic-gate
2940Sstevel@tonic-gate /*
2956247Sraf * Sets the values of the cid and priority fields for the specified thread.
2966247Sraf * Called from thread_change_pri(), thread_change_epri(), THREAD_CHANGE_PRI().
2976247Sraf * Called following calls to CL_FORKRET() and CL_ENTERCLASS().
2986247Sraf */
2996247Sraf void
schedctl_set_cidpri(kthread_t * t)3006247Sraf schedctl_set_cidpri(kthread_t *t)
3016247Sraf {
3026247Sraf sc_shared_t *tdp = t->t_schedctl;
3036247Sraf
3046247Sraf if (tdp != NULL) {
3056247Sraf tdp->sc_cid = t->t_cid;
3066247Sraf tdp->sc_cpri = t->t_cpri;
3076247Sraf tdp->sc_priority = DISP_PRIO(t);
3086247Sraf }
3096247Sraf }
3106247Sraf
3116247Sraf
3126247Sraf /*
3130Sstevel@tonic-gate * Returns non-zero if the specified thread has requested that all
3140Sstevel@tonic-gate * signals be blocked. Called by signal-related code that tests
3150Sstevel@tonic-gate * the signal mask of a thread that may not be the current thread
3160Sstevel@tonic-gate * and where the process's p_lock cannot be acquired.
3170Sstevel@tonic-gate */
3180Sstevel@tonic-gate int
schedctl_sigblock(kthread_t * t)3190Sstevel@tonic-gate schedctl_sigblock(kthread_t *t)
3200Sstevel@tonic-gate {
3210Sstevel@tonic-gate sc_shared_t *tdp = t->t_schedctl;
3220Sstevel@tonic-gate
3234389Ssl108498 if (tdp != NULL)
3240Sstevel@tonic-gate return (tdp->sc_sigblock);
3250Sstevel@tonic-gate return (0);
3260Sstevel@tonic-gate }
3270Sstevel@tonic-gate
3280Sstevel@tonic-gate
3290Sstevel@tonic-gate /*
3300Sstevel@tonic-gate * If the sc_sigblock field is set for the specified thread, set
3310Sstevel@tonic-gate * its signal mask to block all maskable signals, then clear the
3320Sstevel@tonic-gate * sc_sigblock field. This finishes what user-level code requested
3330Sstevel@tonic-gate * to be done when it set tdp->sc_shared->sc_sigblock non-zero.
33410341SRoger.Faulkner@Sun.COM * Called from signal-related code either by the current thread for
33510341SRoger.Faulkner@Sun.COM * itself or by a thread that holds the process's p_lock (/proc code).
3360Sstevel@tonic-gate */
3370Sstevel@tonic-gate void
schedctl_finish_sigblock(kthread_t * t)3380Sstevel@tonic-gate schedctl_finish_sigblock(kthread_t *t)
3390Sstevel@tonic-gate {
3400Sstevel@tonic-gate sc_shared_t *tdp = t->t_schedctl;
3410Sstevel@tonic-gate
34210341SRoger.Faulkner@Sun.COM ASSERT(t == curthread || MUTEX_HELD(&ttoproc(t)->p_lock));
3430Sstevel@tonic-gate
3444389Ssl108498 if (tdp != NULL && tdp->sc_sigblock) {
3450Sstevel@tonic-gate t->t_hold.__sigbits[0] = FILLSET0 & ~CANTMASK0;
3460Sstevel@tonic-gate t->t_hold.__sigbits[1] = FILLSET1 & ~CANTMASK1;
347*11913SRoger.Faulkner@Sun.COM t->t_hold.__sigbits[2] = FILLSET2 & ~CANTMASK2;
3480Sstevel@tonic-gate tdp->sc_sigblock = 0;
3490Sstevel@tonic-gate }
3500Sstevel@tonic-gate }
3510Sstevel@tonic-gate
3520Sstevel@tonic-gate
3530Sstevel@tonic-gate /*
3545891Sraf * Return non-zero if the current thread has declared that it has
3555891Sraf * a cancellation pending and that cancellation is not disabled.
3565891Sraf * If SIGCANCEL is blocked, we must be going over the wire in an
3575891Sraf * NFS transaction (sigintr() was called); return zero in this case.
3585891Sraf */
3595891Sraf int
schedctl_cancel_pending(void)3605891Sraf schedctl_cancel_pending(void)
3615891Sraf {
3625891Sraf sc_shared_t *tdp = curthread->t_schedctl;
3635891Sraf
3645891Sraf if (tdp != NULL &&
3655891Sraf (tdp->sc_flgs & SC_CANCEL_FLG) &&
3665891Sraf !tdp->sc_sigblock &&
3675891Sraf !sigismember(&curthread->t_hold, SIGCANCEL))
3685891Sraf return (1);
3695891Sraf return (0);
3705891Sraf }
3715891Sraf
3725891Sraf
3735891Sraf /*
3745891Sraf * Inform libc that the kernel returned EINTR from some system call
3755891Sraf * due to there being a cancellation pending (SC_CANCEL_FLG set or
3765891Sraf * we received an SI_LWP SIGCANCEL while in a system call), rather
3775891Sraf * than because of some other signal. User-level code can try to
3785891Sraf * recover from receiving other signals, but it can't recover from
3795891Sraf * being cancelled.
3805891Sraf */
3815891Sraf void
schedctl_cancel_eintr(void)3825891Sraf schedctl_cancel_eintr(void)
3835891Sraf {
3845891Sraf sc_shared_t *tdp = curthread->t_schedctl;
3855891Sraf
3865891Sraf if (tdp != NULL)
3875891Sraf tdp->sc_flgs |= SC_EINTR_FLG;
3885891Sraf }
3895891Sraf
3905891Sraf
3915891Sraf /*
3920Sstevel@tonic-gate * Return non-zero if the current thread has declared that
3930Sstevel@tonic-gate * it is calling into the kernel to park, else return zero.
3940Sstevel@tonic-gate */
3950Sstevel@tonic-gate int
schedctl_is_park(void)3965891Sraf schedctl_is_park(void)
3970Sstevel@tonic-gate {
3980Sstevel@tonic-gate sc_shared_t *tdp = curthread->t_schedctl;
3990Sstevel@tonic-gate
4004389Ssl108498 if (tdp != NULL)
4015891Sraf return ((tdp->sc_flgs & SC_PARK_FLG) != 0);
4020Sstevel@tonic-gate /*
4030Sstevel@tonic-gate * If we're here and there is no shared memory (how could
4040Sstevel@tonic-gate * that happen?) then just assume we really are here to park.
4050Sstevel@tonic-gate */
4060Sstevel@tonic-gate return (1);
4070Sstevel@tonic-gate }
4080Sstevel@tonic-gate
4095891Sraf
4104389Ssl108498 /*
4114389Ssl108498 * Declare thread is parking.
4124389Ssl108498 *
4135891Sraf * libc will set "sc_flgs |= SC_PARK_FLG" before calling lwpsys_park(0, tid)
4145891Sraf * in order to declare that the thread is calling into the kernel to park.
4154389Ssl108498 *
4164389Ssl108498 * This interface exists ONLY to support older versions of libthread which
4175891Sraf * are not aware of the SC_PARK_FLG flag.
4184389Ssl108498 *
4195891Sraf * Older versions of libthread which are not aware of the SC_PARK_FLG flag
4205891Sraf * need to be modified or emulated to call lwpsys_park(4, ...) instead of
4214389Ssl108498 * lwpsys_park(0, ...). This will invoke schedctl_set_park() before
4224389Ssl108498 * lwp_park() to declare that the thread is parking.
4234389Ssl108498 */
4244389Ssl108498 void
schedctl_set_park(void)4255891Sraf schedctl_set_park(void)
4265891Sraf {
4275891Sraf sc_shared_t *tdp = curthread->t_schedctl;
4285891Sraf if (tdp != NULL)
4295891Sraf tdp->sc_flgs |= SC_PARK_FLG;
4305891Sraf }
4315891Sraf
4325891Sraf
4335891Sraf /*
4345891Sraf * Clear the parking flag on return from parking in the kernel.
4355891Sraf */
4365891Sraf void
schedctl_unpark(void)4375891Sraf schedctl_unpark(void)
4384389Ssl108498 {
4394389Ssl108498 sc_shared_t *tdp = curthread->t_schedctl;
4404389Ssl108498
4414389Ssl108498 if (tdp != NULL)
4425891Sraf tdp->sc_flgs &= ~SC_PARK_FLG;
4430Sstevel@tonic-gate }
4440Sstevel@tonic-gate
4450Sstevel@tonic-gate
4460Sstevel@tonic-gate /*
4470Sstevel@tonic-gate * Page handling code.
4480Sstevel@tonic-gate */
4490Sstevel@tonic-gate
4500Sstevel@tonic-gate void
schedctl_init(void)4515891Sraf schedctl_init(void)
4520Sstevel@tonic-gate {
4530Sstevel@tonic-gate /*
4540Sstevel@tonic-gate * Amount of page that can hold sc_shared_t structures. If
4550Sstevel@tonic-gate * sizeof (sc_shared_t) is a power of 2, this should just be
4560Sstevel@tonic-gate * PAGESIZE.
4570Sstevel@tonic-gate */
4580Sstevel@tonic-gate sc_pagesize = PAGESIZE - (PAGESIZE % sizeof (sc_shared_t));
4590Sstevel@tonic-gate
4600Sstevel@tonic-gate /*
4610Sstevel@tonic-gate * Allocation bitmap is one bit per struct on a page.
4620Sstevel@tonic-gate */
4630Sstevel@tonic-gate sc_bitmap_len = sc_pagesize / sizeof (sc_shared_t);
4640Sstevel@tonic-gate sc_bitmap_words = howmany(sc_bitmap_len, BT_NBIPUL);
4650Sstevel@tonic-gate }
4660Sstevel@tonic-gate
4675891Sraf
4686247Sraf static int
schedctl_shared_alloc(sc_shared_t ** kaddrp,uintptr_t * uaddrp)4690Sstevel@tonic-gate schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp)
4700Sstevel@tonic-gate {
4710Sstevel@tonic-gate proc_t *p = curproc;
4720Sstevel@tonic-gate sc_page_ctl_t *pagep;
4730Sstevel@tonic-gate sc_shared_t *ssp;
4740Sstevel@tonic-gate caddr_t base;
4750Sstevel@tonic-gate index_t index;
4760Sstevel@tonic-gate int error;
4770Sstevel@tonic-gate
4780Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&p->p_lock));
4790Sstevel@tonic-gate mutex_enter(&p->p_sc_lock);
4800Sstevel@tonic-gate
4810Sstevel@tonic-gate /*
4820Sstevel@tonic-gate * Try to find space for the new data in existing pages
4830Sstevel@tonic-gate * within the process's list of shared pages.
4840Sstevel@tonic-gate */
4850Sstevel@tonic-gate for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next)
4860Sstevel@tonic-gate if (pagep->spc_space != 0)
4870Sstevel@tonic-gate break;
4880Sstevel@tonic-gate
4890Sstevel@tonic-gate if (pagep != NULL)
4900Sstevel@tonic-gate base = pagep->spc_uaddr;
4910Sstevel@tonic-gate else {
4920Sstevel@tonic-gate struct anon_map *amp;
4930Sstevel@tonic-gate caddr_t kaddr;
4940Sstevel@tonic-gate
4950Sstevel@tonic-gate /*
4960Sstevel@tonic-gate * No room, need to allocate a new page. Also set up
4970Sstevel@tonic-gate * a mapping to the kernel address space for the new
4980Sstevel@tonic-gate * page and lock it in memory.
4990Sstevel@tonic-gate */
5000Sstevel@tonic-gate if ((error = schedctl_getpage(&, &kaddr)) != 0) {
5010Sstevel@tonic-gate mutex_exit(&p->p_sc_lock);
5020Sstevel@tonic-gate return (error);
5030Sstevel@tonic-gate }
5040Sstevel@tonic-gate if ((error = schedctl_map(amp, &base, kaddr)) != 0) {
5050Sstevel@tonic-gate schedctl_freepage(amp, kaddr);
5060Sstevel@tonic-gate mutex_exit(&p->p_sc_lock);
5070Sstevel@tonic-gate return (error);
5080Sstevel@tonic-gate }
5090Sstevel@tonic-gate
5100Sstevel@tonic-gate /*
5110Sstevel@tonic-gate * Allocate and initialize the page control structure.
5120Sstevel@tonic-gate */
5130Sstevel@tonic-gate pagep = kmem_alloc(sizeof (sc_page_ctl_t), KM_SLEEP);
5140Sstevel@tonic-gate pagep->spc_amp = amp;
5150Sstevel@tonic-gate pagep->spc_base = (sc_shared_t *)kaddr;
5160Sstevel@tonic-gate pagep->spc_end = (sc_shared_t *)(kaddr + sc_pagesize);
5170Sstevel@tonic-gate pagep->spc_uaddr = base;
5180Sstevel@tonic-gate
5190Sstevel@tonic-gate pagep->spc_map = kmem_zalloc(sizeof (ulong_t) * sc_bitmap_words,
5200Sstevel@tonic-gate KM_SLEEP);
5210Sstevel@tonic-gate pagep->spc_space = sc_pagesize;
5220Sstevel@tonic-gate
5230Sstevel@tonic-gate pagep->spc_next = p->p_pagep;
5240Sstevel@tonic-gate p->p_pagep = pagep;
5250Sstevel@tonic-gate }
5260Sstevel@tonic-gate
5270Sstevel@tonic-gate /*
5280Sstevel@tonic-gate * Got a page, now allocate space for the data. There should
5290Sstevel@tonic-gate * be space unless something's wrong.
5300Sstevel@tonic-gate */
5310Sstevel@tonic-gate ASSERT(pagep != NULL && pagep->spc_space >= sizeof (sc_shared_t));
5320Sstevel@tonic-gate index = bt_availbit(pagep->spc_map, sc_bitmap_len);
5330Sstevel@tonic-gate ASSERT(index != -1);
5340Sstevel@tonic-gate
5350Sstevel@tonic-gate /*
5360Sstevel@tonic-gate * Get location with pointer arithmetic. spc_base is of type
5370Sstevel@tonic-gate * sc_shared_t *. Mark as allocated.
5380Sstevel@tonic-gate */
5390Sstevel@tonic-gate ssp = pagep->spc_base + index;
5400Sstevel@tonic-gate BT_SET(pagep->spc_map, index);
5410Sstevel@tonic-gate pagep->spc_space -= sizeof (sc_shared_t);
5420Sstevel@tonic-gate
5430Sstevel@tonic-gate mutex_exit(&p->p_sc_lock);
5440Sstevel@tonic-gate
5450Sstevel@tonic-gate /*
5460Sstevel@tonic-gate * Return kernel and user addresses.
5470Sstevel@tonic-gate */
5480Sstevel@tonic-gate *kaddrp = ssp;
5490Sstevel@tonic-gate *uaddrp = (uintptr_t)base + ((uintptr_t)ssp & PAGEOFFSET);
5500Sstevel@tonic-gate return (0);
5510Sstevel@tonic-gate }
5520Sstevel@tonic-gate
5530Sstevel@tonic-gate
5540Sstevel@tonic-gate /*
5550Sstevel@tonic-gate * Find the page control structure corresponding to a kernel address.
5560Sstevel@tonic-gate */
5570Sstevel@tonic-gate static sc_page_ctl_t *
schedctl_page_lookup(sc_shared_t * ssp)5580Sstevel@tonic-gate schedctl_page_lookup(sc_shared_t *ssp)
5590Sstevel@tonic-gate {
5600Sstevel@tonic-gate proc_t *p = curproc;
5610Sstevel@tonic-gate sc_page_ctl_t *pagep;
5620Sstevel@tonic-gate
5630Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_sc_lock));
5640Sstevel@tonic-gate for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) {
5650Sstevel@tonic-gate if (ssp >= pagep->spc_base && ssp < pagep->spc_end)
5660Sstevel@tonic-gate return (pagep);
5670Sstevel@tonic-gate }
5680Sstevel@tonic-gate return (NULL); /* This "can't happen". Should we panic? */
5690Sstevel@tonic-gate }
5700Sstevel@tonic-gate
5710Sstevel@tonic-gate
5720Sstevel@tonic-gate /*
5730Sstevel@tonic-gate * This function is called when a page needs to be mapped into a
5740Sstevel@tonic-gate * process's address space. Allocate the user address space and
5750Sstevel@tonic-gate * set up the mapping to the page. Assumes the page has already
5760Sstevel@tonic-gate * been allocated and locked in memory via schedctl_getpage.
5770Sstevel@tonic-gate */
5780Sstevel@tonic-gate static int
schedctl_map(struct anon_map * amp,caddr_t * uaddrp,caddr_t kaddr)5790Sstevel@tonic-gate schedctl_map(struct anon_map *amp, caddr_t *uaddrp, caddr_t kaddr)
5800Sstevel@tonic-gate {
5816036Smec caddr_t addr = NULL;
5820Sstevel@tonic-gate struct as *as = curproc->p_as;
5830Sstevel@tonic-gate struct segvn_crargs vn_a;
5840Sstevel@tonic-gate int error;
5850Sstevel@tonic-gate
5860Sstevel@tonic-gate as_rangelock(as);
5870Sstevel@tonic-gate /* pass address of kernel mapping as offset to avoid VAC conflicts */
5880Sstevel@tonic-gate map_addr(&addr, PAGESIZE, (offset_t)(uintptr_t)kaddr, 1, 0);
5890Sstevel@tonic-gate if (addr == NULL) {
5900Sstevel@tonic-gate as_rangeunlock(as);
5910Sstevel@tonic-gate return (ENOMEM);
5920Sstevel@tonic-gate }
5930Sstevel@tonic-gate
5940Sstevel@tonic-gate /*
5950Sstevel@tonic-gate * Use segvn to set up the mapping to the page.
5960Sstevel@tonic-gate */
5970Sstevel@tonic-gate vn_a.vp = NULL;
5980Sstevel@tonic-gate vn_a.offset = 0;
5990Sstevel@tonic-gate vn_a.cred = NULL;
6000Sstevel@tonic-gate vn_a.type = MAP_SHARED;
6010Sstevel@tonic-gate vn_a.prot = vn_a.maxprot = PROT_ALL;
6020Sstevel@tonic-gate vn_a.flags = 0;
6030Sstevel@tonic-gate vn_a.amp = amp;
6040Sstevel@tonic-gate vn_a.szc = 0;
6050Sstevel@tonic-gate vn_a.lgrp_mem_policy_flags = 0;
6060Sstevel@tonic-gate error = as_map(as, addr, PAGESIZE, segvn_create, &vn_a);
6070Sstevel@tonic-gate as_rangeunlock(as);
6080Sstevel@tonic-gate
6090Sstevel@tonic-gate if (error)
6100Sstevel@tonic-gate return (error);
6110Sstevel@tonic-gate
6120Sstevel@tonic-gate *uaddrp = addr;
6130Sstevel@tonic-gate return (0);
6140Sstevel@tonic-gate }
6150Sstevel@tonic-gate
6160Sstevel@tonic-gate
6170Sstevel@tonic-gate /*
6180Sstevel@tonic-gate * Allocate a new page from anonymous memory. Also, create a kernel
6190Sstevel@tonic-gate * mapping to the page and lock the page in memory.
6200Sstevel@tonic-gate */
6210Sstevel@tonic-gate static int
schedctl_getpage(struct anon_map ** newamp,caddr_t * newaddr)6220Sstevel@tonic-gate schedctl_getpage(struct anon_map **newamp, caddr_t *newaddr)
6230Sstevel@tonic-gate {
6240Sstevel@tonic-gate struct anon_map *amp;
6250Sstevel@tonic-gate caddr_t kaddr;
6260Sstevel@tonic-gate
6270Sstevel@tonic-gate /*
6280Sstevel@tonic-gate * Set up anonymous memory struct. No swap reservation is
6290Sstevel@tonic-gate * needed since the page will be locked into memory.
6300Sstevel@tonic-gate */
6314426Saguzovsk amp = anonmap_alloc(PAGESIZE, 0, ANON_SLEEP);
6320Sstevel@tonic-gate
6330Sstevel@tonic-gate /*
6340Sstevel@tonic-gate * Allocate the page.
6350Sstevel@tonic-gate */
6363247Sgjelinek kaddr = segkp_get_withanonmap(segkp, PAGESIZE,
6373247Sgjelinek KPD_NO_ANON | KPD_LOCKED | KPD_ZERO, amp);
6380Sstevel@tonic-gate if (kaddr == NULL) {
6390Sstevel@tonic-gate amp->refcnt--;
6400Sstevel@tonic-gate anonmap_free(amp);
6410Sstevel@tonic-gate return (ENOMEM);
6420Sstevel@tonic-gate }
6430Sstevel@tonic-gate
6440Sstevel@tonic-gate /*
6450Sstevel@tonic-gate * The page is left SE_SHARED locked so that it won't be
6460Sstevel@tonic-gate * paged out or relocated (KPD_LOCKED above).
6470Sstevel@tonic-gate */
6480Sstevel@tonic-gate
6490Sstevel@tonic-gate *newamp = amp;
6500Sstevel@tonic-gate *newaddr = kaddr;
6510Sstevel@tonic-gate return (0);
6520Sstevel@tonic-gate }
6530Sstevel@tonic-gate
6540Sstevel@tonic-gate
6550Sstevel@tonic-gate /*
6560Sstevel@tonic-gate * Take the necessary steps to allow a page to be released.
6570Sstevel@tonic-gate * This is called when the process is doing exit() or exec().
6580Sstevel@tonic-gate * There should be no accesses to the page after this.
6590Sstevel@tonic-gate * The kernel mapping of the page is released and the page is unlocked.
6600Sstevel@tonic-gate */
6610Sstevel@tonic-gate static void
schedctl_freepage(struct anon_map * amp,caddr_t kaddr)6620Sstevel@tonic-gate schedctl_freepage(struct anon_map *amp, caddr_t kaddr)
6630Sstevel@tonic-gate {
6640Sstevel@tonic-gate /*
6650Sstevel@tonic-gate * Release the lock on the page and remove the kernel mapping.
6660Sstevel@tonic-gate */
6670Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER);
6680Sstevel@tonic-gate segkp_release(segkp, kaddr);
6690Sstevel@tonic-gate
6700Sstevel@tonic-gate /*
6710Sstevel@tonic-gate * Decrement the refcnt so the anon_map structure will be freed.
6720Sstevel@tonic-gate */
6730Sstevel@tonic-gate if (--amp->refcnt == 0) {
6740Sstevel@tonic-gate /*
6750Sstevel@tonic-gate * The current process no longer has the page mapped, so
6760Sstevel@tonic-gate * we have to free everything rather than letting as_free
6770Sstevel@tonic-gate * do the work.
6780Sstevel@tonic-gate */
6796695Saguzovsk anonmap_purge(amp);
6800Sstevel@tonic-gate anon_free(amp->ahp, 0, PAGESIZE);
6810Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock);
6820Sstevel@tonic-gate anonmap_free(amp);
6830Sstevel@tonic-gate } else {
6840Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock);
6850Sstevel@tonic-gate }
6860Sstevel@tonic-gate }
687