10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 53247Sgjelinek * Common Development and Distribution License (the "License"). 63247Sgjelinek * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 215891Sraf 220Sstevel@tonic-gate /* 23*10341SRoger.Faulkner@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #include <sys/types.h> 280Sstevel@tonic-gate #include <sys/systm.h> 290Sstevel@tonic-gate #include <sys/schedctl.h> 300Sstevel@tonic-gate #include <sys/proc.h> 310Sstevel@tonic-gate #include <sys/thread.h> 320Sstevel@tonic-gate #include <sys/class.h> 330Sstevel@tonic-gate #include <sys/cred.h> 340Sstevel@tonic-gate #include <sys/kmem.h> 350Sstevel@tonic-gate #include <sys/cmn_err.h> 360Sstevel@tonic-gate #include <sys/stack.h> 370Sstevel@tonic-gate #include <sys/debug.h> 380Sstevel@tonic-gate #include <sys/cpuvar.h> 390Sstevel@tonic-gate #include <sys/sobject.h> 400Sstevel@tonic-gate #include <sys/door.h> 410Sstevel@tonic-gate #include <sys/modctl.h> 420Sstevel@tonic-gate #include <sys/syscall.h> 430Sstevel@tonic-gate #include <sys/sysmacros.h> 440Sstevel@tonic-gate #include <sys/vmsystm.h> 450Sstevel@tonic-gate #include <sys/mman.h> 460Sstevel@tonic-gate #include <sys/vnode.h> 470Sstevel@tonic-gate #include <sys/swap.h> 480Sstevel@tonic-gate #include <sys/lwp.h> 490Sstevel@tonic-gate #include <sys/bitmap.h> 500Sstevel@tonic-gate #include <sys/atomic.h> 510Sstevel@tonic-gate #include <sys/fcntl.h> 520Sstevel@tonic-gate #include <vm/seg_kp.h> 530Sstevel@tonic-gate #include <vm/seg_vn.h> 540Sstevel@tonic-gate #include <vm/as.h> 550Sstevel@tonic-gate #include <fs/fs_subr.h> 560Sstevel@tonic-gate 570Sstevel@tonic-gate /* 580Sstevel@tonic-gate * Page handling structures. This is set up as a list of per-page 590Sstevel@tonic-gate * control structures (sc_page_ctl), with p->p_pagep pointing to 600Sstevel@tonic-gate * the first. The per-page structures point to the actual pages 610Sstevel@tonic-gate * and contain pointers to the user address for each mapped page. 620Sstevel@tonic-gate * 630Sstevel@tonic-gate * All data is protected by p->p_sc_lock. Since this lock is 640Sstevel@tonic-gate * held while waiting for memory, schedctl_shared_alloc() should 650Sstevel@tonic-gate * not be called while holding p_lock. 660Sstevel@tonic-gate */ 670Sstevel@tonic-gate 680Sstevel@tonic-gate typedef struct sc_page_ctl { 690Sstevel@tonic-gate struct sc_page_ctl *spc_next; 700Sstevel@tonic-gate sc_shared_t *spc_base; /* base of kernel page */ 710Sstevel@tonic-gate sc_shared_t *spc_end; /* end of usable space */ 720Sstevel@tonic-gate ulong_t *spc_map; /* bitmap of allocated space on page */ 730Sstevel@tonic-gate size_t spc_space; /* amount of space on page */ 740Sstevel@tonic-gate caddr_t spc_uaddr; /* user-level address of the page */ 750Sstevel@tonic-gate struct anon_map *spc_amp; /* anonymous memory structure */ 760Sstevel@tonic-gate } sc_page_ctl_t; 770Sstevel@tonic-gate 780Sstevel@tonic-gate static size_t sc_pagesize; /* size of usable space on page */ 790Sstevel@tonic-gate static size_t sc_bitmap_len; /* # of bits in allocation bitmap */ 800Sstevel@tonic-gate static size_t sc_bitmap_words; /* # of words in allocation bitmap */ 810Sstevel@tonic-gate 820Sstevel@tonic-gate /* Context ops */ 830Sstevel@tonic-gate static void schedctl_save(sc_shared_t *); 840Sstevel@tonic-gate static void schedctl_restore(sc_shared_t *); 850Sstevel@tonic-gate static void schedctl_fork(kthread_t *, kthread_t *); 860Sstevel@tonic-gate 870Sstevel@tonic-gate /* Functions for handling shared pages */ 880Sstevel@tonic-gate static int schedctl_shared_alloc(sc_shared_t **, uintptr_t *); 890Sstevel@tonic-gate static sc_page_ctl_t *schedctl_page_lookup(sc_shared_t *); 900Sstevel@tonic-gate static int schedctl_map(struct anon_map *, caddr_t *, caddr_t); 910Sstevel@tonic-gate static int schedctl_getpage(struct anon_map **, caddr_t *); 920Sstevel@tonic-gate static void schedctl_freepage(struct anon_map *, caddr_t); 930Sstevel@tonic-gate 940Sstevel@tonic-gate /* 950Sstevel@tonic-gate * System call interface to scheduler activations. 960Sstevel@tonic-gate * This always operates on the current lwp. 970Sstevel@tonic-gate */ 980Sstevel@tonic-gate caddr_t 990Sstevel@tonic-gate schedctl(void) 1000Sstevel@tonic-gate { 1010Sstevel@tonic-gate kthread_t *t = curthread; 1020Sstevel@tonic-gate sc_shared_t *ssp; 1030Sstevel@tonic-gate uintptr_t uaddr; 1040Sstevel@tonic-gate int error; 1050Sstevel@tonic-gate 1060Sstevel@tonic-gate if (t->t_schedctl == NULL) { 1070Sstevel@tonic-gate /* 1080Sstevel@tonic-gate * Allocate and initialize the shared structure. 1090Sstevel@tonic-gate */ 1100Sstevel@tonic-gate if ((error = schedctl_shared_alloc(&ssp, &uaddr)) != 0) 1110Sstevel@tonic-gate return ((caddr_t)(uintptr_t)set_errno(error)); 1120Sstevel@tonic-gate bzero(ssp, sizeof (*ssp)); 1130Sstevel@tonic-gate 1140Sstevel@tonic-gate installctx(t, ssp, schedctl_save, schedctl_restore, 1150Sstevel@tonic-gate schedctl_fork, NULL, NULL, NULL); 1160Sstevel@tonic-gate 1170Sstevel@tonic-gate thread_lock(t); /* protect against ts_tick and ts_update */ 1180Sstevel@tonic-gate t->t_schedctl = ssp; 1190Sstevel@tonic-gate t->t_sc_uaddr = uaddr; 1206247Sraf ssp->sc_cid = t->t_cid; 1216247Sraf ssp->sc_cpri = t->t_cpri; 1226247Sraf ssp->sc_priority = DISP_PRIO(t); 1230Sstevel@tonic-gate thread_unlock(t); 1240Sstevel@tonic-gate } 1250Sstevel@tonic-gate 1260Sstevel@tonic-gate return ((caddr_t)t->t_sc_uaddr); 1270Sstevel@tonic-gate } 1280Sstevel@tonic-gate 1290Sstevel@tonic-gate 1300Sstevel@tonic-gate /* 1310Sstevel@tonic-gate * Clean up scheduler activations state associated with an exiting 1320Sstevel@tonic-gate * (or execing) lwp. t is always the current thread. 1330Sstevel@tonic-gate */ 1340Sstevel@tonic-gate void 1350Sstevel@tonic-gate schedctl_lwp_cleanup(kthread_t *t) 1360Sstevel@tonic-gate { 1370Sstevel@tonic-gate sc_shared_t *ssp = t->t_schedctl; 1380Sstevel@tonic-gate proc_t *p = ttoproc(t); 1390Sstevel@tonic-gate sc_page_ctl_t *pagep; 1400Sstevel@tonic-gate index_t index; 1410Sstevel@tonic-gate 1420Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 1430Sstevel@tonic-gate 1440Sstevel@tonic-gate thread_lock(t); /* protect against ts_tick and ts_update */ 1450Sstevel@tonic-gate t->t_schedctl = NULL; 1460Sstevel@tonic-gate t->t_sc_uaddr = 0; 1470Sstevel@tonic-gate thread_unlock(t); 1480Sstevel@tonic-gate 1490Sstevel@tonic-gate /* 1500Sstevel@tonic-gate * Remove the context op to avoid the final call to 1510Sstevel@tonic-gate * schedctl_save when switching away from this lwp. 1520Sstevel@tonic-gate */ 1530Sstevel@tonic-gate (void) removectx(t, ssp, schedctl_save, schedctl_restore, 1540Sstevel@tonic-gate schedctl_fork, NULL, NULL, NULL); 1550Sstevel@tonic-gate 1560Sstevel@tonic-gate /* 1570Sstevel@tonic-gate * Do not unmap the shared page until the process exits. 1580Sstevel@tonic-gate * User-level library code relies on this for adaptive mutex locking. 1590Sstevel@tonic-gate */ 1600Sstevel@tonic-gate mutex_enter(&p->p_sc_lock); 1610Sstevel@tonic-gate ssp->sc_state = SC_FREE; 1620Sstevel@tonic-gate pagep = schedctl_page_lookup(ssp); 1630Sstevel@tonic-gate index = (index_t)(ssp - pagep->spc_base); 1640Sstevel@tonic-gate BT_CLEAR(pagep->spc_map, index); 1650Sstevel@tonic-gate pagep->spc_space += sizeof (sc_shared_t); 1660Sstevel@tonic-gate mutex_exit(&p->p_sc_lock); 1670Sstevel@tonic-gate } 1680Sstevel@tonic-gate 1695891Sraf 1700Sstevel@tonic-gate /* 1710Sstevel@tonic-gate * Cleanup the list of schedctl shared pages for the process. 1720Sstevel@tonic-gate * Called from exec() and exit() system calls. 1730Sstevel@tonic-gate */ 1740Sstevel@tonic-gate void 1755891Sraf schedctl_proc_cleanup(void) 1760Sstevel@tonic-gate { 1770Sstevel@tonic-gate proc_t *p = curproc; 1780Sstevel@tonic-gate sc_page_ctl_t *pagep; 1790Sstevel@tonic-gate sc_page_ctl_t *next; 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate ASSERT(p->p_lwpcnt == 1); /* we are single-threaded now */ 1820Sstevel@tonic-gate ASSERT(curthread->t_schedctl == NULL); 1830Sstevel@tonic-gate 1840Sstevel@tonic-gate /* 1850Sstevel@tonic-gate * Since we are single-threaded, we don't have to hold p->p_sc_lock. 1860Sstevel@tonic-gate */ 1870Sstevel@tonic-gate pagep = p->p_pagep; 1880Sstevel@tonic-gate p->p_pagep = NULL; 1890Sstevel@tonic-gate while (pagep != NULL) { 1900Sstevel@tonic-gate ASSERT(pagep->spc_space == sc_pagesize); 1910Sstevel@tonic-gate next = pagep->spc_next; 1920Sstevel@tonic-gate /* 1930Sstevel@tonic-gate * Unmap the user space and free the mapping structure. 1940Sstevel@tonic-gate */ 1950Sstevel@tonic-gate (void) as_unmap(p->p_as, pagep->spc_uaddr, PAGESIZE); 1960Sstevel@tonic-gate schedctl_freepage(pagep->spc_amp, (caddr_t)(pagep->spc_base)); 1970Sstevel@tonic-gate kmem_free(pagep->spc_map, sizeof (ulong_t) * sc_bitmap_words); 1980Sstevel@tonic-gate kmem_free(pagep, sizeof (sc_page_ctl_t)); 1990Sstevel@tonic-gate pagep = next; 2000Sstevel@tonic-gate } 2010Sstevel@tonic-gate } 2020Sstevel@tonic-gate 2035891Sraf 2040Sstevel@tonic-gate /* 2050Sstevel@tonic-gate * Called by resume just before switching away from the current thread. 2060Sstevel@tonic-gate * Save new thread state. 2070Sstevel@tonic-gate */ 2086247Sraf static void 2090Sstevel@tonic-gate schedctl_save(sc_shared_t *ssp) 2100Sstevel@tonic-gate { 2110Sstevel@tonic-gate ssp->sc_state = curthread->t_state; 2120Sstevel@tonic-gate } 2130Sstevel@tonic-gate 2140Sstevel@tonic-gate 2150Sstevel@tonic-gate /* 2160Sstevel@tonic-gate * Called by resume after switching to the current thread. 2170Sstevel@tonic-gate * Save new thread state and CPU. 2180Sstevel@tonic-gate */ 2196247Sraf static void 2200Sstevel@tonic-gate schedctl_restore(sc_shared_t *ssp) 2210Sstevel@tonic-gate { 2220Sstevel@tonic-gate ssp->sc_state = SC_ONPROC; 2230Sstevel@tonic-gate ssp->sc_cpu = CPU->cpu_id; 2240Sstevel@tonic-gate } 2250Sstevel@tonic-gate 2260Sstevel@tonic-gate 2270Sstevel@tonic-gate /* 2280Sstevel@tonic-gate * On fork, remove inherited mappings from the child's address space. 2290Sstevel@tonic-gate * The child's threads must call schedctl() to get new shared mappings. 2300Sstevel@tonic-gate */ 2316247Sraf static void 2320Sstevel@tonic-gate schedctl_fork(kthread_t *pt, kthread_t *ct) 2330Sstevel@tonic-gate { 2340Sstevel@tonic-gate proc_t *pp = ttoproc(pt); 2350Sstevel@tonic-gate proc_t *cp = ttoproc(ct); 2360Sstevel@tonic-gate sc_page_ctl_t *pagep; 2370Sstevel@tonic-gate 2380Sstevel@tonic-gate ASSERT(ct->t_schedctl == NULL); 2390Sstevel@tonic-gate 2400Sstevel@tonic-gate /* 2410Sstevel@tonic-gate * Do this only once, whether we are doing fork1() or forkall(). 2420Sstevel@tonic-gate * Don't do it at all if the child process is a child of vfork() 2430Sstevel@tonic-gate * because a child of vfork() borrows the parent's address space. 2440Sstevel@tonic-gate */ 2450Sstevel@tonic-gate if (pt != curthread || (cp->p_flag & SVFORK)) 2460Sstevel@tonic-gate return; 2470Sstevel@tonic-gate 2480Sstevel@tonic-gate mutex_enter(&pp->p_sc_lock); 2490Sstevel@tonic-gate for (pagep = pp->p_pagep; pagep != NULL; pagep = pagep->spc_next) 2500Sstevel@tonic-gate (void) as_unmap(cp->p_as, pagep->spc_uaddr, PAGESIZE); 2510Sstevel@tonic-gate mutex_exit(&pp->p_sc_lock); 2520Sstevel@tonic-gate } 2530Sstevel@tonic-gate 2545891Sraf 2550Sstevel@tonic-gate /* 2560Sstevel@tonic-gate * Returns non-zero if the specified thread shouldn't be preempted at this time. 2576247Sraf * Called by ts_preempt(), ts_tick(), and ts_update(). 2580Sstevel@tonic-gate */ 2590Sstevel@tonic-gate int 2600Sstevel@tonic-gate schedctl_get_nopreempt(kthread_t *t) 2610Sstevel@tonic-gate { 2620Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(t)); 2630Sstevel@tonic-gate return (t->t_schedctl->sc_preemptctl.sc_nopreempt); 2640Sstevel@tonic-gate } 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate 2670Sstevel@tonic-gate /* 2680Sstevel@tonic-gate * Sets the value of the nopreempt field for the specified thread. 2696247Sraf * Called by ts_preempt() to clear the field on preemption. 2700Sstevel@tonic-gate */ 2710Sstevel@tonic-gate void 2720Sstevel@tonic-gate schedctl_set_nopreempt(kthread_t *t, short val) 2730Sstevel@tonic-gate { 2740Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(t)); 2750Sstevel@tonic-gate t->t_schedctl->sc_preemptctl.sc_nopreempt = val; 2760Sstevel@tonic-gate } 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate 2790Sstevel@tonic-gate /* 2806247Sraf * Sets the value of the yield field for the specified thread. 2816247Sraf * Called by ts_preempt() and ts_tick() to set the field, and 2826247Sraf * ts_yield() to clear it. 2836247Sraf * The kernel never looks at this field so we don't need a 2846247Sraf * schedctl_get_yield() function. 2850Sstevel@tonic-gate */ 2860Sstevel@tonic-gate void 2870Sstevel@tonic-gate schedctl_set_yield(kthread_t *t, short val) 2880Sstevel@tonic-gate { 2890Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(t)); 2900Sstevel@tonic-gate t->t_schedctl->sc_preemptctl.sc_yield = val; 2910Sstevel@tonic-gate } 2920Sstevel@tonic-gate 2930Sstevel@tonic-gate 2940Sstevel@tonic-gate /* 2956247Sraf * Sets the values of the cid and priority fields for the specified thread. 2966247Sraf * Called from thread_change_pri(), thread_change_epri(), THREAD_CHANGE_PRI(). 2976247Sraf * Called following calls to CL_FORKRET() and CL_ENTERCLASS(). 2986247Sraf */ 2996247Sraf void 3006247Sraf schedctl_set_cidpri(kthread_t *t) 3016247Sraf { 3026247Sraf sc_shared_t *tdp = t->t_schedctl; 3036247Sraf 3046247Sraf if (tdp != NULL) { 3056247Sraf tdp->sc_cid = t->t_cid; 3066247Sraf tdp->sc_cpri = t->t_cpri; 3076247Sraf tdp->sc_priority = DISP_PRIO(t); 3086247Sraf } 3096247Sraf } 3106247Sraf 3116247Sraf 3126247Sraf /* 3130Sstevel@tonic-gate * Returns non-zero if the specified thread has requested that all 3140Sstevel@tonic-gate * signals be blocked. Called by signal-related code that tests 3150Sstevel@tonic-gate * the signal mask of a thread that may not be the current thread 3160Sstevel@tonic-gate * and where the process's p_lock cannot be acquired. 3170Sstevel@tonic-gate */ 3180Sstevel@tonic-gate int 3190Sstevel@tonic-gate schedctl_sigblock(kthread_t *t) 3200Sstevel@tonic-gate { 3210Sstevel@tonic-gate sc_shared_t *tdp = t->t_schedctl; 3220Sstevel@tonic-gate 3234389Ssl108498 if (tdp != NULL) 3240Sstevel@tonic-gate return (tdp->sc_sigblock); 3250Sstevel@tonic-gate return (0); 3260Sstevel@tonic-gate } 3270Sstevel@tonic-gate 3280Sstevel@tonic-gate 3290Sstevel@tonic-gate /* 3300Sstevel@tonic-gate * If the sc_sigblock field is set for the specified thread, set 3310Sstevel@tonic-gate * its signal mask to block all maskable signals, then clear the 3320Sstevel@tonic-gate * sc_sigblock field. This finishes what user-level code requested 3330Sstevel@tonic-gate * to be done when it set tdp->sc_shared->sc_sigblock non-zero. 334*10341SRoger.Faulkner@Sun.COM * Called from signal-related code either by the current thread for 335*10341SRoger.Faulkner@Sun.COM * itself or by a thread that holds the process's p_lock (/proc code). 3360Sstevel@tonic-gate */ 3370Sstevel@tonic-gate void 3380Sstevel@tonic-gate schedctl_finish_sigblock(kthread_t *t) 3390Sstevel@tonic-gate { 3400Sstevel@tonic-gate sc_shared_t *tdp = t->t_schedctl; 3410Sstevel@tonic-gate 342*10341SRoger.Faulkner@Sun.COM ASSERT(t == curthread || MUTEX_HELD(&ttoproc(t)->p_lock)); 3430Sstevel@tonic-gate 3444389Ssl108498 if (tdp != NULL && tdp->sc_sigblock) { 3450Sstevel@tonic-gate t->t_hold.__sigbits[0] = FILLSET0 & ~CANTMASK0; 3460Sstevel@tonic-gate t->t_hold.__sigbits[1] = FILLSET1 & ~CANTMASK1; 3470Sstevel@tonic-gate tdp->sc_sigblock = 0; 3480Sstevel@tonic-gate } 3490Sstevel@tonic-gate } 3500Sstevel@tonic-gate 3510Sstevel@tonic-gate 3520Sstevel@tonic-gate /* 3535891Sraf * Return non-zero if the current thread has declared that it has 3545891Sraf * a cancellation pending and that cancellation is not disabled. 3555891Sraf * If SIGCANCEL is blocked, we must be going over the wire in an 3565891Sraf * NFS transaction (sigintr() was called); return zero in this case. 3575891Sraf */ 3585891Sraf int 3595891Sraf schedctl_cancel_pending(void) 3605891Sraf { 3615891Sraf sc_shared_t *tdp = curthread->t_schedctl; 3625891Sraf 3635891Sraf if (tdp != NULL && 3645891Sraf (tdp->sc_flgs & SC_CANCEL_FLG) && 3655891Sraf !tdp->sc_sigblock && 3665891Sraf !sigismember(&curthread->t_hold, SIGCANCEL)) 3675891Sraf return (1); 3685891Sraf return (0); 3695891Sraf } 3705891Sraf 3715891Sraf 3725891Sraf /* 3735891Sraf * Inform libc that the kernel returned EINTR from some system call 3745891Sraf * due to there being a cancellation pending (SC_CANCEL_FLG set or 3755891Sraf * we received an SI_LWP SIGCANCEL while in a system call), rather 3765891Sraf * than because of some other signal. User-level code can try to 3775891Sraf * recover from receiving other signals, but it can't recover from 3785891Sraf * being cancelled. 3795891Sraf */ 3805891Sraf void 3815891Sraf schedctl_cancel_eintr(void) 3825891Sraf { 3835891Sraf sc_shared_t *tdp = curthread->t_schedctl; 3845891Sraf 3855891Sraf if (tdp != NULL) 3865891Sraf tdp->sc_flgs |= SC_EINTR_FLG; 3875891Sraf } 3885891Sraf 3895891Sraf 3905891Sraf /* 3910Sstevel@tonic-gate * Return non-zero if the current thread has declared that 3920Sstevel@tonic-gate * it is calling into the kernel to park, else return zero. 3930Sstevel@tonic-gate */ 3940Sstevel@tonic-gate int 3955891Sraf schedctl_is_park(void) 3960Sstevel@tonic-gate { 3970Sstevel@tonic-gate sc_shared_t *tdp = curthread->t_schedctl; 3980Sstevel@tonic-gate 3994389Ssl108498 if (tdp != NULL) 4005891Sraf return ((tdp->sc_flgs & SC_PARK_FLG) != 0); 4010Sstevel@tonic-gate /* 4020Sstevel@tonic-gate * If we're here and there is no shared memory (how could 4030Sstevel@tonic-gate * that happen?) then just assume we really are here to park. 4040Sstevel@tonic-gate */ 4050Sstevel@tonic-gate return (1); 4060Sstevel@tonic-gate } 4070Sstevel@tonic-gate 4085891Sraf 4094389Ssl108498 /* 4104389Ssl108498 * Declare thread is parking. 4114389Ssl108498 * 4125891Sraf * libc will set "sc_flgs |= SC_PARK_FLG" before calling lwpsys_park(0, tid) 4135891Sraf * in order to declare that the thread is calling into the kernel to park. 4144389Ssl108498 * 4154389Ssl108498 * This interface exists ONLY to support older versions of libthread which 4165891Sraf * are not aware of the SC_PARK_FLG flag. 4174389Ssl108498 * 4185891Sraf * Older versions of libthread which are not aware of the SC_PARK_FLG flag 4195891Sraf * need to be modified or emulated to call lwpsys_park(4, ...) instead of 4204389Ssl108498 * lwpsys_park(0, ...). This will invoke schedctl_set_park() before 4214389Ssl108498 * lwp_park() to declare that the thread is parking. 4224389Ssl108498 */ 4234389Ssl108498 void 4245891Sraf schedctl_set_park(void) 4255891Sraf { 4265891Sraf sc_shared_t *tdp = curthread->t_schedctl; 4275891Sraf if (tdp != NULL) 4285891Sraf tdp->sc_flgs |= SC_PARK_FLG; 4295891Sraf } 4305891Sraf 4315891Sraf 4325891Sraf /* 4335891Sraf * Clear the parking flag on return from parking in the kernel. 4345891Sraf */ 4355891Sraf void 4365891Sraf schedctl_unpark(void) 4374389Ssl108498 { 4384389Ssl108498 sc_shared_t *tdp = curthread->t_schedctl; 4394389Ssl108498 4404389Ssl108498 if (tdp != NULL) 4415891Sraf tdp->sc_flgs &= ~SC_PARK_FLG; 4420Sstevel@tonic-gate } 4430Sstevel@tonic-gate 4440Sstevel@tonic-gate 4450Sstevel@tonic-gate /* 4460Sstevel@tonic-gate * Page handling code. 4470Sstevel@tonic-gate */ 4480Sstevel@tonic-gate 4490Sstevel@tonic-gate void 4505891Sraf schedctl_init(void) 4510Sstevel@tonic-gate { 4520Sstevel@tonic-gate /* 4530Sstevel@tonic-gate * Amount of page that can hold sc_shared_t structures. If 4540Sstevel@tonic-gate * sizeof (sc_shared_t) is a power of 2, this should just be 4550Sstevel@tonic-gate * PAGESIZE. 4560Sstevel@tonic-gate */ 4570Sstevel@tonic-gate sc_pagesize = PAGESIZE - (PAGESIZE % sizeof (sc_shared_t)); 4580Sstevel@tonic-gate 4590Sstevel@tonic-gate /* 4600Sstevel@tonic-gate * Allocation bitmap is one bit per struct on a page. 4610Sstevel@tonic-gate */ 4620Sstevel@tonic-gate sc_bitmap_len = sc_pagesize / sizeof (sc_shared_t); 4630Sstevel@tonic-gate sc_bitmap_words = howmany(sc_bitmap_len, BT_NBIPUL); 4640Sstevel@tonic-gate } 4650Sstevel@tonic-gate 4665891Sraf 4676247Sraf static int 4680Sstevel@tonic-gate schedctl_shared_alloc(sc_shared_t **kaddrp, uintptr_t *uaddrp) 4690Sstevel@tonic-gate { 4700Sstevel@tonic-gate proc_t *p = curproc; 4710Sstevel@tonic-gate sc_page_ctl_t *pagep; 4720Sstevel@tonic-gate sc_shared_t *ssp; 4730Sstevel@tonic-gate caddr_t base; 4740Sstevel@tonic-gate index_t index; 4750Sstevel@tonic-gate int error; 4760Sstevel@tonic-gate 4770Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(&p->p_lock)); 4780Sstevel@tonic-gate mutex_enter(&p->p_sc_lock); 4790Sstevel@tonic-gate 4800Sstevel@tonic-gate /* 4810Sstevel@tonic-gate * Try to find space for the new data in existing pages 4820Sstevel@tonic-gate * within the process's list of shared pages. 4830Sstevel@tonic-gate */ 4840Sstevel@tonic-gate for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) 4850Sstevel@tonic-gate if (pagep->spc_space != 0) 4860Sstevel@tonic-gate break; 4870Sstevel@tonic-gate 4880Sstevel@tonic-gate if (pagep != NULL) 4890Sstevel@tonic-gate base = pagep->spc_uaddr; 4900Sstevel@tonic-gate else { 4910Sstevel@tonic-gate struct anon_map *amp; 4920Sstevel@tonic-gate caddr_t kaddr; 4930Sstevel@tonic-gate 4940Sstevel@tonic-gate /* 4950Sstevel@tonic-gate * No room, need to allocate a new page. Also set up 4960Sstevel@tonic-gate * a mapping to the kernel address space for the new 4970Sstevel@tonic-gate * page and lock it in memory. 4980Sstevel@tonic-gate */ 4990Sstevel@tonic-gate if ((error = schedctl_getpage(&, &kaddr)) != 0) { 5000Sstevel@tonic-gate mutex_exit(&p->p_sc_lock); 5010Sstevel@tonic-gate return (error); 5020Sstevel@tonic-gate } 5030Sstevel@tonic-gate if ((error = schedctl_map(amp, &base, kaddr)) != 0) { 5040Sstevel@tonic-gate schedctl_freepage(amp, kaddr); 5050Sstevel@tonic-gate mutex_exit(&p->p_sc_lock); 5060Sstevel@tonic-gate return (error); 5070Sstevel@tonic-gate } 5080Sstevel@tonic-gate 5090Sstevel@tonic-gate /* 5100Sstevel@tonic-gate * Allocate and initialize the page control structure. 5110Sstevel@tonic-gate */ 5120Sstevel@tonic-gate pagep = kmem_alloc(sizeof (sc_page_ctl_t), KM_SLEEP); 5130Sstevel@tonic-gate pagep->spc_amp = amp; 5140Sstevel@tonic-gate pagep->spc_base = (sc_shared_t *)kaddr; 5150Sstevel@tonic-gate pagep->spc_end = (sc_shared_t *)(kaddr + sc_pagesize); 5160Sstevel@tonic-gate pagep->spc_uaddr = base; 5170Sstevel@tonic-gate 5180Sstevel@tonic-gate pagep->spc_map = kmem_zalloc(sizeof (ulong_t) * sc_bitmap_words, 5190Sstevel@tonic-gate KM_SLEEP); 5200Sstevel@tonic-gate pagep->spc_space = sc_pagesize; 5210Sstevel@tonic-gate 5220Sstevel@tonic-gate pagep->spc_next = p->p_pagep; 5230Sstevel@tonic-gate p->p_pagep = pagep; 5240Sstevel@tonic-gate } 5250Sstevel@tonic-gate 5260Sstevel@tonic-gate /* 5270Sstevel@tonic-gate * Got a page, now allocate space for the data. There should 5280Sstevel@tonic-gate * be space unless something's wrong. 5290Sstevel@tonic-gate */ 5300Sstevel@tonic-gate ASSERT(pagep != NULL && pagep->spc_space >= sizeof (sc_shared_t)); 5310Sstevel@tonic-gate index = bt_availbit(pagep->spc_map, sc_bitmap_len); 5320Sstevel@tonic-gate ASSERT(index != -1); 5330Sstevel@tonic-gate 5340Sstevel@tonic-gate /* 5350Sstevel@tonic-gate * Get location with pointer arithmetic. spc_base is of type 5360Sstevel@tonic-gate * sc_shared_t *. Mark as allocated. 5370Sstevel@tonic-gate */ 5380Sstevel@tonic-gate ssp = pagep->spc_base + index; 5390Sstevel@tonic-gate BT_SET(pagep->spc_map, index); 5400Sstevel@tonic-gate pagep->spc_space -= sizeof (sc_shared_t); 5410Sstevel@tonic-gate 5420Sstevel@tonic-gate mutex_exit(&p->p_sc_lock); 5430Sstevel@tonic-gate 5440Sstevel@tonic-gate /* 5450Sstevel@tonic-gate * Return kernel and user addresses. 5460Sstevel@tonic-gate */ 5470Sstevel@tonic-gate *kaddrp = ssp; 5480Sstevel@tonic-gate *uaddrp = (uintptr_t)base + ((uintptr_t)ssp & PAGEOFFSET); 5490Sstevel@tonic-gate return (0); 5500Sstevel@tonic-gate } 5510Sstevel@tonic-gate 5520Sstevel@tonic-gate 5530Sstevel@tonic-gate /* 5540Sstevel@tonic-gate * Find the page control structure corresponding to a kernel address. 5550Sstevel@tonic-gate */ 5560Sstevel@tonic-gate static sc_page_ctl_t * 5570Sstevel@tonic-gate schedctl_page_lookup(sc_shared_t *ssp) 5580Sstevel@tonic-gate { 5590Sstevel@tonic-gate proc_t *p = curproc; 5600Sstevel@tonic-gate sc_page_ctl_t *pagep; 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_sc_lock)); 5630Sstevel@tonic-gate for (pagep = p->p_pagep; pagep != NULL; pagep = pagep->spc_next) { 5640Sstevel@tonic-gate if (ssp >= pagep->spc_base && ssp < pagep->spc_end) 5650Sstevel@tonic-gate return (pagep); 5660Sstevel@tonic-gate } 5670Sstevel@tonic-gate return (NULL); /* This "can't happen". Should we panic? */ 5680Sstevel@tonic-gate } 5690Sstevel@tonic-gate 5700Sstevel@tonic-gate 5710Sstevel@tonic-gate /* 5720Sstevel@tonic-gate * This function is called when a page needs to be mapped into a 5730Sstevel@tonic-gate * process's address space. Allocate the user address space and 5740Sstevel@tonic-gate * set up the mapping to the page. Assumes the page has already 5750Sstevel@tonic-gate * been allocated and locked in memory via schedctl_getpage. 5760Sstevel@tonic-gate */ 5770Sstevel@tonic-gate static int 5780Sstevel@tonic-gate schedctl_map(struct anon_map *amp, caddr_t *uaddrp, caddr_t kaddr) 5790Sstevel@tonic-gate { 5806036Smec caddr_t addr = NULL; 5810Sstevel@tonic-gate struct as *as = curproc->p_as; 5820Sstevel@tonic-gate struct segvn_crargs vn_a; 5830Sstevel@tonic-gate int error; 5840Sstevel@tonic-gate 5850Sstevel@tonic-gate as_rangelock(as); 5860Sstevel@tonic-gate /* pass address of kernel mapping as offset to avoid VAC conflicts */ 5870Sstevel@tonic-gate map_addr(&addr, PAGESIZE, (offset_t)(uintptr_t)kaddr, 1, 0); 5880Sstevel@tonic-gate if (addr == NULL) { 5890Sstevel@tonic-gate as_rangeunlock(as); 5900Sstevel@tonic-gate return (ENOMEM); 5910Sstevel@tonic-gate } 5920Sstevel@tonic-gate 5930Sstevel@tonic-gate /* 5940Sstevel@tonic-gate * Use segvn to set up the mapping to the page. 5950Sstevel@tonic-gate */ 5960Sstevel@tonic-gate vn_a.vp = NULL; 5970Sstevel@tonic-gate vn_a.offset = 0; 5980Sstevel@tonic-gate vn_a.cred = NULL; 5990Sstevel@tonic-gate vn_a.type = MAP_SHARED; 6000Sstevel@tonic-gate vn_a.prot = vn_a.maxprot = PROT_ALL; 6010Sstevel@tonic-gate vn_a.flags = 0; 6020Sstevel@tonic-gate vn_a.amp = amp; 6030Sstevel@tonic-gate vn_a.szc = 0; 6040Sstevel@tonic-gate vn_a.lgrp_mem_policy_flags = 0; 6050Sstevel@tonic-gate error = as_map(as, addr, PAGESIZE, segvn_create, &vn_a); 6060Sstevel@tonic-gate as_rangeunlock(as); 6070Sstevel@tonic-gate 6080Sstevel@tonic-gate if (error) 6090Sstevel@tonic-gate return (error); 6100Sstevel@tonic-gate 6110Sstevel@tonic-gate *uaddrp = addr; 6120Sstevel@tonic-gate return (0); 6130Sstevel@tonic-gate } 6140Sstevel@tonic-gate 6150Sstevel@tonic-gate 6160Sstevel@tonic-gate /* 6170Sstevel@tonic-gate * Allocate a new page from anonymous memory. Also, create a kernel 6180Sstevel@tonic-gate * mapping to the page and lock the page in memory. 6190Sstevel@tonic-gate */ 6200Sstevel@tonic-gate static int 6210Sstevel@tonic-gate schedctl_getpage(struct anon_map **newamp, caddr_t *newaddr) 6220Sstevel@tonic-gate { 6230Sstevel@tonic-gate struct anon_map *amp; 6240Sstevel@tonic-gate caddr_t kaddr; 6250Sstevel@tonic-gate 6260Sstevel@tonic-gate /* 6270Sstevel@tonic-gate * Set up anonymous memory struct. No swap reservation is 6280Sstevel@tonic-gate * needed since the page will be locked into memory. 6290Sstevel@tonic-gate */ 6304426Saguzovsk amp = anonmap_alloc(PAGESIZE, 0, ANON_SLEEP); 6310Sstevel@tonic-gate 6320Sstevel@tonic-gate /* 6330Sstevel@tonic-gate * Allocate the page. 6340Sstevel@tonic-gate */ 6353247Sgjelinek kaddr = segkp_get_withanonmap(segkp, PAGESIZE, 6363247Sgjelinek KPD_NO_ANON | KPD_LOCKED | KPD_ZERO, amp); 6370Sstevel@tonic-gate if (kaddr == NULL) { 6380Sstevel@tonic-gate amp->refcnt--; 6390Sstevel@tonic-gate anonmap_free(amp); 6400Sstevel@tonic-gate return (ENOMEM); 6410Sstevel@tonic-gate } 6420Sstevel@tonic-gate 6430Sstevel@tonic-gate /* 6440Sstevel@tonic-gate * The page is left SE_SHARED locked so that it won't be 6450Sstevel@tonic-gate * paged out or relocated (KPD_LOCKED above). 6460Sstevel@tonic-gate */ 6470Sstevel@tonic-gate 6480Sstevel@tonic-gate *newamp = amp; 6490Sstevel@tonic-gate *newaddr = kaddr; 6500Sstevel@tonic-gate return (0); 6510Sstevel@tonic-gate } 6520Sstevel@tonic-gate 6530Sstevel@tonic-gate 6540Sstevel@tonic-gate /* 6550Sstevel@tonic-gate * Take the necessary steps to allow a page to be released. 6560Sstevel@tonic-gate * This is called when the process is doing exit() or exec(). 6570Sstevel@tonic-gate * There should be no accesses to the page after this. 6580Sstevel@tonic-gate * The kernel mapping of the page is released and the page is unlocked. 6590Sstevel@tonic-gate */ 6600Sstevel@tonic-gate static void 6610Sstevel@tonic-gate schedctl_freepage(struct anon_map *amp, caddr_t kaddr) 6620Sstevel@tonic-gate { 6630Sstevel@tonic-gate /* 6640Sstevel@tonic-gate * Release the lock on the page and remove the kernel mapping. 6650Sstevel@tonic-gate */ 6660Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 6670Sstevel@tonic-gate segkp_release(segkp, kaddr); 6680Sstevel@tonic-gate 6690Sstevel@tonic-gate /* 6700Sstevel@tonic-gate * Decrement the refcnt so the anon_map structure will be freed. 6710Sstevel@tonic-gate */ 6720Sstevel@tonic-gate if (--amp->refcnt == 0) { 6730Sstevel@tonic-gate /* 6740Sstevel@tonic-gate * The current process no longer has the page mapped, so 6750Sstevel@tonic-gate * we have to free everything rather than letting as_free 6760Sstevel@tonic-gate * do the work. 6770Sstevel@tonic-gate */ 6786695Saguzovsk anonmap_purge(amp); 6790Sstevel@tonic-gate anon_free(amp->ahp, 0, PAGESIZE); 6800Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6810Sstevel@tonic-gate anonmap_free(amp); 6820Sstevel@tonic-gate } else { 6830Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 6840Sstevel@tonic-gate } 6850Sstevel@tonic-gate } 686