/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/cpupart.h>
#include <sys/pset.h>
#include <sys/var.h>
#include <sys/cyclic.h>
#include <sys/lgrp.h>
#include <sys/pghw.h>
#include <sys/loadavg.h>
#include <sys/class.h>
#include <sys/fss.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/policy.h>

/*
 * Calling pool_lock() protects the pools configuration, which includes
 * CPU partitions.  cpu_lock protects the CPU partition list, and prevents
 * partitions from being created or destroyed while the lock is held.
 * The lock ordering with respect to related locks is:
 *
 *    pool_lock() ---> cpu_lock ---> pidlock ---> p_lock
 *
 * Blocking memory allocations may be made while holding "pool_lock"
 * or cpu_lock.
 */
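
/*
 * Illustrative sketch (not compiled): the acquisition order described
 * above, as a caller would follow it.  The lock and callee names are
 * real; the surrounding context (a process pointer "p") is assumed
 * only for the example.
 */
#if 0
	pool_lock();			/* pools configuration */
	mutex_enter(&cpu_lock);		/* partition list */
	mutex_enter(&pidlock);		/* process list */
	mutex_enter(&p->p_lock);	/* per-process state */
	/* ... examine or rebind threads and partitions ... */
	mutex_exit(&p->p_lock);
	mutex_exit(&pidlock);
	mutex_exit(&cpu_lock);
	pool_unlock();
#endif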

/*
 * The cp_default partition is allocated statically, but its lgroup load
 * average (lpl) list is allocated dynamically after the kmem subsystem is
 * initialized.  This saves some memory since the space allocated reflects
 * the actual number of lgroups supported by the platform.  The lgrp
 * facility provides temporary space to hold lpl information during system
 * bootstrap.
 */

cpupart_t		*cp_list_head;
cpupart_t		cp_default;
static cpupartid_t	cp_id_next;
uint_t			cp_numparts;
uint_t			cp_numparts_nonempty;

/*
 * We need to limit the total number of partitions to avoid slowing down
 * the clock code too much.  The clock code traverses the list of
 * partitions and needs to be able to execute in a reasonable amount
 * of time (less than 1/hz seconds).  The maximum is sized based on
 * max_ncpus, so it shouldn't be a problem unless there are large
 * numbers of empty partitions.
 */
static uint_t		cp_max_numparts;

/*
 * Processor sets and CPU partitions are different but related concepts.
 * A processor set is a user-level abstraction allowing users to create
 * sets of CPUs and bind threads exclusively to those sets.  A CPU
 * partition is a kernel dispatcher object consisting of a set of CPUs
 * and a global dispatch queue.  The processor set abstraction is
 * implemented via a CPU partition, and currently there is a 1-1
 * mapping between processor sets and partitions (excluding the default
 * partition, which is not visible as a processor set).  Hence, the
 * numbering for processor sets and CPU partitions is identical.  This
 * may not always be true in the future, and these macros could become
 * less trivial if we support e.g. a processor set containing multiple
 * CPU partitions.
 */
#define	PSTOCP(psid)	((cpupartid_t)((psid) == PS_NONE ? CP_DEFAULT : (psid)))
#define	CPTOPS(cpid)	((psetid_t)((cpid) == CP_DEFAULT ? PS_NONE : (cpid)))
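
/*
 * Worked example (illustrative, not compiled): the mapping is the
 * identity except for the default IDs.
 *
 *	ASSERT(PSTOCP(PS_NONE) == CP_DEFAULT);
 *	ASSERT(CPTOPS(CP_DEFAULT) == PS_NONE);
 *	ASSERT(PSTOCP((psetid_t)1) == (cpupartid_t)1);
 */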

static int cpupart_unbind_threads(cpupart_t *, boolean_t);

/*
 * Find a CPU partition given a processor set ID.
 */
static cpupart_t *
cpupart_find_all(psetid_t psid)
{
	cpupart_t *cp;
	cpupartid_t cpid = PSTOCP(psid);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* default partition not visible as a processor set */
	if (psid == CP_DEFAULT)
		return (NULL);

	if (psid == PS_MYID)
		return (curthread->t_cpupart);

	cp = cp_list_head;
	do {
		if (cp->cp_id == cpid)
			return (cp);
		cp = cp->cp_next;
	} while (cp != cp_list_head);
	return (NULL);
}

/*
 * Find a CPU partition given a processor set ID if the processor set
 * should be visible from the calling zone.
 */
cpupart_t *
cpupart_find(psetid_t psid)
{
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cp = cpupart_find_all(psid);
	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
		return (NULL);
	return (cp);
}
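
/*
 * Illustrative sketch (not compiled) of a typical lookup; the caller
 * below is hypothetical, but cpupart_find() and cpu_lock are real.
 */
#if 0
	cpupart_t *pp;

	mutex_enter(&cpu_lock);
	pp = cpupart_find(psid);	/* NULL if invalid or not visible */
	if (pp != NULL) {
		/* ... operate on the partition ... */
	}
	mutex_exit(&cpu_lock);
#endif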

static int
cpupart_kstat_update(kstat_t *ksp, int rw)
{
	cpupart_t *cp = (cpupart_t *)ksp->ks_private;
	cpupart_kstat_t *cpksp = ksp->ks_data;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	cpksp->cpk_updates.value.ui64 = cp->cp_updates;
	cpksp->cpk_runnable.value.ui64 = cp->cp_nrunnable_cum;
	cpksp->cpk_waiting.value.ui64 = cp->cp_nwaiting_cum;
	cpksp->cpk_ncpus.value.ui32 = cp->cp_ncpus;
	cpksp->cpk_avenrun_1min.value.ui32 = cp->cp_hp_avenrun[0] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_5min.value.ui32 = cp->cp_hp_avenrun[1] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_15min.value.ui32 = cp->cp_hp_avenrun[2] >>
	    (16 - FSHIFT);
	return (0);
}

static void
cpupart_kstat_create(cpupart_t *cp)
{
	kstat_t *ksp;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * We have a bit of a chicken-egg problem since this code will
	 * get called to create the kstats for CP_DEFAULT before the
	 * pools framework gets initialized.  We circumvent the problem
	 * by special-casing cp_default.
	 */
	if (cp != &cp_default && pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	ksp = kstat_create_zone("unix", cp->cp_id, "pset", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (cpupart_kstat_t) / sizeof (kstat_named_t), 0, zoneid);
	if (ksp != NULL) {
		cpupart_kstat_t *cpksp = ksp->ks_data;

		kstat_named_init(&cpksp->cpk_updates, "updates",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_runnable, "runnable",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_waiting, "waiting",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_ncpus, "ncpus",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_1min, "avenrun_1min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_5min, "avenrun_5min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_15min, "avenrun_15min",
		    KSTAT_DATA_UINT32);

		ksp->ks_update = cpupart_kstat_update;
		ksp->ks_private = cp;

		kstat_install(ksp);
	}
	cp->cp_kstat = ksp;
}
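
/*
 * The kstats created above can be read from userland with
 * libkstat(3LIB).  A hedged userland sketch (not part of this file),
 * assuming processor set ID 1 and eliding error handling:
 *
 *	#include <stdio.h>
 *	#include <kstat.h>
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "unix", 1, "pset");
 *	kstat_named_t *kn;
 *
 *	(void) kstat_read(kc, ksp, NULL);
 *	kn = kstat_data_lookup(ksp, "ncpus");
 *	(void) printf("ncpus: %u\n", kn->value.ui32);
 *	(void) kstat_close(kc);
 */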

/*
 * Initialize the cpupart's lgrp partitions (lpls).
 */
static void
cpupart_lpl_initialize(cpupart_t *cp)
{
	int i, sz;

	sz = cp->cp_nlgrploads = lgrp_plat_max_lgrps();
	cp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * sz, KM_SLEEP);

	for (i = 0; i < sz; i++) {
		/*
		 * The last entry of the lpl's resource set is always NULL
		 * by design (to facilitate iteration), hence the
		 * "oversizing" by 1.
		 */
		cp->cp_lgrploads[i].lpl_rset_sz = sz + 1;
		cp->cp_lgrploads[i].lpl_rset =
		    kmem_zalloc(sizeof (struct lgrp_ld *) * (sz + 1), KM_SLEEP);
		cp->cp_lgrploads[i].lpl_id2rset =
		    kmem_zalloc(sizeof (int) * (sz + 1), KM_SLEEP);
		cp->cp_lgrploads[i].lpl_lgrpid = i;
	}
}
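
/*
 * Illustrative sketch (not compiled): the NULL terminator allocated
 * above lets callers walk an lpl's resource set without a separate
 * count, e.g.:
 *
 *	for (i = 0; lpl->lpl_rset[i] != NULL; i++)
 *		visit(lpl->lpl_rset[i]);
 *
 * where "visit" is a hypothetical per-entry operation.
 */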

/*
 * Teardown the cpupart's lgrp partitions.
 */
static void
cpupart_lpl_teardown(cpupart_t *cp)
{
	int i, sz;
	lpl_t *lpl;

	for (i = 0; i < cp->cp_nlgrploads; i++) {
		lpl = &cp->cp_lgrploads[i];

		sz = lpl->lpl_rset_sz;
		kmem_free(lpl->lpl_rset, sizeof (struct lgrp_ld *) * sz);
		kmem_free(lpl->lpl_id2rset, sizeof (int) * sz);
		lpl->lpl_rset = NULL;
		lpl->lpl_id2rset = NULL;
	}
	kmem_free(cp->cp_lgrploads, sizeof (lpl_t) * cp->cp_nlgrploads);
	cp->cp_lgrploads = NULL;
}

/*
 * Initialize the default partition and kpreempt disp queue.
 */
void
cpupart_initialize_default(void)
{
	lgrp_id_t i;

	cp_list_head = &cp_default;
	cp_default.cp_next = &cp_default;
	cp_default.cp_prev = &cp_default;
	cp_default.cp_id = CP_DEFAULT;
	cp_default.cp_kp_queue.disp_maxrunpri = -1;
	cp_default.cp_kp_queue.disp_max_unbound_pri = -1;
	cp_default.cp_kp_queue.disp_cpu = NULL;
	cp_default.cp_gen = 0;
	cp_default.cp_loadavg.lg_cur = 0;
	cp_default.cp_loadavg.lg_len = 0;
	cp_default.cp_loadavg.lg_total = 0;
	for (i = 0; i < S_LOADAVG_SZ; i++) {
		cp_default.cp_loadavg.lg_loads[i] = 0;
	}
	DISP_LOCK_INIT(&cp_default.cp_kp_queue.disp_lock);
	cp_id_next = CP_DEFAULT + 1;
	cpupart_kstat_create(&cp_default);
	cp_numparts = 1;
	if (cp_max_numparts == 0)	/* allow for /etc/system tuning */
		cp_max_numparts = max_ncpus * 2 + 1;
	/*
	 * Allocate space for cp_default's list of lgrploads.
	 */
	cpupart_lpl_initialize(&cp_default);

	/*
	 * The initial lpl topology is created in a special lpl list,
	 * lpl_bootstrap.  It should be copied to cp_default.
	 * NOTE: lpl_topo_bootstrap() also updates CPU0's cpu_lpl pointer to
	 *	 point to the correct lpl in the cp_default.cp_lgrploads list.
	 */
	lpl_topo_bootstrap(cp_default.cp_lgrploads,
	    cp_default.cp_nlgrploads);

	cp_default.cp_attr = PSET_NOESCAPE;
	cp_numparts_nonempty = 1;
	/*
	 * Set t0's home.
	 */
	t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];

	bitset_init(&cp_default.cp_cmt_pgs);
	bitset_init_fanout(&cp_default.cp_haltset, cp_haltset_fanout);
	bitset_resize(&cp_default.cp_haltset, max_ncpus);
}


static int
cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
{
	cpupart_t *oldpp;
	cpu_t	*ncp, *newlist;
	kthread_t *t;
	int	move_threads = 1;
	lgrp_id_t lgrpid;
	proc_t	*p;
	int lgrp_diff_lpl;
	lpl_t	*cpu_lpl;
	int	ret;
	boolean_t unbind_all_threads = (forced != 0);

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(newpp != NULL);

	oldpp = cp->cpu_part;
	ASSERT(oldpp != NULL);
	ASSERT(oldpp->cp_ncpus > 0);

	if (newpp == oldpp) {
		/*
		 * Don't need to do anything.
		 */
		return (0);
	}

	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);

	if (!disp_bound_partition(cp, 0)) {
		/*
		 * Don't need to move threads if there are no threads in
		 * the partition.  Note that threads can't enter the
		 * partition while we're holding cpu_lock.
		 */
		move_threads = 0;
	} else if (oldpp->cp_ncpus == 1) {
		/*
		 * The last CPU is removed from a partition which has threads
		 * running in it.  Some of these threads may be bound to this
		 * CPU.
		 *
		 * Attempt to unbind threads from the CPU and from the
		 * processor set.  Note that no threads should be bound to
		 * this CPU since cpupart_move_thread() will refuse to move
		 * bound threads to other CPUs.
		 */
		(void) cpu_unbind(oldpp->cp_cpulist->cpu_id, B_FALSE);
		(void) cpupart_unbind_threads(oldpp, B_FALSE);

		if (!disp_bound_partition(cp, 0)) {
			/*
			 * No bound threads in this partition any more.
			 */
			move_threads = 0;
		} else {
			/*
			 * There are still threads bound to the partition.
			 */
			cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
			return (EBUSY);
		}
	}

	/*
	 * If the forced flag is set, unbind all threads from this CPU.
	 * Otherwise unbind only soft-bound threads.
	 */
	if ((ret = cpu_unbind(cp->cpu_id, unbind_all_threads)) != 0) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (ret);
	}

	/*
	 * Stop further threads from weak-binding to this cpu.
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Notify the Processor Groups subsystem that the CPU
	 * will be moving cpu partitions.  This is done before
	 * CPUs are paused to provide an opportunity for any
	 * needed memory allocations.
	 */
	pg_cpupart_out(cp, oldpp);
	pg_cpupart_in(cp, newpp);

again:
	if (move_threads) {
		int loop_count;
		/*
		 * Check for threads strongly or weakly bound to this CPU.
		 */
		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
			if (loop_count >= 5) {
				cpu_state_change_notify(cp->cpu_id,
				    CPU_CPUPART_IN);
				pg_cpupart_out(cp, newpp);
				pg_cpupart_in(cp, oldpp);
				cpu_inmotion = NULL;
				return (EBUSY);	/* some threads still bound */
			}
			delay(1);
		}
	}

	/*
	 * Before we actually start changing data structures, notify
	 * the cyclic subsystem that we want to move this CPU out of its
	 * partition.
	 */
	if (!cyclic_move_out(cp)) {
		/*
		 * This CPU must be the last CPU in a processor set with
		 * a bound cyclic.
		 */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		pg_cpupart_out(cp, newpp);
		pg_cpupart_in(cp, oldpp);
		cpu_inmotion = NULL;
		return (EBUSY);
	}

	pause_cpus(cp);

	if (move_threads) {
		/*
		 * The thread on cpu before the pause thread may have read
		 * cpu_inmotion before we raised the barrier above.  Check
		 * again.
		 */
		if (disp_bound_threads(cp, 1)) {
			start_cpus();
			goto again;
		}
	}

	/*
	 * Now that CPUs are paused, let the PG subsystem perform
	 * any necessary data structure updates.
	 */
	pg_cpupart_move(cp, oldpp, newpp);

	/* save this cpu's lgroup -- it'll be the same in the new partition */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;

	cpu_lpl = cp->cpu_lpl;
	/*
	 * let the lgroup framework know cp has left the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);

	/* move out of old partition */
	oldpp->cp_ncpus--;
	if (oldpp->cp_ncpus > 0) {

		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
		if (oldpp->cp_cpulist == cp) {
			oldpp->cp_cpulist = ncp;
		}
	} else {
		ncp = oldpp->cp_cpulist = NULL;
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
	oldpp->cp_gen++;

	/* move into new partition */
	newlist = newpp->cp_cpulist;
	if (newlist == NULL) {
		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	} else {
		cp->cpu_next_part = newlist;
		cp->cpu_prev_part = newlist->cpu_prev_part;
		newlist->cpu_prev_part->cpu_next_part = cp;
		newlist->cpu_prev_part = cp;
	}
	cp->cpu_part = newpp;
	newpp->cp_ncpus++;
	newpp->cp_gen++;

	ASSERT(bitset_is_null(&newpp->cp_haltset));
	ASSERT(bitset_is_null(&oldpp->cp_haltset));

	/*
	 * let the lgroup framework know cp has entered the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);

	/*
	 * If necessary, move threads off processor.
	 */
	if (move_threads) {
		ASSERT(ncp != NULL);

		/*
		 * Walk through the active process list to look for
		 * threads that need to have a new home lgroup, or
		 * that last ran on the CPU being moved out of the
		 * partition.
		 */

		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {

				ASSERT(t->t_lpl != NULL);

				/*
				 * Update the count of how many threads are
				 * in this CPU's lgroup but have a different
				 * lpl.
				 */

				if (t->t_lpl != cpu_lpl &&
				    t->t_lpl->lpl_lgrpid == lgrpid)
					lgrp_diff_lpl++;
				/*
				 * If the lgroup that t is assigned to no
				 * longer has any CPUs in t's partition,
				 * we'll have to choose a new lgroup for t.
				 */

				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
				    t->t_cpupart)) {
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				}

				/*
				 * make sure lpl points to our own partition
				 */
				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
				    t->t_cpupart->cp_nlgrploads));

				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/* Update CPU last ran on if it was this CPU */
				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
				    t->t_bound_cpu != cp) {
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				}
				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, lgrpid);
		}

		/*
		 * Walk the thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */

		t = curthread;

		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * If the lgroup that t is assigned to no
			 * longer has any CPUs in t's partition,
			 * we'll have to choose a new lgroup for t.
			 * Also, choose the best home lgroup when the
			 * thread has specified lgroup affinities,
			 * since there may be an lgroup with more
			 * affinity available after moving CPUs
			 * around.
			 */
			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
			    t->t_cpupart) || t->t_lgrp_affinity) {
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);
			}

			/* make sure lpl points to our own partition */
			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
			    t->t_cpupart->cp_nlgrploads));

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/* Update CPU last ran on if it was this CPU */
			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
			    t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
				    t->t_pri, NULL);
			}

			t = t->t_next;
		} while (t != curthread);

		/*
		 * Clear off the CPU's run queue, and the kp queue if the
		 * partition is now empty.
		 */
		disp_cpu_inactive(cp);

		/*
		 * Make cp switch to a thread from the new partition.
		 */
		cp->cpu_runrun = 1;
		cp->cpu_kprunrun = 1;
	}

	cpu_inmotion = NULL;
	start_cpus();

	/*
	 * Let anyone interested know that cpu has been added to the set.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);

	/*
	 * Now let the cyclic subsystem know that it can reshuffle cyclics
	 * bound to the new processor set.
	 */
	cyclic_move_in(cp);

	return (0);
}

/*
 * Check if thread can be moved to a new cpu partition.  Called by
 * cpupart_move_thread() and pset_bind_start().
 */
int
cpupart_movable_thread(kthread_id_t tp, cpupart_t *cp, int ignore)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(cp != NULL);
	ASSERT(THREAD_LOCK_HELD(tp));

	/*
	 * CPU-bound threads can't be moved.
	 */
	if (!ignore) {
		cpu_t *boundcpu = tp->t_bound_cpu ? tp->t_bound_cpu :
		    tp->t_weakbound_cpu;
		if (boundcpu != NULL && boundcpu->cpu_part != cp)
			return (EBUSY);
	}

	if (tp->t_cid == sysdccid) {
		return (EINVAL);	/* For now, sysdc threads can't move */
	}

	return (0);
}

/*
 * Move thread to new partition.  If ignore is non-zero, then CPU
 * bindings should be ignored (this is used when destroying a
 * partition).
 */
static int
cpupart_move_thread(kthread_id_t tp, cpupart_t *newpp, int ignore,
    void *projbuf, void *zonebuf)
{
	cpupart_t *oldpp = tp->t_cpupart;
	int ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(newpp != NULL);

	if (newpp->cp_cpulist == NULL)
		return (EINVAL);

	/*
	 * Check for errors first.
	 */
	thread_lock(tp);
	if ((ret = cpupart_movable_thread(tp, newpp, ignore)) != 0) {
		thread_unlock(tp);
		return (ret);
	}

	/* move the thread */
	if (oldpp != newpp) {
		/*
		 * Make the thread switch to the new partition.
		 */
		tp->t_cpupart = newpp;
		ASSERT(tp->t_lpl != NULL);
		/*
		 * Leave the thread on the same lgroup if possible; otherwise
		 * choose a new lgroup for it.  In either case, update its
		 * t_lpl.
		 */
		if (LGRP_CPUS_IN_PART(tp->t_lpl->lpl_lgrpid, newpp) &&
		    tp->t_lgrp_affinity == NULL) {
			/*
			 * The thread's lgroup has CPUs in the thread's new
			 * partition, so the thread can stay assigned to the
			 * same lgroup.  Update its t_lpl to point to the
			 * lpl_t for its lgroup in its new partition.
			 */
			lgrp_move_thread(tp, &tp->t_cpupart->\
			    cp_lgrploads[tp->t_lpl->lpl_lgrpid], 1);
		} else {
			/*
			 * The thread's lgroup has no cpus in its new
			 * partition or it has specified lgroup affinities,
			 * so choose the best lgroup for the thread and
			 * assign it to that lgroup.
			 */
			lgrp_move_thread(tp, lgrp_choose(tp, tp->t_cpupart),
			    1);
		}
		/*
		 * make sure lpl points to our own partition
		 */
		ASSERT((tp->t_lpl >= tp->t_cpupart->cp_lgrploads) &&
		    (tp->t_lpl < tp->t_cpupart->cp_lgrploads +
		    tp->t_cpupart->cp_nlgrploads));

		ASSERT(tp->t_lpl->lpl_ncpu > 0);

		if (tp->t_state == TS_ONPROC) {
			cpu_surrender(tp);
		} else if (tp->t_state == TS_RUN) {
			(void) dispdeq(tp);
			setbackdq(tp);
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);
	fss_changepset(tp, newpp, projbuf, zonebuf);

	return (0);		/* success */
}


/*
 * This function binds a thread to a partition.  Must be called with the
 * p_lock of the containing process held (to keep the thread from going
 * away), and thus also with cpu_lock held (since cpu_lock must be
 * acquired before p_lock).  If ignore is non-zero, then CPU bindings
 * should be ignored (this is used when destroying a partition).
 */
int
cpupart_bind_thread(kthread_id_t tp, psetid_t psid, int ignore, void *projbuf,
    void *zonebuf)
{
	cpupart_t	*newpp;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));

	if (psid == PS_NONE)
		newpp = &cp_default;
	else {
		newpp = cpupart_find(psid);
		if (newpp == NULL) {
			return (EINVAL);
		}
	}
	return (cpupart_move_thread(tp, newpp, ignore, projbuf, zonebuf));
}


/*
 * Create a new partition.  On MP systems, this also allocates a
 * kpreempt disp queue for that partition.
 */
int
cpupart_create(psetid_t *psid)
{
	cpupart_t	*pp;

	ASSERT(pool_lock_held());

	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
	pp->cp_nlgrploads = lgrp_plat_max_lgrps();
	pp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * pp->cp_nlgrploads,
	    KM_SLEEP);

	mutex_enter(&cpu_lock);
	if (cp_numparts == cp_max_numparts) {
		mutex_exit(&cpu_lock);
		kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
		pp->cp_lgrploads = NULL;
		kmem_free(pp, sizeof (cpupart_t));
		return (ENOMEM);
	}
	cp_numparts++;
	/* find the next free partition ID */
	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
		cp_id_next++;
	pp->cp_id = cp_id_next++;
	pp->cp_ncpus = 0;
	pp->cp_cpulist = NULL;
	pp->cp_attr = 0;
	klgrpset_clear(pp->cp_lgrpset);
	pp->cp_kp_queue.disp_maxrunpri = -1;
	pp->cp_kp_queue.disp_max_unbound_pri = -1;
	pp->cp_kp_queue.disp_cpu = NULL;
	pp->cp_gen = 0;
	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
	*psid = CPTOPS(pp->cp_id);
	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
	cpupart_kstat_create(pp);
	cpupart_lpl_initialize(pp);

	bitset_init(&pp->cp_cmt_pgs);

	/*
	 * Initialize and size the partition's bitset of halted CPUs.
	 */
	bitset_init_fanout(&pp->cp_haltset, cp_haltset_fanout);
	bitset_resize(&pp->cp_haltset, max_ncpus);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_next = cp_list_head;
	pp->cp_prev = cp_list_head->cp_prev;
	cp_list_head->cp_prev->cp_next = pp;
	cp_list_head->cp_prev = pp;
	start_cpus();
	mutex_exit(&cpu_lock);

	return (0);
}

/*
 * Move threads from the specified partition to cp_default.  If
 * 'unbind_all' is B_TRUE, move all threads; otherwise move only
 * soft-bound threads.
 */
static int
cpupart_unbind_threads(cpupart_t *pp, boolean_t unbind_all)
{
	void	*projbuf, *zonebuf;
	kthread_t *t;
	proc_t	*p;
	int	err = 0;
	psetid_t psid = pp->cp_id;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pp == NULL || pp == &cp_default) {
		return (EINVAL);
	}

	/*
	 * Pre-allocate enough buffers for FSS for all active projects and
	 * for all active zones on the system.  Unused buffers will be
	 * freed later by fss_freebuf().
	 */
	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);

	mutex_enter(&pidlock);
	t = curthread;
	do {
		if (t->t_bind_pset == psid) {
again:			p = ttoproc(t);
			mutex_enter(&p->p_lock);
			if (ttoproc(t) != p) {
				/*
				 * lwp_exit has changed this thread's process
				 * pointer before we grabbed its p_lock.
				 */
				mutex_exit(&p->p_lock);
				goto again;
			}

			/*
			 * We can only unbind threads which have revocable
			 * bindings, unless force unbinding is requested.
			 */
			if (unbind_all || TB_PSET_IS_SOFT(t)) {
				err = cpupart_bind_thread(t, PS_NONE, 1,
				    projbuf, zonebuf);
				if (err) {
					mutex_exit(&p->p_lock);
					mutex_exit(&pidlock);
					fss_freebuf(projbuf, FSS_ALLOC_PROJ);
					fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
					return (err);
				}
				t->t_bind_pset = PS_NONE;
			}
			mutex_exit(&p->p_lock);
		}
		t = t->t_next;
	} while (t != curthread);

	mutex_exit(&pidlock);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
	return (err);
}

/*
 * Destroy a partition.
 */
int
cpupart_destroy(psetid_t psid)
{
	cpu_t	*cp, *first_cp;
	cpupart_t *pp, *newpp;
	int	err = 0;

	ASSERT(pool_lock_held());
	mutex_enter(&cpu_lock);

	pp = cpupart_find(psid);
	if (pp == NULL || pp == &cp_default) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	/*
	 * Unbind all the threads currently bound to the partition.
	 */
	err = cpupart_unbind_threads(pp, B_TRUE);
	if (err) {
		mutex_exit(&cpu_lock);
		return (err);
	}

	newpp = &cp_default;
	while ((cp = pp->cp_cpulist) != NULL) {
		if (err = cpupart_move_cpu(cp, newpp, 0)) {
			mutex_exit(&cpu_lock);
			return (err);
		}
	}

	ASSERT(bitset_is_null(&pp->cp_cmt_pgs));
	ASSERT(bitset_is_null(&pp->cp_haltset));

	/*
	 * Teardown the partition's group of active CMT PGs and halted
	 * CPUs now that they have all left.
	 */
	bitset_fini(&pp->cp_cmt_pgs);
	bitset_fini(&pp->cp_haltset);

	/*
	 * Reset the pointers in any offline processors so they won't
	 * try to rejoin the destroyed partition when they're turned
	 * online.
	 */
	first_cp = cp = CPU;
	do {
		if (cp->cpu_part == pp) {
			ASSERT(cp->cpu_flags & CPU_OFFLINE);
			cp->cpu_part = newpp;
		}
		cp = cp->cpu_next;
	} while (cp != first_cp);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_prev->cp_next = pp->cp_next;
	pp->cp_next->cp_prev = pp->cp_prev;
	if (cp_list_head == pp)
		cp_list_head = pp->cp_next;
	start_cpus();

	if (cp_id_next > pp->cp_id)
		cp_id_next = pp->cp_id;

	if (pp->cp_kstat)
		kstat_delete(pp->cp_kstat);

	cp_numparts--;

	disp_kp_free(&pp->cp_kp_queue);

	cpupart_lpl_teardown(pp);

	kmem_free(pp, sizeof (cpupart_t));
	mutex_exit(&cpu_lock);

	return (err);
}


/*
 * Return the ID of the partition to which the specified processor belongs.
 */
psetid_t
cpupart_query_cpu(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (CPTOPS(cp->cpu_part->cp_id));
}


/*
 * Attach a processor to an existing partition.
 */
int
cpupart_attach_cpu(psetid_t psid, cpu_t *cp, int forced)
{
	cpupart_t	*pp;
	int		err;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	pp = cpupart_find(psid);
	if (pp == NULL)
		return (EINVAL);
	if (cp->cpu_flags & CPU_OFFLINE)
		return (EINVAL);

	err = cpupart_move_cpu(cp, pp, forced);
	return (err);
}

/*
 * Get a list of cpus belonging to the partition.  If numcpus is NULL,
 * this just checks for a valid partition.  If numcpus is non-NULL but
 * cpulist is NULL, the current number of cpus is stored in *numcpus.
 * If both are non-NULL, the current number of cpus is stored in *numcpus,
 * and a list of those cpus up to the size originally in *numcpus is
 * stored in cpulist[].  Also, store the processor set id in *psid.
 * This is useful in case the processor set id passed in was PS_MYID.
 */
int
cpupart_get_cpus(psetid_t *psid, processorid_t *cpulist, uint_t *numcpus)
{
	cpupart_t	*pp;
	uint_t		ncpus;
	cpu_t		*c;
	int		i;

	mutex_enter(&cpu_lock);
	pp = cpupart_find(*psid);
	if (pp == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*psid = CPTOPS(pp->cp_id);
	ncpus = pp->cp_ncpus;
	if (numcpus) {
		if (ncpus > *numcpus) {
			/*
			 * Only copy as many cpus as were passed in, but
			 * pass back the real number.
			 */
			uint_t t = ncpus;
			ncpus = *numcpus;
			*numcpus = t;
		} else
			*numcpus = ncpus;

		if (cpulist) {
			c = pp->cp_cpulist;
			for (i = 0; i < ncpus; i++) {
				ASSERT(c != NULL);
				cpulist[i] = c->cpu_id;
				c = c->cpu_next_part;
			}
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}
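
/*
 * Illustrative sketch (not compiled): the usual two-call pattern for
 * cpupart_get_cpus() -- size the list first, then fetch it.  The
 * caller below is hypothetical.
 */
#if 0
	psetid_t psid = PS_MYID;
	uint_t ncpus = 0;
	processorid_t *ids;

	(void) cpupart_get_cpus(&psid, NULL, &ncpus);	/* count only */
	ids = kmem_alloc(ncpus * sizeof (processorid_t), KM_SLEEP);
	(void) cpupart_get_cpus(&psid, ids, &ncpus);	/* fill, cap ncpus */
#endif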

/*
 * Reallocate kpreempt queues for each CPU partition.  Called from
 * disp_setup when a new scheduling class is loaded that increases the
 * number of priorities in the system.
 */
void
cpupart_kpqalloc(pri_t npri)
{
	cpupart_t *cpp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cpp = cp_list_head;
	do {
		disp_kp_alloc(&cpp->cp_kp_queue, npri);
		cpp = cpp->cp_next;
	} while (cpp != cp_list_head);
}

int
cpupart_get_loadavg(psetid_t psid, int *buf, int nelem)
{
	cpupart_t *cp;
	int i;

	ASSERT(nelem >= 0);
	ASSERT(nelem <= LOADAVG_NSTATS);
	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpupart_find(psid);
	if (cp == NULL)
		return (EINVAL);
	for (i = 0; i < nelem; i++)
		buf[i] = cp->cp_hp_avenrun[i] >> (16 - FSHIFT);

	return (0);
}
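
/*
 * cp_hp_avenrun is maintained with 16 fractional bits; the shift above
 * converts it to the FSHIFT-bit fixed point used by the load average
 * interfaces.  Illustrative conversion to a floating-point value
 * (sketch, not compiled; FSCALE == 1 << FSHIFT, from <sys/param.h>):
 *
 *	double one_min = (double)buf[0] / FSCALE;
 */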


uint_t
cpupart_list(psetid_t *list, uint_t nelem, int flag)
{
	uint_t numpart = 0;
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(flag == CP_ALL || flag == CP_NONEMPTY);

	if (list != NULL) {
		cp = cp_list_head;
		do {
			if (((flag == CP_ALL) && (cp != &cp_default)) ||
			    ((flag == CP_NONEMPTY) && (cp->cp_ncpus != 0))) {
				if (numpart == nelem)
					break;
				list[numpart++] = CPTOPS(cp->cp_id);
			}
			cp = cp->cp_next;
		} while (cp != cp_list_head);
	}

	ASSERT(numpart < cp_numparts);

	if (flag == CP_ALL)
		numpart = cp_numparts - 1; /* leave out default partition */
	else if (flag == CP_NONEMPTY)
		numpart = cp_numparts_nonempty;

	return (numpart);
}

int
cpupart_setattr(psetid_t psid, uint_t attr)
{
	cpupart_t *cp;

	ASSERT(pool_lock_held());

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	/*
	 * The PSET_NOESCAPE attribute is always set for the default
	 * cpu partition.
	 */
	if (cp == &cp_default && !(attr & PSET_NOESCAPE)) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	cp->cp_attr = attr;
	mutex_exit(&cpu_lock);
	return (0);
}

int
cpupart_getattr(psetid_t psid, uint_t *attrp)
{
	cpupart_t *cp;

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*attrp = cp->cp_attr;
	mutex_exit(&cpu_lock);
	return (0);
}