xref: /onnv-gate/usr/src/uts/common/disp/cpupart.c (revision 8408:7b4e48a75d0c)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52722Sjohnlev  * Common Development and Distribution License (the "License").
62722Sjohnlev  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
226298Sakolb  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #include <sys/types.h>
270Sstevel@tonic-gate #include <sys/systm.h>
280Sstevel@tonic-gate #include <sys/cmn_err.h>
290Sstevel@tonic-gate #include <sys/cpuvar.h>
300Sstevel@tonic-gate #include <sys/thread.h>
310Sstevel@tonic-gate #include <sys/disp.h>
320Sstevel@tonic-gate #include <sys/kmem.h>
330Sstevel@tonic-gate #include <sys/debug.h>
340Sstevel@tonic-gate #include <sys/cpupart.h>
350Sstevel@tonic-gate #include <sys/pset.h>
360Sstevel@tonic-gate #include <sys/var.h>
370Sstevel@tonic-gate #include <sys/cyclic.h>
380Sstevel@tonic-gate #include <sys/lgrp.h>
393434Sesaxe #include <sys/pghw.h>
400Sstevel@tonic-gate #include <sys/loadavg.h>
410Sstevel@tonic-gate #include <sys/class.h>
420Sstevel@tonic-gate #include <sys/fss.h>
430Sstevel@tonic-gate #include <sys/pool.h>
440Sstevel@tonic-gate #include <sys/pool_pset.h>
450Sstevel@tonic-gate #include <sys/policy.h>
460Sstevel@tonic-gate 
470Sstevel@tonic-gate /*
480Sstevel@tonic-gate  * Calling pool_lock() protects the pools configuration, which includes
490Sstevel@tonic-gate  * CPU partitions.  cpu_lock protects the CPU partition list, and prevents
500Sstevel@tonic-gate  * partitions from being created or destroyed while the lock is held.
510Sstevel@tonic-gate  * The lock ordering with respect to related locks is:
520Sstevel@tonic-gate  *
530Sstevel@tonic-gate  *    pool_lock() ---> cpu_lock  --->  pidlock  -->  p_lock
540Sstevel@tonic-gate  *
550Sstevel@tonic-gate  * Blocking memory allocations may be made while holding "pool_lock"
560Sstevel@tonic-gate  * or cpu_lock.
570Sstevel@tonic-gate  */
580Sstevel@tonic-gate 
590Sstevel@tonic-gate /*
600Sstevel@tonic-gate  * The cp_default partition is allocated statically, but its lgroup load average
610Sstevel@tonic-gate  * (lpl) list is allocated dynamically after kmem subsystem is initialized. This
620Sstevel@tonic-gate  * saves some memory since the space allocated reflects the actual number of
630Sstevel@tonic-gate  * lgroups supported by the platform. The lgrp facility provides a temporary
640Sstevel@tonic-gate  * space to hold lpl information during system bootstrap.
650Sstevel@tonic-gate  */
660Sstevel@tonic-gate 
670Sstevel@tonic-gate cpupart_t		*cp_list_head;
680Sstevel@tonic-gate cpupart_t		cp_default;
690Sstevel@tonic-gate static cpupartid_t	cp_id_next;
700Sstevel@tonic-gate uint_t			cp_numparts;
710Sstevel@tonic-gate uint_t			cp_numparts_nonempty;
720Sstevel@tonic-gate 
730Sstevel@tonic-gate /*
740Sstevel@tonic-gate  * Need to limit total number of partitions to avoid slowing down the
750Sstevel@tonic-gate  * clock code too much.  The clock code traverses the list of
760Sstevel@tonic-gate  * partitions and needs to be able to execute in a reasonable amount
770Sstevel@tonic-gate  * of time (less than 1/hz seconds).  The maximum is sized based on
780Sstevel@tonic-gate  * max_ncpus so it shouldn't be a problem unless there are large
790Sstevel@tonic-gate  * numbers of empty partitions.
800Sstevel@tonic-gate  */
810Sstevel@tonic-gate static uint_t		cp_max_numparts;
820Sstevel@tonic-gate 
830Sstevel@tonic-gate /*
840Sstevel@tonic-gate  * Processor sets and CPU partitions are different but related concepts.
850Sstevel@tonic-gate  * A processor set is a user-level abstraction allowing users to create
860Sstevel@tonic-gate  * sets of CPUs and bind threads exclusively to those sets.  A CPU
870Sstevel@tonic-gate  * partition is a kernel dispatcher object consisting of a set of CPUs
880Sstevel@tonic-gate  * and a global dispatch queue.  The processor set abstraction is
890Sstevel@tonic-gate  * implemented via a CPU partition, and currently there is a 1-1
900Sstevel@tonic-gate  * mapping between processor sets and partitions (excluding the default
910Sstevel@tonic-gate  * partition, which is not visible as a processor set).  Hence, the
920Sstevel@tonic-gate  * numbering for processor sets and CPU partitions is identical.  This
930Sstevel@tonic-gate  * may not always be true in the future, and these macros could become
940Sstevel@tonic-gate  * less trivial if we support e.g. a processor set containing multiple
950Sstevel@tonic-gate  * CPU partitions.
960Sstevel@tonic-gate  */
970Sstevel@tonic-gate #define	PSTOCP(psid)	((cpupartid_t)((psid) == PS_NONE ? CP_DEFAULT : (psid)))
980Sstevel@tonic-gate #define	CPTOPS(cpid)	((psetid_t)((cpid) == CP_DEFAULT ? PS_NONE : (cpid)))
990Sstevel@tonic-gate 
1006298Sakolb 
1016298Sakolb static int cpupart_unbind_threads(cpupart_t *, boolean_t);
1026298Sakolb 
1030Sstevel@tonic-gate /*
1040Sstevel@tonic-gate  * Find a CPU partition given a processor set ID.
1050Sstevel@tonic-gate  */
1060Sstevel@tonic-gate static cpupart_t *
1070Sstevel@tonic-gate cpupart_find_all(psetid_t psid)
1080Sstevel@tonic-gate {
1090Sstevel@tonic-gate 	cpupart_t *cp;
1100Sstevel@tonic-gate 	cpupartid_t cpid = PSTOCP(psid);
1110Sstevel@tonic-gate 
1120Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1130Sstevel@tonic-gate 
1140Sstevel@tonic-gate 	/* default partition not visible as a processor set */
1150Sstevel@tonic-gate 	if (psid == CP_DEFAULT)
1160Sstevel@tonic-gate 		return (NULL);
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate 	if (psid == PS_MYID)
1190Sstevel@tonic-gate 		return (curthread->t_cpupart);
1200Sstevel@tonic-gate 
1210Sstevel@tonic-gate 	cp = cp_list_head;
1220Sstevel@tonic-gate 	do {
1230Sstevel@tonic-gate 		if (cp->cp_id == cpid)
1240Sstevel@tonic-gate 			return (cp);
1250Sstevel@tonic-gate 		cp = cp->cp_next;
1260Sstevel@tonic-gate 	} while (cp != cp_list_head);
1270Sstevel@tonic-gate 	return (NULL);
1280Sstevel@tonic-gate }
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate /*
1310Sstevel@tonic-gate  * Find a CPU partition given a processor set ID if the processor set
1320Sstevel@tonic-gate  * should be visible from the calling zone.
1330Sstevel@tonic-gate  */
1340Sstevel@tonic-gate cpupart_t *
1350Sstevel@tonic-gate cpupart_find(psetid_t psid)
1360Sstevel@tonic-gate {
1370Sstevel@tonic-gate 	cpupart_t *cp;
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1400Sstevel@tonic-gate 	cp = cpupart_find_all(psid);
1410Sstevel@tonic-gate 	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
1420Sstevel@tonic-gate 	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
1430Sstevel@tonic-gate 			return (NULL);
1440Sstevel@tonic-gate 	return (cp);
1450Sstevel@tonic-gate }
1460Sstevel@tonic-gate 
1470Sstevel@tonic-gate static int
1480Sstevel@tonic-gate cpupart_kstat_update(kstat_t *ksp, int rw)
1490Sstevel@tonic-gate {
1500Sstevel@tonic-gate 	cpupart_t *cp = (cpupart_t *)ksp->ks_private;
1510Sstevel@tonic-gate 	cpupart_kstat_t *cpksp = ksp->ks_data;
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
1540Sstevel@tonic-gate 		return (EACCES);
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate 	cpksp->cpk_updates.value.ui64 = cp->cp_updates;
1570Sstevel@tonic-gate 	cpksp->cpk_runnable.value.ui64 = cp->cp_nrunnable_cum;
1580Sstevel@tonic-gate 	cpksp->cpk_waiting.value.ui64 = cp->cp_nwaiting_cum;
1590Sstevel@tonic-gate 	cpksp->cpk_ncpus.value.ui32 = cp->cp_ncpus;
1600Sstevel@tonic-gate 	cpksp->cpk_avenrun_1min.value.ui32 = cp->cp_hp_avenrun[0] >>
1610Sstevel@tonic-gate 	    (16 - FSHIFT);
1620Sstevel@tonic-gate 	cpksp->cpk_avenrun_5min.value.ui32 = cp->cp_hp_avenrun[1] >>
1630Sstevel@tonic-gate 	    (16 - FSHIFT);
1640Sstevel@tonic-gate 	cpksp->cpk_avenrun_15min.value.ui32 = cp->cp_hp_avenrun[2] >>
1650Sstevel@tonic-gate 	    (16 - FSHIFT);
1660Sstevel@tonic-gate 	return (0);
1670Sstevel@tonic-gate }
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate static void
1700Sstevel@tonic-gate cpupart_kstat_create(cpupart_t *cp)
1710Sstevel@tonic-gate {
1720Sstevel@tonic-gate 	kstat_t *ksp;
1730Sstevel@tonic-gate 	zoneid_t zoneid;
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate 	/*
1780Sstevel@tonic-gate 	 * We have a bit of a chicken-egg problem since this code will
1790Sstevel@tonic-gate 	 * get called to create the kstats for CP_DEFAULT before the
1800Sstevel@tonic-gate 	 * pools framework gets initialized.  We circumvent the problem
1810Sstevel@tonic-gate 	 * by special-casing cp_default.
1820Sstevel@tonic-gate 	 */
1830Sstevel@tonic-gate 	if (cp != &cp_default && pool_pset_enabled())
1840Sstevel@tonic-gate 		zoneid = GLOBAL_ZONEID;
1850Sstevel@tonic-gate 	else
1860Sstevel@tonic-gate 		zoneid = ALL_ZONES;
1870Sstevel@tonic-gate 	ksp = kstat_create_zone("unix", cp->cp_id, "pset", "misc",
1880Sstevel@tonic-gate 	    KSTAT_TYPE_NAMED,
1890Sstevel@tonic-gate 	    sizeof (cpupart_kstat_t) / sizeof (kstat_named_t), 0, zoneid);
1900Sstevel@tonic-gate 	if (ksp != NULL) {
1910Sstevel@tonic-gate 		cpupart_kstat_t *cpksp = ksp->ks_data;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_updates, "updates",
1940Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
1950Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_runnable, "runnable",
1960Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
1970Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_waiting, "waiting",
1980Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
1990Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_ncpus, "ncpus",
2000Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2010Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_1min, "avenrun_1min",
2020Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2030Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_5min, "avenrun_5min",
2040Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2050Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_15min, "avenrun_15min",
2060Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2070Sstevel@tonic-gate 
2080Sstevel@tonic-gate 		ksp->ks_update = cpupart_kstat_update;
2090Sstevel@tonic-gate 		ksp->ks_private = cp;
2100Sstevel@tonic-gate 
2110Sstevel@tonic-gate 		kstat_install(ksp);
2120Sstevel@tonic-gate 	}
2130Sstevel@tonic-gate 	cp->cp_kstat = ksp;
2140Sstevel@tonic-gate }
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate /*
 * Initialize the cpupart's lgrp partitions (lpls)
218*8408SEric.Saxe@Sun.COM  */
219*8408SEric.Saxe@Sun.COM static void
220*8408SEric.Saxe@Sun.COM cpupart_lpl_initialize(cpupart_t *cp)
221*8408SEric.Saxe@Sun.COM {
222*8408SEric.Saxe@Sun.COM 	int i, sz;
223*8408SEric.Saxe@Sun.COM 
224*8408SEric.Saxe@Sun.COM 	sz = cp->cp_nlgrploads = lgrp_plat_max_lgrps();
225*8408SEric.Saxe@Sun.COM 	cp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * sz, KM_SLEEP);
226*8408SEric.Saxe@Sun.COM 
227*8408SEric.Saxe@Sun.COM 	for (i = 0; i < sz; i++) {
228*8408SEric.Saxe@Sun.COM 		/*
229*8408SEric.Saxe@Sun.COM 		 * The last entry of the lpl's resource set is always NULL
230*8408SEric.Saxe@Sun.COM 		 * by design (to facilitate iteration)...hence the "oversizing"
231*8408SEric.Saxe@Sun.COM 		 * by 1.
232*8408SEric.Saxe@Sun.COM 		 */
233*8408SEric.Saxe@Sun.COM 		cp->cp_lgrploads[i].lpl_rset_sz = sz + 1;
234*8408SEric.Saxe@Sun.COM 		cp->cp_lgrploads[i].lpl_rset =
235*8408SEric.Saxe@Sun.COM 		    kmem_zalloc(sizeof (struct lgrp_ld *) * (sz + 1), KM_SLEEP);
236*8408SEric.Saxe@Sun.COM 		cp->cp_lgrploads[i].lpl_id2rset =
237*8408SEric.Saxe@Sun.COM 		    kmem_zalloc(sizeof (int) * (sz + 1), KM_SLEEP);
238*8408SEric.Saxe@Sun.COM 		cp->cp_lgrploads[i].lpl_lgrpid = i;
239*8408SEric.Saxe@Sun.COM 	}
240*8408SEric.Saxe@Sun.COM }
241*8408SEric.Saxe@Sun.COM 
242*8408SEric.Saxe@Sun.COM /*
243*8408SEric.Saxe@Sun.COM  * Teardown the cpupart's lgrp partitions
244*8408SEric.Saxe@Sun.COM  */
245*8408SEric.Saxe@Sun.COM static void
246*8408SEric.Saxe@Sun.COM cpupart_lpl_teardown(cpupart_t *cp)
247*8408SEric.Saxe@Sun.COM {
248*8408SEric.Saxe@Sun.COM 	int i, sz;
249*8408SEric.Saxe@Sun.COM 	lpl_t *lpl;
250*8408SEric.Saxe@Sun.COM 
251*8408SEric.Saxe@Sun.COM 	for (i = 0; i < cp->cp_nlgrploads; i++) {
252*8408SEric.Saxe@Sun.COM 		lpl = &cp->cp_lgrploads[i];
253*8408SEric.Saxe@Sun.COM 
254*8408SEric.Saxe@Sun.COM 		sz = lpl->lpl_rset_sz;
255*8408SEric.Saxe@Sun.COM 		kmem_free(lpl->lpl_rset, sizeof (struct lgrp_ld *) * sz);
256*8408SEric.Saxe@Sun.COM 		kmem_free(lpl->lpl_id2rset, sizeof (int) * sz);
257*8408SEric.Saxe@Sun.COM 		lpl->lpl_rset = NULL;
258*8408SEric.Saxe@Sun.COM 		lpl->lpl_id2rset = NULL;
259*8408SEric.Saxe@Sun.COM 	}
260*8408SEric.Saxe@Sun.COM 	kmem_free(cp->cp_lgrploads, sizeof (lpl_t) * cp->cp_nlgrploads);
261*8408SEric.Saxe@Sun.COM 	cp->cp_lgrploads = NULL;
262*8408SEric.Saxe@Sun.COM }
263*8408SEric.Saxe@Sun.COM 
264*8408SEric.Saxe@Sun.COM /*
2650Sstevel@tonic-gate  * Initialize the default partition and kpreempt disp queue.
2660Sstevel@tonic-gate  */
/*
 * Initialize the default partition and kpreempt disp queue.
 *
 * Called once during boot.  Sets up cp_default as the sole member of the
 * circular partition list, creates its kstat, allocates its lgroup load
 * (lpl) array, and copies in the bootstrap lpl topology.  Ordering here
 * matters: the lpl array must exist before lpl_topo_bootstrap() runs.
 */
void
cpupart_initialize_default(void)
{
	lgrp_id_t i;

	/* cp_default is initially the only element of the circular list */
	cp_list_head = &cp_default;
	cp_default.cp_next = &cp_default;
	cp_default.cp_prev = &cp_default;
	cp_default.cp_id = CP_DEFAULT;
	/* -1 priorities mark the kpreempt queue as empty */
	cp_default.cp_kp_queue.disp_maxrunpri = -1;
	cp_default.cp_kp_queue.disp_max_unbound_pri = -1;
	cp_default.cp_kp_queue.disp_cpu = NULL;
	cp_default.cp_gen = 0;
	cp_default.cp_loadavg.lg_cur = 0;
	cp_default.cp_loadavg.lg_len = 0;
	cp_default.cp_loadavg.lg_total = 0;
	for (i = 0; i < S_LOADAVG_SZ; i++) {
		cp_default.cp_loadavg.lg_loads[i] = 0;
	}
	DISP_LOCK_INIT(&cp_default.cp_kp_queue.disp_lock);
	/* Subsequent partitions get ids following the default's */
	cp_id_next = CP_DEFAULT + 1;
	cpupart_kstat_create(&cp_default);
	cp_numparts = 1;
	if (cp_max_numparts == 0)	/* allow for /etc/system tuning */
		cp_max_numparts = max_ncpus * 2 + 1;
	/*
	 * Allocate space for cp_default list of lgrploads
	 */
	cpupart_lpl_initialize(&cp_default);

	/*
	 * The initial lpl topology is created in a special lpl list
	 * lpl_bootstrap. It should be copied to cp_default.
	 * NOTE: lpl_topo_bootstrap() also updates CPU0 cpu_lpl pointer to point
	 *	 to the correct lpl in the cp_default.cp_lgrploads list.
	 */
	lpl_topo_bootstrap(cp_default.cp_lgrploads,
	    cp_default.cp_nlgrploads);


	cp_default.cp_attr = PSET_NOESCAPE;
	cp_numparts_nonempty = 1;
	/*
	 * Set t0's home
	 */
	t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];

	/* Initialize the PG/CMT and halted-CPU tracking bitsets */
	bitset_init(&cp_default.cp_cmt_pgs);
	bitset_init(&cp_default.cp_haltset);
	bitset_resize(&cp_default.cp_haltset, max_ncpus);
}
3180Sstevel@tonic-gate 
3190Sstevel@tonic-gate 
/*
 * Move CPU cp from its current partition into newpp.
 *
 * Caller must hold cpu_lock.  If "forced" is nonzero, even hard CPU
 * bindings are broken via cpu_unbind(); otherwise only soft bindings
 * are removed.  Returns 0 on success, EBUSY if bound threads or bound
 * cyclics prevent the move, or an error from cpu_unbind().
 *
 * The move itself happens with all CPUs paused (pause_cpus), during
 * which the partition CPU lists, PG data, and lgroup assignments are
 * updated, and every thread whose home lgroup or last-run CPU is
 * affected is rehomed.
 */
static int
cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
{
	cpupart_t *oldpp;
	cpu_t	*ncp, *newlist;
	kthread_t *t;
	int	move_threads = 1;
	lgrp_id_t lgrpid;
	proc_t 	*p;
	int lgrp_diff_lpl;
	lpl_t	*cpu_lpl;
	int	ret;
	boolean_t unbind_all_threads = (forced != 0);

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(newpp != NULL);

	oldpp = cp->cpu_part;
	ASSERT(oldpp != NULL);
	ASSERT(oldpp->cp_ncpus > 0);

	if (newpp == oldpp) {
		/*
		 * Don't need to do anything.
		 */
		return (0);
	}

	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);

	if (!disp_bound_partition(cp, 0)) {
		/*
		 * Don't need to move threads if there are no threads in
		 * the partition.  Note that threads can't enter the
		 * partition while we're holding cpu_lock.
		 */
		move_threads = 0;
	} else if (oldpp->cp_ncpus == 1) {
		/*
		 * The last CPU is removed from a partition which has threads
		 * running in it. Some of these threads may be bound to this
		 * CPU.
		 *
		 * Attempt to unbind threads from the CPU and from the processor
		 * set. Note that no threads should be bound to this CPU since
		 * cpupart_move_threads will refuse to move bound threads to
		 * other CPUs.
		 */
		(void) cpu_unbind(oldpp->cp_cpulist->cpu_id, B_FALSE);
		(void) cpupart_unbind_threads(oldpp, B_FALSE);

		if (!disp_bound_partition(cp, 0)) {
			/*
			 * No bound threads in this partition any more
			 */
			move_threads = 0;
		} else {
			/*
			 * There are still threads bound to the partition
			 */
			cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
			return (EBUSY);
		}
	}

	/*
	 * If forced flag is set unbind any threads from this CPU.
	 * Otherwise unbind soft-bound threads only.
	 */
	if ((ret = cpu_unbind(cp->cpu_id, unbind_all_threads)) != 0) {
		/* Undo the OUT notification before failing */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (ret);
	}

	/*
	 * Stop further threads weak binding to this cpu.
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Notify the Processor Groups subsystem that the CPU
	 * will be moving cpu partitions. This is done before
	 * CPUs are paused to provide an opportunity for any
	 * needed memory allocations.
	 */
	pg_cpupart_out(cp, oldpp);
	pg_cpupart_in(cp, newpp);

again:
	if (move_threads) {
		int loop_count;
		/*
		 * Check for threads strong or weak bound to this CPU.
		 * Give them a chance to drain, but give up after 5 tries.
		 */
		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
			if (loop_count >= 5) {
				/* Roll back notifications and PG moves */
				cpu_state_change_notify(cp->cpu_id,
				    CPU_CPUPART_IN);
				pg_cpupart_out(cp, newpp);
				pg_cpupart_in(cp, oldpp);
				cpu_inmotion = NULL;
				return (EBUSY);	/* some threads still bound */
			}
			delay(1);
		}
	}

	/*
	 * Before we actually start changing data structures, notify
	 * the cyclic subsystem that we want to move this CPU out of its
	 * partition.
	 */
	if (!cyclic_move_out(cp)) {
		/*
		 * This CPU must be the last CPU in a processor set with
		 * a bound cyclic.
		 */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		pg_cpupart_out(cp, newpp);
		pg_cpupart_in(cp, oldpp);
		cpu_inmotion = NULL;
		return (EBUSY);
	}

	pause_cpus(cp);

	if (move_threads) {
		/*
		 * The thread on cpu before the pause thread may have read
		 * cpu_inmotion before we raised the barrier above.  Check
		 * again.
		 */
		if (disp_bound_threads(cp, 1)) {
			start_cpus();
			goto again;
		}

	}

	/*
	 * Now that CPUs are paused, let the PG subsystem perform
	 * any necessary data structure updates.
	 */
	pg_cpupart_move(cp, oldpp, newpp);

	/* save this cpu's lgroup -- it'll be the same in the new partition */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;

	cpu_lpl = cp->cpu_lpl;
	/*
	 * let the lgroup framework know cp has left the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);

	/* move out of old partition */
	oldpp->cp_ncpus--;
	if (oldpp->cp_ncpus > 0) {

		/* unlink cp from the old partition's circular CPU list */
		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
		if (oldpp->cp_cpulist == cp) {
			oldpp->cp_cpulist = ncp;
		}
	} else {
		ncp = oldpp->cp_cpulist = NULL;
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
	oldpp->cp_gen++;

	/* move into new partition */
	newlist = newpp->cp_cpulist;
	if (newlist == NULL) {
		/* cp becomes the sole member of the new partition's list */
		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	} else {
		/* insert cp just before the current list head */
		cp->cpu_next_part = newlist;
		cp->cpu_prev_part = newlist->cpu_prev_part;
		newlist->cpu_prev_part->cpu_next_part = cp;
		newlist->cpu_prev_part = cp;
	}
	cp->cpu_part = newpp;
	newpp->cp_ncpus++;
	newpp->cp_gen++;

	/* no CPU in either partition should be halted during the move */
	ASSERT(bitset_is_null(&newpp->cp_haltset));
	ASSERT(bitset_is_null(&oldpp->cp_haltset));

	/*
	 * let the lgroup framework know cp has entered the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);

	/*
	 * If necessary, move threads off processor.
	 */
	if (move_threads) {
		ASSERT(ncp != NULL);

		/*
		 * Walk thru the active process list to look for
		 * threads that need to have a new home lgroup,
		 * or the last CPU they run on is the same CPU
		 * being moved out of the partition.
		 */

		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {

				ASSERT(t->t_lpl != NULL);

				/*
				 * Update the count of how many threads are
				 * in this CPU's lgroup but have a different lpl
				 */

				if (t->t_lpl != cpu_lpl &&
				    t->t_lpl->lpl_lgrpid == lgrpid)
					lgrp_diff_lpl++;
				/*
				 * If the lgroup that t is assigned to no
				 * longer has any CPUs in t's partition,
				 * we'll have to choose a new lgroup for t.
				 */

				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
				    t->t_cpupart)) {
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				}

				/*
				 * make sure lpl points to our own partition
				 */
				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
				    t->t_cpupart->cp_nlgrploads));

				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/* Update CPU last ran on if it was this CPU */
				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
				    t->t_bound_cpu != cp) {
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				}
				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl)
				klgrpset_del(p->p_lgrpset, lgrpid);
		}

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */

		t = curthread;

		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * If the lgroup that t is assigned to no
			 * longer has any CPUs in t's partition,
			 * we'll have to choose a new lgroup for t.
			 * Also, choose best lgroup for home when
			 * thread has specified lgroup affinities,
			 * since there may be an lgroup with more
			 * affinity available after moving CPUs
			 * around.
			 */
			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
			    t->t_cpupart) || t->t_lgrp_affinity) {
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);
			}

			/* make sure lpl points to our own partition */
			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
			    t->t_cpupart->cp_nlgrploads));

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/* Update CPU last ran on if it was this CPU */
			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
			    t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
				    t->t_pri, NULL);
			}

			t = t->t_next;
		} while (t != curthread);

		/*
		 * Clear off the CPU's run queue, and the kp queue if the
		 * partition is now empty.
		 */
		disp_cpu_inactive(cp);

		/*
		 * Make cp switch to a thread from the new partition.
		 */
		cp->cpu_runrun = 1;
		cp->cpu_kprunrun = 1;
	}

	cpu_inmotion = NULL;
	start_cpus();

	/*
	 * Let anyone interested know that cpu has been added to the set.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);

	/*
	 * Now let the cyclic subsystem know that it can reshuffle cyclics
	 * bound to the new processor set.
	 */
	cyclic_move_in(cp);

	return (0);
}
6620Sstevel@tonic-gate 
6630Sstevel@tonic-gate /*
6640Sstevel@tonic-gate  * Check if thread can be moved to a new cpu partition.  Called by
6650Sstevel@tonic-gate  * cpupart_move_thread() and pset_bind_start().
6660Sstevel@tonic-gate  */
6670Sstevel@tonic-gate int
6680Sstevel@tonic-gate cpupart_movable_thread(kthread_id_t tp, cpupart_t *cp, int ignore)
6690Sstevel@tonic-gate {
6700Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
6710Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
6720Sstevel@tonic-gate 	ASSERT(cp != NULL);
6730Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
6740Sstevel@tonic-gate 
6750Sstevel@tonic-gate 	/*
6760Sstevel@tonic-gate 	 * CPU-bound threads can't be moved.
6770Sstevel@tonic-gate 	 */
6780Sstevel@tonic-gate 	if (!ignore) {
6790Sstevel@tonic-gate 		cpu_t *boundcpu = tp->t_bound_cpu ? tp->t_bound_cpu :
6800Sstevel@tonic-gate 		    tp->t_weakbound_cpu;
6810Sstevel@tonic-gate 		if (boundcpu != NULL && boundcpu->cpu_part != cp)
6820Sstevel@tonic-gate 			return (EBUSY);
6830Sstevel@tonic-gate 	}
6840Sstevel@tonic-gate 	return (0);
6850Sstevel@tonic-gate }
6860Sstevel@tonic-gate 
/*
 * Move thread to new partition.  If ignore is non-zero, then CPU
 * bindings should be ignored (this is used when destroying a
 * partition).
 *
 * Caller must hold cpu_lock, pidlock, and the thread's p_lock; all
 * remain held on return.  Returns 0 on success, EINVAL if the target
 * partition has no CPUs, or the error from cpupart_movable_thread().
 */
static int
cpupart_move_thread(kthread_id_t tp, cpupart_t *newpp, int ignore,
    void *projbuf, void *zonebuf)
{
	cpupart_t *oldpp = tp->t_cpupart;
	int ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(newpp != NULL);

	/* An empty partition cannot run the thread. */
	if (newpp->cp_cpulist == NULL)
		return (EINVAL);

	/*
	 * Check for errors first.
	 */
	thread_lock(tp);
	if ((ret = cpupart_movable_thread(tp, newpp, ignore)) != 0) {
		thread_unlock(tp);
		return (ret);
	}

	/* move the thread */
	if (oldpp != newpp) {
		/*
		 * Make the thread switch to the new partition.
		 */
		tp->t_cpupart = newpp;
		ASSERT(tp->t_lpl != NULL);
		/*
		 * Leave the thread on the same lgroup if possible; otherwise
		 * choose a new lgroup for it.  In either case, update its
		 * t_lpl.
		 */
		if (LGRP_CPUS_IN_PART(tp->t_lpl->lpl_lgrpid, newpp) &&
		    tp->t_lgrp_affinity == NULL) {
			/*
			 * The thread's lgroup has CPUs in the thread's new
			 * partition, so the thread can stay assigned to the
			 * same lgroup.  Update its t_lpl to point to the
			 * lpl_t for its lgroup in its new partition.
			 */
			lgrp_move_thread(tp, &tp->t_cpupart->\
			    cp_lgrploads[tp->t_lpl->lpl_lgrpid], 1);
		} else {
			/*
			 * The thread's lgroup has no cpus in its new
			 * partition or it has specified lgroup affinities,
			 * so choose the best lgroup for the thread and
			 * assign it to that lgroup.
			 */
			lgrp_move_thread(tp, lgrp_choose(tp, tp->t_cpupart),
			    1);
		}
		/*
		 * make sure lpl points to our own partition
		 */
		ASSERT((tp->t_lpl >= tp->t_cpupart->cp_lgrploads) &&
		    (tp->t_lpl < tp->t_cpupart->cp_lgrploads +
		    tp->t_cpupart->cp_nlgrploads));

		ASSERT(tp->t_lpl->lpl_ncpu > 0);

		/*
		 * Force the thread off any CPU of the old partition: if it
		 * is currently running, make it surrender its CPU; if it is
		 * waiting on a run queue, requeue it so it lands on a queue
		 * belonging to the new partition.
		 */
		if (tp->t_state == TS_ONPROC) {
			cpu_surrender(tp);
		} else if (tp->t_state == TS_RUN) {
			(void) dispdeq(tp);
			setbackdq(tp);
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);
	/* Let FSS rebalance shares against the thread's new pset. */
	fss_changepset(tp, newpp, projbuf, zonebuf);

	return (0);		/* success */
}
7760Sstevel@tonic-gate 
7770Sstevel@tonic-gate 
7780Sstevel@tonic-gate /*
7790Sstevel@tonic-gate  * This function binds a thread to a partition.  Must be called with the
7800Sstevel@tonic-gate  * p_lock of the containing process held (to keep the thread from going
7810Sstevel@tonic-gate  * away), and thus also with cpu_lock held (since cpu_lock must be
7820Sstevel@tonic-gate  * acquired before p_lock).  If ignore is non-zero, then CPU bindings
7830Sstevel@tonic-gate  * should be ignored (this is used when destroying a partition).
7840Sstevel@tonic-gate  */
7850Sstevel@tonic-gate int
7860Sstevel@tonic-gate cpupart_bind_thread(kthread_id_t tp, psetid_t psid, int ignore, void *projbuf,
7870Sstevel@tonic-gate     void *zonebuf)
7880Sstevel@tonic-gate {
7890Sstevel@tonic-gate 	cpupart_t	*newpp;
7900Sstevel@tonic-gate 
7910Sstevel@tonic-gate 	ASSERT(pool_lock_held());
7920Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
7930Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
7940Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
7950Sstevel@tonic-gate 
7960Sstevel@tonic-gate 	if (psid == PS_NONE)
7970Sstevel@tonic-gate 		newpp = &cp_default;
7980Sstevel@tonic-gate 	else {
7990Sstevel@tonic-gate 		newpp = cpupart_find(psid);
8000Sstevel@tonic-gate 		if (newpp == NULL) {
8010Sstevel@tonic-gate 			return (EINVAL);
8020Sstevel@tonic-gate 		}
8030Sstevel@tonic-gate 	}
8040Sstevel@tonic-gate 	return (cpupart_move_thread(tp, newpp, ignore, projbuf, zonebuf));
8050Sstevel@tonic-gate }
8060Sstevel@tonic-gate 
8070Sstevel@tonic-gate 
/*
 * Create a new partition.  On MP systems, this also allocates a
 * kpreempt disp queue for that partition.
 *
 * Returns 0 on success with the new pset's ID stored in *psid, or
 * ENOMEM if the maximum number of partitions already exists.
 */
int
cpupart_create(psetid_t *psid)
{
	cpupart_t	*pp;

	ASSERT(pool_lock_held());

	/*
	 * Allocate the partition and its per-lgroup load array before
	 * taking cpu_lock, since both allocations may sleep.
	 */
	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
	pp->cp_nlgrploads = lgrp_plat_max_lgrps();
	pp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * pp->cp_nlgrploads,
	    KM_SLEEP);

	mutex_enter(&cpu_lock);
	if (cp_numparts == cp_max_numparts) {
		/* Partition limit reached; undo the allocations above. */
		mutex_exit(&cpu_lock);
		kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
		pp->cp_lgrploads = NULL;
		kmem_free(pp, sizeof (cpupart_t));
		return (ENOMEM);
	}
	cp_numparts++;
	/* find the next free partition ID */
	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
		cp_id_next++;
	pp->cp_id = cp_id_next++;
	pp->cp_ncpus = 0;
	pp->cp_cpulist = NULL;
	pp->cp_attr = 0;
	klgrpset_clear(pp->cp_lgrpset);
	/* Empty kpreempt queue: no runnable priority yet. */
	pp->cp_kp_queue.disp_maxrunpri = -1;
	pp->cp_kp_queue.disp_max_unbound_pri = -1;
	pp->cp_kp_queue.disp_cpu = NULL;
	pp->cp_gen = 0;
	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
	*psid = CPTOPS(pp->cp_id);
	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
	cpupart_kstat_create(pp);
	cpupart_lpl_initialize(pp);

	/* Bitset of CMT processor groups active in this partition. */
	bitset_init(&pp->cp_cmt_pgs);

	/*
	 * Initialize and size the partition's bitset of halted CPUs
	 */
	bitset_init(&pp->cp_haltset);
	bitset_resize(&pp->cp_haltset, max_ncpus);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	/* Insert pp at the tail of the circular partition list. */
	pp->cp_next = cp_list_head;
	pp->cp_prev = cp_list_head->cp_prev;
	cp_list_head->cp_prev->cp_next = pp;
	cp_list_head->cp_prev = pp;
	start_cpus();
	mutex_exit(&cpu_lock);

	return (0);
}
8740Sstevel@tonic-gate 
/*
 * Move threads from the specified partition to cp_default.  If
 * `unbind_all' is B_TRUE, move all threads bound to the partition;
 * otherwise move only those with a revocable (soft) pset binding.
 * Returns 0 on success or the first error from cpupart_bind_thread().
 */
static int
cpupart_unbind_threads(cpupart_t *pp, boolean_t unbind_all)
{
	void 	*projbuf, *zonebuf;
	kthread_t *t;
	proc_t	*p;
	int	err = 0;
	psetid_t psid = pp->cp_id;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pp == NULL || pp == &cp_default) {
		return (EINVAL);
	}

	/*
	 * Pre-allocate enough buffers for FSS for all active projects and
	 * for all active zones on the system.  Unused buffers will be
	 * freed later by fss_freebuf().
	 */
	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);

	mutex_enter(&pidlock);
	/* Walk the circular all-threads list starting at curthread. */
	t = curthread;
	do {
		if (t->t_bind_pset == psid) {
again:			p = ttoproc(t);
			mutex_enter(&p->p_lock);
			if (ttoproc(t) != p) {
				/*
				 * lwp_exit has changed this thread's process
				 * pointer before we grabbed its p_lock.
				 */
				mutex_exit(&p->p_lock);
				goto again;
			}

			/*
			 * Can only unbind threads which have revocable binding
			 * unless force unbinding requested.
			 */
			if (unbind_all || TB_PSET_IS_SOFT(t)) {
				err = cpupart_bind_thread(t, PS_NONE, 1,
				    projbuf, zonebuf);
				if (err) {
					/* Abort the walk on first failure. */
					mutex_exit(&p->p_lock);
					mutex_exit(&pidlock);
					fss_freebuf(projbuf, FSS_ALLOC_PROJ);
					fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
					return (err);
				}
				t->t_bind_pset = PS_NONE;
			}
			mutex_exit(&p->p_lock);
		}
		t = t->t_next;
	} while (t != curthread);

	mutex_exit(&pidlock);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
	return (err);
}
9446298Sakolb 
/*
 * Destroy a partition.  All bound threads are forcibly unbound, all
 * CPUs are migrated back to cp_default, and the partition is unlinked
 * from the global list and freed.  Returns 0 on success or an errno
 * value from the unbind/move steps.
 */
int
cpupart_destroy(psetid_t psid)
{
	cpu_t	*cp, *first_cp;
	cpupart_t *pp, *newpp;
	int	err = 0;

	ASSERT(pool_lock_held());
	mutex_enter(&cpu_lock);

	/* The default partition can never be destroyed. */
	pp = cpupart_find(psid);
	if (pp == NULL || pp == &cp_default) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	/*
	 * Unbind all the threads currently bound to the partition.
	 */
	err = cpupart_unbind_threads(pp, B_TRUE);
	if (err) {
		mutex_exit(&cpu_lock);
		return (err);
	}

	/* Drain every CPU out of the partition into the default pset. */
	newpp = &cp_default;
	while ((cp = pp->cp_cpulist) != NULL) {
		if (err = cpupart_move_cpu(cp, newpp, 0)) {
			mutex_exit(&cpu_lock);
			return (err);
		}
	}

	/* With no CPUs left, both bitsets must be empty. */
	ASSERT(bitset_is_null(&pp->cp_cmt_pgs));
	ASSERT(bitset_is_null(&pp->cp_haltset));

	/*
	 * Teardown the partition's group of active CMT PGs and halted
	 * CPUs now that they have all left.
	 */
	bitset_fini(&pp->cp_cmt_pgs);
	bitset_fini(&pp->cp_haltset);

	/*
	 * Reset the pointers in any offline processors so they won't
	 * try to rejoin the destroyed partition when they're turned
	 * online.
	 */
	first_cp = cp = CPU;
	do {
		if (cp->cpu_part == pp) {
			ASSERT(cp->cpu_flags & CPU_OFFLINE);
			cp->cpu_part = newpp;
		}
		cp = cp->cpu_next;
	} while (cp != first_cp);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_prev->cp_next = pp->cp_next;
	pp->cp_next->cp_prev = pp->cp_prev;
	if (cp_list_head == pp)
		cp_list_head = pp->cp_next;
	start_cpus();

	/* Allow the freed ID to be reused by the next cpupart_create(). */
	if (cp_id_next > pp->cp_id)
		cp_id_next = pp->cp_id;

	if (pp->cp_kstat)
		kstat_delete(pp->cp_kstat);

	cp_numparts--;

	disp_kp_free(&pp->cp_kp_queue);

	cpupart_lpl_teardown(pp);

	kmem_free(pp, sizeof (cpupart_t));
	mutex_exit(&cpu_lock);

	return (err);
}
10340Sstevel@tonic-gate 
10350Sstevel@tonic-gate 
10360Sstevel@tonic-gate /*
10370Sstevel@tonic-gate  * Return the ID of the partition to which the specified processor belongs.
10380Sstevel@tonic-gate  */
10390Sstevel@tonic-gate psetid_t
10400Sstevel@tonic-gate cpupart_query_cpu(cpu_t *cp)
10410Sstevel@tonic-gate {
10420Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
10430Sstevel@tonic-gate 
10440Sstevel@tonic-gate 	return (CPTOPS(cp->cpu_part->cp_id));
10450Sstevel@tonic-gate }
10460Sstevel@tonic-gate 
10470Sstevel@tonic-gate 
10480Sstevel@tonic-gate /*
10490Sstevel@tonic-gate  * Attach a processor to an existing partition.
10500Sstevel@tonic-gate  */
10510Sstevel@tonic-gate int
10520Sstevel@tonic-gate cpupart_attach_cpu(psetid_t psid, cpu_t *cp, int forced)
10530Sstevel@tonic-gate {
10540Sstevel@tonic-gate 	cpupart_t	*pp;
10550Sstevel@tonic-gate 	int		err;
10560Sstevel@tonic-gate 
10570Sstevel@tonic-gate 	ASSERT(pool_lock_held());
10580Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
10590Sstevel@tonic-gate 
10600Sstevel@tonic-gate 	pp = cpupart_find(psid);
10610Sstevel@tonic-gate 	if (pp == NULL)
10620Sstevel@tonic-gate 		return (EINVAL);
10630Sstevel@tonic-gate 	if (cp->cpu_flags & CPU_OFFLINE)
10640Sstevel@tonic-gate 		return (EINVAL);
10650Sstevel@tonic-gate 
10660Sstevel@tonic-gate 	err = cpupart_move_cpu(cp, pp, forced);
10670Sstevel@tonic-gate 	return (err);
10680Sstevel@tonic-gate }
10690Sstevel@tonic-gate 
10700Sstevel@tonic-gate /*
10710Sstevel@tonic-gate  * Get a list of cpus belonging to the partition.  If numcpus is NULL,
10720Sstevel@tonic-gate  * this just checks for a valid partition.  If numcpus is non-NULL but
10730Sstevel@tonic-gate  * cpulist is NULL, the current number of cpus is stored in *numcpus.
10740Sstevel@tonic-gate  * If both are non-NULL, the current number of cpus is stored in *numcpus,
10750Sstevel@tonic-gate  * and a list of those cpus up to the size originally in *numcpus is
10760Sstevel@tonic-gate  * stored in cpulist[].  Also, store the processor set id in *psid.
10770Sstevel@tonic-gate  * This is useful in case the processor set id passed in was PS_MYID.
10780Sstevel@tonic-gate  */
10790Sstevel@tonic-gate int
10800Sstevel@tonic-gate cpupart_get_cpus(psetid_t *psid, processorid_t *cpulist, uint_t *numcpus)
10810Sstevel@tonic-gate {
10820Sstevel@tonic-gate 	cpupart_t	*pp;
10830Sstevel@tonic-gate 	uint_t		ncpus;
10840Sstevel@tonic-gate 	cpu_t		*c;
10850Sstevel@tonic-gate 	int		i;
10860Sstevel@tonic-gate 
10870Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
10880Sstevel@tonic-gate 	pp = cpupart_find(*psid);
10890Sstevel@tonic-gate 	if (pp == NULL) {
10900Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
10910Sstevel@tonic-gate 		return (EINVAL);
10920Sstevel@tonic-gate 	}
10930Sstevel@tonic-gate 	*psid = CPTOPS(pp->cp_id);
10940Sstevel@tonic-gate 	ncpus = pp->cp_ncpus;
10950Sstevel@tonic-gate 	if (numcpus) {
10960Sstevel@tonic-gate 		if (ncpus > *numcpus) {
10970Sstevel@tonic-gate 			/*
10980Sstevel@tonic-gate 			 * Only copy as many cpus as were passed in, but
10990Sstevel@tonic-gate 			 * pass back the real number.
11000Sstevel@tonic-gate 			 */
11010Sstevel@tonic-gate 			uint_t t = ncpus;
11020Sstevel@tonic-gate 			ncpus = *numcpus;
11030Sstevel@tonic-gate 			*numcpus = t;
11040Sstevel@tonic-gate 		} else
11050Sstevel@tonic-gate 			*numcpus = ncpus;
11060Sstevel@tonic-gate 
11070Sstevel@tonic-gate 		if (cpulist) {
11080Sstevel@tonic-gate 			c = pp->cp_cpulist;
11090Sstevel@tonic-gate 			for (i = 0; i < ncpus; i++) {
11100Sstevel@tonic-gate 				ASSERT(c != NULL);
11110Sstevel@tonic-gate 				cpulist[i] = c->cpu_id;
11120Sstevel@tonic-gate 				c = c->cpu_next_part;
11130Sstevel@tonic-gate 			}
11140Sstevel@tonic-gate 		}
11150Sstevel@tonic-gate 	}
11160Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
11170Sstevel@tonic-gate 	return (0);
11180Sstevel@tonic-gate }
11190Sstevel@tonic-gate 
11200Sstevel@tonic-gate /*
11210Sstevel@tonic-gate  * Reallocate kpreempt queues for each CPU partition.  Called from
11220Sstevel@tonic-gate  * disp_setup when a new scheduling class is loaded that increases the
11230Sstevel@tonic-gate  * number of priorities in the system.
11240Sstevel@tonic-gate  */
11250Sstevel@tonic-gate void
11260Sstevel@tonic-gate cpupart_kpqalloc(pri_t npri)
11270Sstevel@tonic-gate {
11280Sstevel@tonic-gate 	cpupart_t *cpp;
11290Sstevel@tonic-gate 
11300Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
11310Sstevel@tonic-gate 	cpp = cp_list_head;
11320Sstevel@tonic-gate 	do {
11330Sstevel@tonic-gate 		disp_kp_alloc(&cpp->cp_kp_queue, npri);
11340Sstevel@tonic-gate 		cpp = cpp->cp_next;
11350Sstevel@tonic-gate 	} while (cpp != cp_list_head);
11360Sstevel@tonic-gate }
11370Sstevel@tonic-gate 
11380Sstevel@tonic-gate int
11390Sstevel@tonic-gate cpupart_get_loadavg(psetid_t psid, int *buf, int nelem)
11400Sstevel@tonic-gate {
11410Sstevel@tonic-gate 	cpupart_t *cp;
11420Sstevel@tonic-gate 	int i;
11430Sstevel@tonic-gate 
11440Sstevel@tonic-gate 	ASSERT(nelem >= 0);
11450Sstevel@tonic-gate 	ASSERT(nelem <= LOADAVG_NSTATS);
11460Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
11470Sstevel@tonic-gate 
11480Sstevel@tonic-gate 	cp = cpupart_find(psid);
11490Sstevel@tonic-gate 	if (cp == NULL)
11500Sstevel@tonic-gate 		return (EINVAL);
11510Sstevel@tonic-gate 	for (i = 0; i < nelem; i++)
11520Sstevel@tonic-gate 		buf[i] = cp->cp_hp_avenrun[i] >> (16 - FSHIFT);
11530Sstevel@tonic-gate 
11540Sstevel@tonic-gate 	return (0);
11550Sstevel@tonic-gate }
11560Sstevel@tonic-gate 
11570Sstevel@tonic-gate 
11580Sstevel@tonic-gate uint_t
11590Sstevel@tonic-gate cpupart_list(psetid_t *list, uint_t nelem, int flag)
11600Sstevel@tonic-gate {
11610Sstevel@tonic-gate 	uint_t numpart = 0;
11620Sstevel@tonic-gate 	cpupart_t *cp;
11630Sstevel@tonic-gate 
11640Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
11650Sstevel@tonic-gate 	ASSERT(flag == CP_ALL || flag == CP_NONEMPTY);
11660Sstevel@tonic-gate 
11670Sstevel@tonic-gate 	if (list != NULL) {
11680Sstevel@tonic-gate 		cp = cp_list_head;
11690Sstevel@tonic-gate 		do {
11700Sstevel@tonic-gate 			if (((flag == CP_ALL) && (cp != &cp_default)) ||
11710Sstevel@tonic-gate 			    ((flag == CP_NONEMPTY) && (cp->cp_ncpus != 0))) {
11720Sstevel@tonic-gate 				if (numpart == nelem)
11730Sstevel@tonic-gate 					break;
11740Sstevel@tonic-gate 				list[numpart++] = CPTOPS(cp->cp_id);
11750Sstevel@tonic-gate 			}
11760Sstevel@tonic-gate 			cp = cp->cp_next;
11770Sstevel@tonic-gate 		} while (cp != cp_list_head);
11780Sstevel@tonic-gate 	}
11790Sstevel@tonic-gate 
11800Sstevel@tonic-gate 	ASSERT(numpart < cp_numparts);
11810Sstevel@tonic-gate 
11820Sstevel@tonic-gate 	if (flag == CP_ALL)
11830Sstevel@tonic-gate 		numpart = cp_numparts - 1; /* leave out default partition */
11840Sstevel@tonic-gate 	else if (flag == CP_NONEMPTY)
11850Sstevel@tonic-gate 		numpart = cp_numparts_nonempty;
11860Sstevel@tonic-gate 
11870Sstevel@tonic-gate 	return (numpart);
11880Sstevel@tonic-gate }
11890Sstevel@tonic-gate 
11900Sstevel@tonic-gate int
11910Sstevel@tonic-gate cpupart_setattr(psetid_t psid, uint_t attr)
11920Sstevel@tonic-gate {
11930Sstevel@tonic-gate 	cpupart_t *cp;
11940Sstevel@tonic-gate 
11950Sstevel@tonic-gate 	ASSERT(pool_lock_held());
11960Sstevel@tonic-gate 
11970Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
11980Sstevel@tonic-gate 	if ((cp = cpupart_find(psid)) == NULL) {
11990Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
12000Sstevel@tonic-gate 		return (EINVAL);
12010Sstevel@tonic-gate 	}
12020Sstevel@tonic-gate 	/*
12030Sstevel@tonic-gate 	 * PSET_NOESCAPE attribute for default cpu partition is always set
12040Sstevel@tonic-gate 	 */
12050Sstevel@tonic-gate 	if (cp == &cp_default && !(attr & PSET_NOESCAPE)) {
12060Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
12070Sstevel@tonic-gate 		return (EINVAL);
12080Sstevel@tonic-gate 	}
12090Sstevel@tonic-gate 	cp->cp_attr = attr;
12100Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
12110Sstevel@tonic-gate 	return (0);
12120Sstevel@tonic-gate }
12130Sstevel@tonic-gate 
12140Sstevel@tonic-gate int
12150Sstevel@tonic-gate cpupart_getattr(psetid_t psid, uint_t *attrp)
12160Sstevel@tonic-gate {
12170Sstevel@tonic-gate 	cpupart_t *cp;
12180Sstevel@tonic-gate 
12190Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
12200Sstevel@tonic-gate 	if ((cp = cpupart_find(psid)) == NULL) {
12210Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
12220Sstevel@tonic-gate 		return (EINVAL);
12230Sstevel@tonic-gate 	}
12240Sstevel@tonic-gate 	*attrp = cp->cp_attr;
12250Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
12260Sstevel@tonic-gate 	return (0);
12270Sstevel@tonic-gate }
1228