/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/cpupart.h>
#include <sys/pset.h>
#include <sys/var.h>
#include <sys/cyclic.h>
#include <sys/lgrp.h>
#include <sys/chip.h>
#include <sys/loadavg.h>
#include <sys/class.h>
#include <sys/fss.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/policy.h>

/*
 * Calling pool_lock() protects the pools configuration, which includes
 * CPU partitions.  cpu_lock protects the CPU partition list, and prevents
 * partitions from being created or destroyed while the lock is held.
 * The lock ordering with respect to related locks is:
 *
 *    pool_lock() ---> cpu_lock  --->  pidlock  -->  p_lock
 *
 * Blocking memory allocations may be made while holding "pool_lock"
 * or cpu_lock.
 */
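
/*
 * For example, cpupart_bind_thread() below is entered with all of these
 * held in exactly that order: its ASSERTs check pool_lock(), cpu_lock,
 * pidlock, and the target process's p_lock.
 */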

/*
 * The cp_default partition is allocated statically, but its lgroup load
 * average (lpl) list is allocated dynamically after the kmem subsystem is
 * initialized.  This saves some memory since the space allocated reflects
 * the actual number of lgroups supported by the platform.  The lgrp
 * facility provides temporary space to hold lpl information during system
 * bootstrap.
 */

cpupart_t		*cp_list_head;
cpupart_t		cp_default;
static cpupartid_t	cp_id_next;
uint_t			cp_numparts;
uint_t			cp_numparts_nonempty;

/*
 * Need to limit total number of partitions to avoid slowing down the
 * clock code too much.  The clock code traverses the list of
 * partitions and needs to be able to execute in a reasonable amount
 * of time (less than 1/hz seconds).  The maximum is sized based on
 * max_ncpus so it shouldn't be a problem unless there are large
 * numbers of empty partitions.
 */
static uint_t		cp_max_numparts;

/*
 * Processor sets and CPU partitions are different but related concepts.
 * A processor set is a user-level abstraction allowing users to create
 * sets of CPUs and bind threads exclusively to those sets.  A CPU
 * partition is a kernel dispatcher object consisting of a set of CPUs
 * and a global dispatch queue.  The processor set abstraction is
 * implemented via a CPU partition, and currently there is a 1-1
 * mapping between processor sets and partitions (excluding the default
 * partition, which is not visible as a processor set).  Hence, the
 * numbering for processor sets and CPU partitions is identical.  This
 * may not always be true in the future, and these macros could become
 * less trivial if we support e.g. a processor set containing multiple
 * CPU partitions.
 */
#define	PSTOCP(psid)	((cpupartid_t)((psid) == PS_NONE ? CP_DEFAULT : (psid)))
#define	CPTOPS(cpid)	((psetid_t)((cpid) == CP_DEFAULT ? PS_NONE : (cpid)))
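
/*
 * Example: PSTOCP(PS_NONE) yields CP_DEFAULT and CPTOPS(CP_DEFAULT) yields
 * PS_NONE; every other ID maps to itself in both directions.
 */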


/*
 * Find a CPU partition given a processor set ID.
 */
static cpupart_t *
cpupart_find_all(psetid_t psid)
{
	cpupart_t *cp;
	cpupartid_t cpid = PSTOCP(psid);

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* default partition not visible as a processor set */
	if (psid == CP_DEFAULT)
		return (NULL);

	if (psid == PS_MYID)
		return (curthread->t_cpupart);

	cp = cp_list_head;
	do {
		if (cp->cp_id == cpid)
			return (cp);
		cp = cp->cp_next;
	} while (cp != cp_list_head);
	return (NULL);
}

/*
 * Find a CPU partition given a processor set ID if the processor set
 * should be visible from the calling zone.
 */
cpupart_t *
cpupart_find(psetid_t psid)
{
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cp = cpupart_find_all(psid);
	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
			return (NULL);
	return (cp);
}

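/*
 * kstat update routine for the per-partition "pset" kstats created below;
 * it snapshots the partition's cumulative run/wait counts, CPU count, and
 * decayed load averages.
 */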
static int
cpupart_kstat_update(kstat_t *ksp, int rw)
{
	cpupart_t *cp = (cpupart_t *)ksp->ks_private;
	cpupart_kstat_t *cpksp = ksp->ks_data;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	cpksp->cpk_updates.value.ui64 = cp->cp_updates;
	cpksp->cpk_runnable.value.ui64 = cp->cp_nrunnable_cum;
	cpksp->cpk_waiting.value.ui64 = cp->cp_nwaiting_cum;
	cpksp->cpk_ncpus.value.ui32 = cp->cp_ncpus;
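	/*
	 * cp_hp_avenrun[] is kept with 16 bits of fractional precision;
	 * shift down to the FSHIFT fixed-point format that load average
	 * consumers expect.
	 */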
	cpksp->cpk_avenrun_1min.value.ui32 = cp->cp_hp_avenrun[0] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_5min.value.ui32 = cp->cp_hp_avenrun[1] >>
	    (16 - FSHIFT);
	cpksp->cpk_avenrun_15min.value.ui32 = cp->cp_hp_avenrun[2] >>
	    (16 - FSHIFT);
	return (0);
}

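/*
 * Create the per-partition "pset" kstat.  It is created as
 * unix:<partition id>:pset, so its counters are visible to kstat
 * consumers on a per-processor-set basis.
 */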
static void
cpupart_kstat_create(cpupart_t *cp)
{
	kstat_t *ksp;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * We have a bit of a chicken-egg problem since this code will
	 * get called to create the kstats for CP_DEFAULT before the
	 * pools framework gets initialized.  We circumvent the problem
	 * by special-casing cp_default.
	 */
	if (cp != &cp_default && pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	ksp = kstat_create_zone("unix", cp->cp_id, "pset", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (cpupart_kstat_t) / sizeof (kstat_named_t), 0, zoneid);
	if (ksp != NULL) {
		cpupart_kstat_t *cpksp = ksp->ks_data;

		kstat_named_init(&cpksp->cpk_updates, "updates",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_runnable, "runnable",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_waiting, "waiting",
		    KSTAT_DATA_UINT64);
		kstat_named_init(&cpksp->cpk_ncpus, "ncpus",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_1min, "avenrun_1min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_5min, "avenrun_5min",
		    KSTAT_DATA_UINT32);
		kstat_named_init(&cpksp->cpk_avenrun_15min, "avenrun_15min",
		    KSTAT_DATA_UINT32);

		ksp->ks_update = cpupart_kstat_update;
		ksp->ks_private = cp;

		kstat_install(ksp);
	}
	cp->cp_kstat = ksp;
}

/*
 * Initialize the default partition and kpreempt disp queue.
 */
void
cpupart_initialize_default(void)
{
	lgrp_id_t i;

	cp_list_head = &cp_default;
	cp_default.cp_next = &cp_default;
	cp_default.cp_prev = &cp_default;
	cp_default.cp_id = CP_DEFAULT;
	cp_default.cp_kp_queue.disp_maxrunpri = -1;
	cp_default.cp_kp_queue.disp_max_unbound_pri = -1;
	cp_default.cp_kp_queue.disp_cpu = NULL;
	cp_default.cp_gen = 0;
	cp_default.cp_loadavg.lg_cur = 0;
	cp_default.cp_loadavg.lg_len = 0;
	cp_default.cp_loadavg.lg_total = 0;
	for (i = 0; i < S_LOADAVG_SZ; i++) {
		cp_default.cp_loadavg.lg_loads[i] = 0;
	}
	CPUSET_ZERO(cp_default.cp_haltset);
	DISP_LOCK_INIT(&cp_default.cp_kp_queue.disp_lock);
	cp_id_next = CP_DEFAULT + 1;
	cpupart_kstat_create(&cp_default);
	cp_numparts = 1;
	if (cp_max_numparts == 0)	/* allow for /etc/system tuning */
		cp_max_numparts = max_ncpus * 2 + 1;
	/*
	 * Allocate space for cp_default list of lgrploads
	 */
	cp_default.cp_nlgrploads = lgrp_plat_max_lgrps();
	cp_default.cp_lgrploads = kmem_zalloc(sizeof (lpl_t) *
	    cp_default.cp_nlgrploads, KM_SLEEP);

	/*
	 * The initial lpl topology is created in a special lpl list
	 * lpl_bootstrap. It should be copied to cp_default.
	 * NOTE: lpl_topo_bootstrap() also updates CPU0 cpu_lpl pointer to point
	 *	 to the correct lpl in the cp_default.cp_lgrploads list.
	 */
	lpl_topo_bootstrap(cp_default.cp_lgrploads,
	    cp_default.cp_nlgrploads);

	for (i = 0; i < cp_default.cp_nlgrploads; i++) {
		cp_default.cp_lgrploads[i].lpl_lgrpid = i;
	}
	cp_default.cp_attr = PSET_NOESCAPE;
	cp_numparts_nonempty = 1;
	/*
	 * Set t0's home
	 */
	t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];
}


static int
cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
{
	cpupart_t *oldpp;
	cpu_t	*ncp, *newlist;
	kthread_t *t;
	int	move_threads = 1;
	lgrp_id_t lgrpid;
	proc_t 	*p;
	int lgrp_diff_lpl;
	lpl_t	*cpu_lpl;
	int	ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(newpp != NULL);

	oldpp = cp->cpu_part;
	ASSERT(oldpp != NULL);
	ASSERT(oldpp->cp_ncpus > 0);

	if (newpp == oldpp) {
		/*
		 * Don't need to do anything.
		 */
		return (0);
	}

	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);

	if (!disp_bound_partition(cp, 0)) {
		/*
		 * Don't need to move threads if there are no threads in
		 * the partition.  Note that threads can't enter the
		 * partition while we're holding cpu_lock.
		 */
		move_threads = 0;
	} else if (oldpp->cp_ncpus == 1) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (EBUSY);
	}

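	/*
	 * A forced move first breaks any hard bindings threads have to this
	 * CPU via cpu_unbind(); an unforced move instead fails below if
	 * bound threads remain.
	 */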
	if (forced && (ret = cpu_unbind(cp->cpu_id)) != 0) {
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		return (ret);
	}

	/*
	 * Stop further threads from weak binding to this CPU.
	 */
	cpu_inmotion = cp;
	membar_enter();
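	/*
	 * The store to cpu_inmotion together with the barrier above ensures
	 * that code establishing new weak bindings sees that this CPU is in
	 * motion before we scan for bound threads below; the re-check under
	 * pause_cpus() closes the remaining window.
	 */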

again:
	if (move_threads) {
		int loop_count;
		/*
		 * Check for threads strongly or weakly bound to this CPU.
		 */
		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
			if (loop_count >= 5) {
				cpu_state_change_notify(cp->cpu_id,
				    CPU_CPUPART_IN);
				cpu_inmotion = NULL;
				return (EBUSY);	/* some threads still bound */
			}
			delay(1);
		}
	}

	/*
	 * Before we actually start changing data structures, notify
	 * the cyclic subsystem that we want to move this CPU out of its
	 * partition.
	 */
	if (!cyclic_move_out(cp)) {
		/*
		 * This CPU must be the last CPU in a processor set with
		 * a bound cyclic.
		 */
		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
		cpu_inmotion = NULL;
		return (EBUSY);
	}

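	/*
	 * Pause all other CPUs so the per-partition CPU lists and lgroup
	 * data can be updated without the clock thread or the dispatcher
	 * seeing them in an inconsistent state.
	 */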
	pause_cpus(cp);

	if (move_threads) {
		/*
		 * The thread that was running on this CPU before the pause
		 * thread may have read cpu_inmotion before we raised the
		 * barrier above.  Check again.
		 */
		if (disp_bound_threads(cp, 1)) {
			start_cpus();
			goto again;
		}

	}

	/*
	 * Update the set of chips being spanned.
	 */
	chip_cpu_move_part(cp, oldpp, newpp);

	/* save this cpu's lgroup -- it'll be the same in the new partition */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;

	cpu_lpl = cp->cpu_lpl;
	/*
	 * let the lgroup framework know cp has left the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);

	/* move out of old partition */
	oldpp->cp_ncpus--;
	if (oldpp->cp_ncpus > 0) {

		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
		if (oldpp->cp_cpulist == cp) {
			oldpp->cp_cpulist = ncp;
		}
	} else {
		ncp = oldpp->cp_cpulist = NULL;
		cp_numparts_nonempty--;
		ASSERT(cp_numparts_nonempty != 0);
	}
	oldpp->cp_gen++;

	/* move into new partition */
	newlist = newpp->cp_cpulist;
	if (newlist == NULL) {
		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	} else {
		cp->cpu_next_part = newlist;
		cp->cpu_prev_part = newlist->cpu_prev_part;
		newlist->cpu_prev_part->cpu_next_part = cp;
		newlist->cpu_prev_part = cp;
	}
	cp->cpu_part = newpp;
	newpp->cp_ncpus++;
	newpp->cp_gen++;

	ASSERT(CPUSET_ISNULL(newpp->cp_haltset));
	ASSERT(CPUSET_ISNULL(oldpp->cp_haltset));

	/*
	 * let the lgroup framework know cp has entered the partition
	 */
	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);

	/*
	 * If necessary, move threads off processor.
	 */
	if (move_threads) {
		ASSERT(ncp != NULL);

		/*
		 * Walk through the active process list looking for threads
		 * that need a new home lgroup, or whose last CPU is the one
		 * being moved out of the partition.
		 */

		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {

				ASSERT(t->t_lpl != NULL);

				/*
				 * Update the count of how many threads are
				 * in this CPU's lgroup but have a different lpl
				 */

				if (t->t_lpl != cpu_lpl &&
				    t->t_lpl->lpl_lgrpid == lgrpid)
					lgrp_diff_lpl++;
				/*
				 * If the lgroup that t is assigned to no
				 * longer has any CPUs in t's partition,
				 * we'll have to choose a new lgroup for t.
				 */

				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
				    t->t_cpupart)) {
					lgrp_move_thread(t,
					    lgrp_choose(t, t->t_cpupart), 0);
				}

				/*
				 * make sure lpl points to our own partition
				 */
				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
					t->t_cpupart->cp_nlgrploads));

				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/* Update CPU last ran on if it was this CPU */
				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
				    t->t_bound_cpu != cp) {
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				}
				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * If we didn't find any threads in the same lgroup
			 * as this CPU with a different lpl, remove the
			 * lgroup from the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, lgrpid);
		}

		/*
		 * Walk the thread list looking for threads that need to be
		 * rehomed, since some threads are not on their process's
		 * p_tlist.
		 */

		t = curthread;

		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * If the lgroup that t is assigned to no
			 * longer has any CPUs in t's partition,
			 * we'll have to choose a new lgroup for t.
			 * Also, choose the best home lgroup when the
			 * thread has specified lgroup affinities,
			 * since there may be an lgroup with more
			 * affinity available after moving CPUs
			 * around.
			 */
			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
			    t->t_cpupart) || t->t_lgrp_affinity) {
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);
			}

			/* make sure lpl points to our own partition */
			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
				t->t_cpupart->cp_nlgrploads));

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/* Update CPU last ran on if it was this CPU */
			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
			    t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
				    t->t_pri, NULL);
			}

			t = t->t_next;
		} while (t != curthread);

		/*
		 * Clear off the CPU's run queue, and the kp queue if the
		 * partition is now empty.
		 */
		disp_cpu_inactive(cp);

		/*
		 * Make cp switch to a thread from the new partition.
		 */
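		/*
		 * cpu_runrun requests user-level preemption and cpu_kprunrun
		 * kernel-level preemption, so whatever is running on cp will
		 * surrender it at the next preemption point.
		 */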
		cp->cpu_runrun = 1;
		cp->cpu_kprunrun = 1;
	}

	cpu_inmotion = NULL;
	start_cpus();

	/*
	 * Let anyone interested know that cpu has been added to the set.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);

	/*
	 * Now let the cyclic subsystem know that it can reshuffle cyclics
	 * bound to the new processor set.
	 */
	cyclic_move_in(cp);

	return (0);
}

/*
 * Check if thread can be moved to a new cpu partition.  Called by
 * cpupart_move_thread() and pset_bind_start().
 */
int
cpupart_movable_thread(kthread_id_t tp, cpupart_t *cp, int ignore)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(cp != NULL);
	ASSERT(THREAD_LOCK_HELD(tp));

	/*
	 * CPU-bound threads can't be moved.
	 */
	if (!ignore) {
		cpu_t *boundcpu = tp->t_bound_cpu ? tp->t_bound_cpu :
		    tp->t_weakbound_cpu;
		if (boundcpu != NULL && boundcpu->cpu_part != cp)
			return (EBUSY);
	}
	return (0);
}

/*
 * Move thread to new partition.  If ignore is non-zero, then CPU
 * bindings should be ignored (this is used when destroying a
 * partition).
 */
static int
cpupart_move_thread(kthread_id_t tp, cpupart_t *newpp, int ignore,
    void *projbuf, void *zonebuf)
{
	cpupart_t *oldpp = tp->t_cpupart;
	int ret;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
	ASSERT(newpp != NULL);

	if (newpp->cp_cpulist == NULL)
		return (EINVAL);

	/*
	 * Check for errors first.
	 */
	thread_lock(tp);
	if ((ret = cpupart_movable_thread(tp, newpp, ignore)) != 0) {
		thread_unlock(tp);
		return (ret);
	}

	/* move the thread */
	if (oldpp != newpp) {
		/*
		 * Make the thread switch to the new partition.
		 */
		tp->t_cpupart = newpp;
		ASSERT(tp->t_lpl != NULL);
		/*
		 * Leave the thread on the same lgroup if possible; otherwise
		 * choose a new lgroup for it.  In either case, update its
		 * t_lpl.
		 */
		if (LGRP_CPUS_IN_PART(tp->t_lpl->lpl_lgrpid, newpp) &&
		    tp->t_lgrp_affinity == NULL) {
			/*
			 * The thread's lgroup has CPUs in the thread's new
			 * partition, so the thread can stay assigned to the
			 * same lgroup.  Update its t_lpl to point to the
			 * lpl_t for its lgroup in its new partition.
			 */
			lgrp_move_thread(tp, &tp->t_cpupart->
			    cp_lgrploads[tp->t_lpl->lpl_lgrpid], 1);
		} else {
			/*
			 * The thread's lgroup has no cpus in its new
			 * partition or it has specified lgroup affinities,
			 * so choose the best lgroup for the thread and
			 * assign it to that lgroup.
			 */
			lgrp_move_thread(tp, lgrp_choose(tp, tp->t_cpupart),
			    1);
		}
		/*
		 * make sure lpl points to our own partition
		 */
		ASSERT((tp->t_lpl >= tp->t_cpupart->cp_lgrploads) &&
		    (tp->t_lpl < tp->t_cpupart->cp_lgrploads +
			tp->t_cpupart->cp_nlgrploads));

		ASSERT(tp->t_lpl->lpl_ncpu > 0);

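		/*
		 * If the thread is currently running, force it to reschedule
		 * so it picks a CPU in the new partition; if it is sitting on
		 * a dispatch queue, requeue it so it lands on a queue that
		 * belongs to the new partition.
		 */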
		if (tp->t_state == TS_ONPROC) {
			cpu_surrender(tp);
		} else if (tp->t_state == TS_RUN) {
			(void) dispdeq(tp);
			setbackdq(tp);
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);
	fss_changepset(tp, newpp, projbuf, zonebuf);

	return (0);		/* success */
}


/*
 * This function binds a thread to a partition.  Must be called with the
 * p_lock of the containing process held (to keep the thread from going
 * away), and thus also with cpu_lock held (since cpu_lock must be
 * acquired before p_lock).  If ignore is non-zero, then CPU bindings
 * should be ignored (this is used when destroying a partition).
 */
int
cpupart_bind_thread(kthread_id_t tp, psetid_t psid, int ignore, void *projbuf,
    void *zonebuf)
{
	cpupart_t	*newpp;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));

	if (psid == PS_NONE)
		newpp = &cp_default;
	else {
		newpp = cpupart_find(psid);
		if (newpp == NULL) {
			return (EINVAL);
		}
	}
	return (cpupart_move_thread(tp, newpp, ignore, projbuf, zonebuf));
}


/*
 * Create a new partition.  On MP systems, this also allocates a
 * kpreempt disp queue for that partition.
 */
int
cpupart_create(psetid_t *psid)
{
	cpupart_t	*pp;
	lgrp_id_t	i;

	ASSERT(pool_lock_held());

	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
	pp->cp_nlgrploads = lgrp_plat_max_lgrps();
	pp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * pp->cp_nlgrploads,
	    KM_SLEEP);

	mutex_enter(&cpu_lock);
	if (cp_numparts == cp_max_numparts) {
		mutex_exit(&cpu_lock);
		kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
		pp->cp_lgrploads = NULL;
		kmem_free(pp, sizeof (cpupart_t));
		return (ENOMEM);
	}
	cp_numparts++;
	/* find the next free partition ID */
	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
		cp_id_next++;
	pp->cp_id = cp_id_next++;
	pp->cp_ncpus = 0;
	pp->cp_cpulist = NULL;
	pp->cp_attr = 0;
	klgrpset_clear(pp->cp_lgrpset);
	pp->cp_kp_queue.disp_maxrunpri = -1;
	pp->cp_kp_queue.disp_max_unbound_pri = -1;
	pp->cp_kp_queue.disp_cpu = NULL;
	pp->cp_gen = 0;
	CPUSET_ZERO(pp->cp_haltset);
	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
	*psid = CPTOPS(pp->cp_id);
	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
	cpupart_kstat_create(pp);
	for (i = 0; i < pp->cp_nlgrploads; i++) {
		pp->cp_lgrploads[i].lpl_lgrpid = i;
	}
	CHIP_SET_ZERO(pp->cp_chipset);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_next = cp_list_head;
	pp->cp_prev = cp_list_head->cp_prev;
	cp_list_head->cp_prev->cp_next = pp;
	cp_list_head->cp_prev = pp;
	start_cpus();
	mutex_exit(&cpu_lock);

	return (0);
}


/*
 * Destroy a partition.
 */
int
cpupart_destroy(psetid_t psid)
{
	cpu_t	*cp, *first_cp;
	cpupart_t *pp, *newpp;
	int	err = 0;
	void 	*projbuf, *zonebuf;
	kthread_t *t;
	proc_t	*p;

	ASSERT(pool_lock_held());
	mutex_enter(&cpu_lock);

	pp = cpupart_find(psid);
	if (pp == NULL || pp == &cp_default) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	/*
	 * Pre-allocate enough buffers for FSS for all active projects and
	 * for all active zones on the system.  Unused buffers will be
	 * freed later by fss_freebuf().
	 */
	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);

	/*
	 * First need to unbind all the threads currently bound to the
	 * partition.  Then do the actual destroy (which moves the CPUs).
	 */
	mutex_enter(&pidlock);
	t = curthread;
	do {
		if (t->t_bind_pset == psid) {
again:			p = ttoproc(t);
			mutex_enter(&p->p_lock);
			if (ttoproc(t) != p) {
				/*
				 * lwp_exit has changed this thread's process
				 * pointer before we grabbed its p_lock.
				 */
				mutex_exit(&p->p_lock);
				goto again;
			}
			err = cpupart_bind_thread(t, PS_NONE, 1,
			    projbuf, zonebuf);
			if (err) {
				mutex_exit(&p->p_lock);
				mutex_exit(&pidlock);
				mutex_exit(&cpu_lock);
				fss_freebuf(projbuf, FSS_ALLOC_PROJ);
				fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
				return (err);
			}
			t->t_bind_pset = PS_NONE;
			mutex_exit(&p->p_lock);
		}
		t = t->t_next;
	} while (t != curthread);

	mutex_exit(&pidlock);
	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);

	newpp = &cp_default;
	while ((cp = pp->cp_cpulist) != NULL) {
		if (err = cpupart_move_cpu(cp, newpp, 0)) {
			mutex_exit(&cpu_lock);
			return (err);
		}
	}

	ASSERT(CHIP_SET_ISNULL(pp->cp_chipset));
	ASSERT(CPUSET_ISNULL(pp->cp_haltset));

	/*
	 * Reset the pointers in any offline processors so they won't
	 * try to rejoin the destroyed partition when they're brought
	 * back online.
	 */
	first_cp = cp = CPU;
	do {
		if (cp->cpu_part == pp) {
			ASSERT(cp->cpu_flags & CPU_OFFLINE);
			cp->cpu_part = newpp;
		}
		cp = cp->cpu_next;
	} while (cp != first_cp);

	/*
	 * Pause all CPUs while changing the partition list, to make sure
	 * the clock thread (which traverses the list without holding
	 * cpu_lock) isn't running.
	 */
	pause_cpus(NULL);
	pp->cp_prev->cp_next = pp->cp_next;
	pp->cp_next->cp_prev = pp->cp_prev;
	if (cp_list_head == pp)
		cp_list_head = pp->cp_next;
	start_cpus();

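	/*
	 * Make the freed partition ID available for reuse; cpupart_create()
	 * searches forward from cp_id_next for the next unused ID.
	 */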
	if (cp_id_next > pp->cp_id)
		cp_id_next = pp->cp_id;

	if (pp->cp_kstat)
		kstat_delete(pp->cp_kstat);

	cp_numparts--;

	disp_kp_free(&pp->cp_kp_queue);
	kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
	pp->cp_lgrploads = NULL;
	kmem_free(pp, sizeof (cpupart_t));
	mutex_exit(&cpu_lock);

	return (err);
}


/*
 * Return the ID of the partition to which the specified processor belongs.
 */
psetid_t
cpupart_query_cpu(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (CPTOPS(cp->cpu_part->cp_id));
}


/*
 * Attach a processor to an existing partition.
 */
int
cpupart_attach_cpu(psetid_t psid, cpu_t *cp, int forced)
{
	cpupart_t	*pp;
	int		err;

	ASSERT(pool_lock_held());
	ASSERT(MUTEX_HELD(&cpu_lock));

	pp = cpupart_find(psid);
	if (pp == NULL)
		return (EINVAL);
	if (cp->cpu_flags & CPU_OFFLINE)
		return (EINVAL);

	err = cpupart_move_cpu(cp, pp, forced);
	return (err);
}

/*
 * Get a list of cpus belonging to the partition.  If numcpus is NULL,
 * this just checks for a valid partition.  If numcpus is non-NULL but
 * cpulist is NULL, the current number of cpus is stored in *numcpus.
 * If both are non-NULL, the current number of cpus is stored in *numcpus,
 * and a list of those cpus up to the size originally in *numcpus is
 * stored in cpulist[].  Also, store the processor set id in *psid.
 * This is useful in case the processor set id passed in was PS_MYID.
 */
int
cpupart_get_cpus(psetid_t *psid, processorid_t *cpulist, uint_t *numcpus)
{
	cpupart_t	*pp;
	uint_t		ncpus;
	cpu_t		*c;
	int		i;

	mutex_enter(&cpu_lock);
	pp = cpupart_find(*psid);
	if (pp == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*psid = CPTOPS(pp->cp_id);
	ncpus = pp->cp_ncpus;
	if (numcpus) {
		if (ncpus > *numcpus) {
			/*
			 * Only copy as many cpus as were passed in, but
			 * pass back the real number.
			 */
			uint_t t = ncpus;
			ncpus = *numcpus;
			*numcpus = t;
		} else
			*numcpus = ncpus;

		if (cpulist) {
			c = pp->cp_cpulist;
			for (i = 0; i < ncpus; i++) {
				ASSERT(c != NULL);
				cpulist[i] = c->cpu_id;
				c = c->cpu_next_part;
			}
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}

/*
 * Reallocate kpreempt queues for each CPU partition.  Called from
 * disp_setup when a new scheduling class is loaded that increases the
 * number of priorities in the system.
 */
void
cpupart_kpqalloc(pri_t npri)
{
	cpupart_t *cpp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cpp = cp_list_head;
	do {
		disp_kp_alloc(&cpp->cp_kp_queue, npri);
		cpp = cpp->cp_next;
	} while (cpp != cp_list_head);
}

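/*
 * Copy up to nelem (at most LOADAVG_NSTATS) of the partition's load
 * averages into buf, converting from the 16-bit fixed-point form kept in
 * cp_hp_avenrun to the FSHIFT form used elsewhere (see
 * cpupart_kstat_update() above).
 */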
int
cpupart_get_loadavg(psetid_t psid, int *buf, int nelem)
{
	cpupart_t *cp;
	int i;

	ASSERT(nelem >= 0);
	ASSERT(nelem <= LOADAVG_NSTATS);
	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpupart_find(psid);
	if (cp == NULL)
		return (EINVAL);
	for (i = 0; i < nelem; i++)
		buf[i] = cp->cp_hp_avenrun[i] >> (16 - FSHIFT);

	return (0);
}


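/*
 * Fill list[] with up to nelem processor set IDs and return the number of
 * partitions matching flag: CP_ALL counts every partition except the
 * default one, CP_NONEMPTY counts partitions that currently contain CPUs
 * (including the default partition).  list may be NULL when only the
 * count is wanted.
 */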
uint_t
cpupart_list(psetid_t *list, uint_t nelem, int flag)
{
	uint_t numpart = 0;
	cpupart_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(flag == CP_ALL || flag == CP_NONEMPTY);

	if (list != NULL) {
		cp = cp_list_head;
		do {
			if (((flag == CP_ALL) && (cp != &cp_default)) ||
			    ((flag == CP_NONEMPTY) && (cp->cp_ncpus != 0))) {
				if (numpart == nelem)
					break;
				list[numpart++] = CPTOPS(cp->cp_id);
			}
			cp = cp->cp_next;
		} while (cp != cp_list_head);
	}

	ASSERT(numpart < cp_numparts);

	if (flag == CP_ALL)
		numpart = cp_numparts - 1; /* leave out default partition */
	else if (flag == CP_NONEMPTY)
		numpart = cp_numparts_nonempty;

	return (numpart);
}

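/*
 * Set a partition's attribute flags.  The default partition must always
 * keep PSET_NOESCAPE set, so an attempt to clear it there fails with
 * EINVAL.
 */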
int
cpupart_setattr(psetid_t psid, uint_t attr)
{
	cpupart_t *cp;

	ASSERT(pool_lock_held());

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	/*
	 * The PSET_NOESCAPE attribute is always set for the default
	 * CPU partition.
	 */
	if (cp == &cp_default && !(attr & PSET_NOESCAPE)) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	cp->cp_attr = attr;
	mutex_exit(&cpu_lock);
	return (0);
}

int
cpupart_getattr(psetid_t psid, uint_t *attrp)
{
	cpupart_t *cp;

	mutex_enter(&cpu_lock);
	if ((cp = cpupart_find(psid)) == NULL) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}
	*attrp = cp->cp_attr;
	mutex_exit(&cpu_lock);
	return (0);
}