xref: /onnv-gate/usr/src/uts/common/os/pool.c (revision 3247:e05001c14ea2)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51336Sgm149974  * Common Development and Distribution License (the "License").
61336Sgm149974  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
221336Sgm149974  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include <sys/pool.h>
290Sstevel@tonic-gate #include <sys/pool_impl.h>
300Sstevel@tonic-gate #include <sys/pool_pset.h>
310Sstevel@tonic-gate #include <sys/id_space.h>
320Sstevel@tonic-gate #include <sys/mutex.h>
330Sstevel@tonic-gate #include <sys/nvpair.h>
340Sstevel@tonic-gate #include <sys/cpuvar.h>
350Sstevel@tonic-gate #include <sys/errno.h>
360Sstevel@tonic-gate #include <sys/cmn_err.h>
370Sstevel@tonic-gate #include <sys/systm.h>
380Sstevel@tonic-gate #include <sys/proc.h>
390Sstevel@tonic-gate #include <sys/fss.h>
400Sstevel@tonic-gate #include <sys/class.h>
410Sstevel@tonic-gate #include <sys/exacct.h>
420Sstevel@tonic-gate #include <sys/utsname.h>
430Sstevel@tonic-gate #include <sys/procset.h>
440Sstevel@tonic-gate #include <sys/atomic.h>
450Sstevel@tonic-gate #include <sys/zone.h>
460Sstevel@tonic-gate #include <sys/policy.h>
470Sstevel@tonic-gate 
480Sstevel@tonic-gate /*
490Sstevel@tonic-gate  * RESOURCE POOLS
500Sstevel@tonic-gate  *
510Sstevel@tonic-gate  * The resource pools facility brings together process-bindable resources into
520Sstevel@tonic-gate  * a common abstraction called a pool. Processor sets and other entities can
530Sstevel@tonic-gate  * be configured, grouped, and labelled such that workload components can be
540Sstevel@tonic-gate  * associated with a subset of a system's total resources.
550Sstevel@tonic-gate  *
560Sstevel@tonic-gate  * When disabled, the pools facility is "invisible".  All processes belong
570Sstevel@tonic-gate  * to the same pool (pool_default), and processor sets can be managed through
580Sstevel@tonic-gate  * the old pset() system call.  When enabled, processor sets can only be
590Sstevel@tonic-gate  * managed via the pools facility.  New pools can be created and associated
600Sstevel@tonic-gate  * with processor sets.  Processes can be bound to pools which have non-empty
610Sstevel@tonic-gate  * resource sets.
620Sstevel@tonic-gate  *
630Sstevel@tonic-gate  * Locking: pool_lock() protects global pools state and must be called
640Sstevel@tonic-gate  * before modifying the configuration, or when taking a snapshot of the
650Sstevel@tonic-gate  * configuration.  If pool_lock_intr() is used, the operation may be
660Sstevel@tonic-gate  * interrupted by a signal or a request.
670Sstevel@tonic-gate  *
680Sstevel@tonic-gate  * To prevent processes from being rebound between pools while they are
690Sstevel@tonic-gate  * in the middle of an operation which affects resource set bindings, such
700Sstevel@tonic-gate  * operations must be surrounded by calls to pool_barrier_enter() and
710Sstevel@tonic-gate  * pool_barrier_exit().  This mechanism guarantees that such processes will
720Sstevel@tonic-gate  * be stopped either at the beginning or at the end of the barrier so that
730Sstevel@tonic-gate  * the rebind operation can atomically bind the process and its threads
740Sstevel@tonic-gate  * to new resource sets, and then let the process run again.
750Sstevel@tonic-gate  *
760Sstevel@tonic-gate  * Lock ordering with respect to other locks is as follows:
770Sstevel@tonic-gate  *
780Sstevel@tonic-gate  * 	pool_lock() -> cpu_lock -> pidlock -> p_lock -> pool_barrier_lock
790Sstevel@tonic-gate  *
800Sstevel@tonic-gate  * Most static and global variables defined in this file are protected
810Sstevel@tonic-gate  * by calling pool_lock().
820Sstevel@tonic-gate  *
830Sstevel@tonic-gate  * The operation that binds tasks and projects to pools is atomic.  That is,
840Sstevel@tonic-gate  * either all processes in a given task or a project will be bound to a
850Sstevel@tonic-gate  * new pool, or (in case of an error) they will be all left bound to the
860Sstevel@tonic-gate  * old pool. Processes in a given task or a given project can only be bound to
870Sstevel@tonic-gate  * different pools if they were rebound individually one by one as single
880Sstevel@tonic-gate  * processes.  Threads or LWPs of the same process do not have pool bindings,
890Sstevel@tonic-gate  * and are bound to the same resource sets associated with the resource pool
900Sstevel@tonic-gate  * of that process.
910Sstevel@tonic-gate  *
920Sstevel@tonic-gate  * The following picture shows one possible pool configuration with three
930Sstevel@tonic-gate  * pools and three processor sets.  Note that processor set "foo" is not
940Sstevel@tonic-gate  * associated with any pools and therefore cannot have any processes
950Sstevel@tonic-gate  * bound to it.  Two pools (default and foo) are associated with the
960Sstevel@tonic-gate  * same processor set (default).  Also, note that processes in Task 2
970Sstevel@tonic-gate  * are bound to different pools.
980Sstevel@tonic-gate  *
990Sstevel@tonic-gate  *
1000Sstevel@tonic-gate  *							       Processor Sets
1010Sstevel@tonic-gate  *								+---------+
1020Sstevel@tonic-gate  *		       +--------------+========================>| default |
1030Sstevel@tonic-gate  *		      a|	      |				+---------+
1040Sstevel@tonic-gate  *		      s|	      |				    ||
1050Sstevel@tonic-gate  *		      s|	      |				+---------+
1060Sstevel@tonic-gate  *		      o|	      |				|   foo   |
1070Sstevel@tonic-gate  *		      c|	      |				+---------+
1080Sstevel@tonic-gate  *		      i|	      |				    ||
1090Sstevel@tonic-gate  *		      a|	      |				+---------+
1100Sstevel@tonic-gate  *		      t|	      |			+------>|   bar   |
1110Sstevel@tonic-gate  *		      e|	      |			|	+---------+
1120Sstevel@tonic-gate  *                    d|              |                 |
1130Sstevel@tonic-gate  *                     |              |                 |
1140Sstevel@tonic-gate  *	       +---------+      +---------+      +---------+
1150Sstevel@tonic-gate  *     Pools   | default |======|   foo   |======|   bar   |
1160Sstevel@tonic-gate  *	       +---------+      +---------+      +---------+
1170Sstevel@tonic-gate  *	           @  @            @              @ @   @
1180Sstevel@tonic-gate  *                b|  |            |              | |   |
1190Sstevel@tonic-gate  *                o|  |            |              | |   |
1200Sstevel@tonic-gate  *                u|  +-----+      |      +-------+ |   +---+
1210Sstevel@tonic-gate  *                n|        |      |      |         |       |
1220Sstevel@tonic-gate  *            ....d|........|......|......|.........|.......|....
1230Sstevel@tonic-gate  *            :    |   ::   |      |      |    ::   |       |   :
1240Sstevel@tonic-gate  *            :  +---+ :: +---+  +---+  +---+  :: +---+   +---+ :
1250Sstevel@tonic-gate  *  Processes :  | p | :: | p |  | p |  | p |  :: | p |...| p | :
1260Sstevel@tonic-gate  *            :  +---+ :: +---+  +---+  +---+  :: +---+   +---+ :
1270Sstevel@tonic-gate  *            :........::......................::...............:
1280Sstevel@tonic-gate  *              Task 1            Task 2              Task N
1290Sstevel@tonic-gate  *                 |                 |                  |
1300Sstevel@tonic-gate  *                 |                 |                  |
1310Sstevel@tonic-gate  *                 |  +-----------+  |             +-----------+
1320Sstevel@tonic-gate  *                 +--| Project 1 |--+             | Project N |
1330Sstevel@tonic-gate  *                    +-----------+                +-----------+
1340Sstevel@tonic-gate  *
1350Sstevel@tonic-gate  * This is just an illustration of relationships between processes, tasks,
1360Sstevel@tonic-gate  * projects, pools, and processor sets. New types of resource sets will be
1370Sstevel@tonic-gate  * added in the future.
1380Sstevel@tonic-gate  */
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate pool_t		*pool_default;	/* default pool which always exists */
1410Sstevel@tonic-gate int		pool_count;	/* number of pools created on this system */
1420Sstevel@tonic-gate int		pool_state;	/* pools state -- enabled/disabled */
1430Sstevel@tonic-gate void		*pool_buf;	/* pre-commit snapshot of the pools state */
1440Sstevel@tonic-gate size_t		pool_bufsz;	/* size of pool_buf */
1450Sstevel@tonic-gate static hrtime_t	pool_pool_mod;	/* last modification time for pools */
1460Sstevel@tonic-gate static hrtime_t	pool_sys_mod;	/* last modification time for system */
1470Sstevel@tonic-gate static nvlist_t	*pool_sys_prop;	/* system properties */
1480Sstevel@tonic-gate static id_space_t *pool_ids;	/* pool ID space */
1490Sstevel@tonic-gate static list_t	pool_list;	/* doubly-linked list of pools */
1500Sstevel@tonic-gate static kmutex_t		pool_mutex;		/* protects pool_busy_* */
1510Sstevel@tonic-gate static kcondvar_t	pool_busy_cv;		/* waiting for "pool_lock" */
1520Sstevel@tonic-gate static kthread_t	*pool_busy_thread;	/* thread holding "pool_lock" */
1530Sstevel@tonic-gate static kmutex_t		pool_barrier_lock;	/* synch. with pool_barrier_* */
1540Sstevel@tonic-gate static kcondvar_t	pool_barrier_cv;	/* synch. with pool_barrier_* */
1550Sstevel@tonic-gate static int		pool_barrier_count;	/* synch. with pool_barrier_* */
1560Sstevel@tonic-gate 
1570Sstevel@tonic-gate /*
1580Sstevel@tonic-gate  * Boot-time pool initialization.
1590Sstevel@tonic-gate  */
1600Sstevel@tonic-gate void
1610Sstevel@tonic-gate pool_init(void)
1620Sstevel@tonic-gate {
1630Sstevel@tonic-gate 	pool_ids = id_space_create("pool_ids", POOL_DEFAULT + 1, POOL_MAXID);
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate 	/*
1660Sstevel@tonic-gate 	 * Initialize default pool.
1670Sstevel@tonic-gate 	 */
1680Sstevel@tonic-gate 	pool_default = kmem_zalloc(sizeof (pool_t), KM_SLEEP);
1690Sstevel@tonic-gate 	pool_default->pool_id = POOL_DEFAULT;
1700Sstevel@tonic-gate 	list_create(&pool_list, sizeof (pool_t), offsetof(pool_t, pool_link));
1710Sstevel@tonic-gate 	list_insert_head(&pool_list, pool_default);
1720Sstevel@tonic-gate 
1730Sstevel@tonic-gate 	/*
1740Sstevel@tonic-gate 	 * Initialize plugins for resource sets.
1750Sstevel@tonic-gate 	 */
1760Sstevel@tonic-gate 	pool_pset_init();
1770Sstevel@tonic-gate 	pool_count = 1;
1780Sstevel@tonic-gate 	p0.p_pool = pool_default;
1790Sstevel@tonic-gate 	global_zone->zone_pool = pool_default;
1800Sstevel@tonic-gate 	pool_default->pool_ref = 1;
1810Sstevel@tonic-gate }
1820Sstevel@tonic-gate 
1830Sstevel@tonic-gate /*
1840Sstevel@tonic-gate  * Synchronization routines.
1850Sstevel@tonic-gate  *
1860Sstevel@tonic-gate  * pool_lock is only called from syscall-level routines (processor_bind(),
1870Sstevel@tonic-gate  * pset_*(), and /dev/pool ioctls).  The pool "lock" may be held for long
1880Sstevel@tonic-gate  * periods of time, including across sleeping operations, so we allow its
1890Sstevel@tonic-gate  * acquisition to be interruptible.
1900Sstevel@tonic-gate  *
1910Sstevel@tonic-gate  * The current thread that owns the "lock" is stored in the variable
1920Sstevel@tonic-gate  * pool_busy_thread, both to let pool_lock_held() work and to aid debugging.
1930Sstevel@tonic-gate  */
1940Sstevel@tonic-gate void
1950Sstevel@tonic-gate pool_lock(void)
1960Sstevel@tonic-gate {
1970Sstevel@tonic-gate 	mutex_enter(&pool_mutex);
1980Sstevel@tonic-gate 	while (pool_busy_thread != NULL)
1990Sstevel@tonic-gate 		cv_wait(&pool_busy_cv, &pool_mutex);
2000Sstevel@tonic-gate 	pool_busy_thread = curthread;
2010Sstevel@tonic-gate 	mutex_exit(&pool_mutex);
2020Sstevel@tonic-gate }
2030Sstevel@tonic-gate 
2040Sstevel@tonic-gate int
2050Sstevel@tonic-gate pool_lock_intr(void)
2060Sstevel@tonic-gate {
2070Sstevel@tonic-gate 	mutex_enter(&pool_mutex);
2080Sstevel@tonic-gate 	while (pool_busy_thread != NULL) {
2090Sstevel@tonic-gate 		if (cv_wait_sig(&pool_busy_cv, &pool_mutex) == 0) {
2100Sstevel@tonic-gate 			cv_signal(&pool_busy_cv);
2110Sstevel@tonic-gate 			mutex_exit(&pool_mutex);
2120Sstevel@tonic-gate 			return (1);
2130Sstevel@tonic-gate 		}
2140Sstevel@tonic-gate 	}
2150Sstevel@tonic-gate 	pool_busy_thread = curthread;
2160Sstevel@tonic-gate 	mutex_exit(&pool_mutex);
2170Sstevel@tonic-gate 	return (0);
2180Sstevel@tonic-gate }
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate int
2210Sstevel@tonic-gate pool_lock_held(void)
2220Sstevel@tonic-gate {
2230Sstevel@tonic-gate 	return (pool_busy_thread == curthread);
2240Sstevel@tonic-gate }
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate void
2270Sstevel@tonic-gate pool_unlock(void)
2280Sstevel@tonic-gate {
2290Sstevel@tonic-gate 	mutex_enter(&pool_mutex);
2300Sstevel@tonic-gate 	pool_busy_thread = NULL;
2310Sstevel@tonic-gate 	cv_signal(&pool_busy_cv);
2320Sstevel@tonic-gate 	mutex_exit(&pool_mutex);
2330Sstevel@tonic-gate }
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate /*
2360Sstevel@tonic-gate  * Routines allowing fork(), exec(), exit(), and lwp_create() to synchronize
2370Sstevel@tonic-gate  * with pool_do_bind().
2380Sstevel@tonic-gate  *
2390Sstevel@tonic-gate  * Calls to pool_barrier_enter() and pool_barrier_exit() must bracket all
2400Sstevel@tonic-gate  * operations which modify pool or pset associations.  They can be called
2410Sstevel@tonic-gate  * while the process is multi-threaded.  In the common case, when current
2420Sstevel@tonic-gate  * process is not being rebound (PBWAIT flag is not set), these functions
2430Sstevel@tonic-gate  * will be just incrementing and decrementing reference counts.
2440Sstevel@tonic-gate  */
2450Sstevel@tonic-gate void
2460Sstevel@tonic-gate pool_barrier_enter(void)
2470Sstevel@tonic-gate {
2480Sstevel@tonic-gate 	proc_t *p = curproc;
2490Sstevel@tonic-gate 
2500Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
2510Sstevel@tonic-gate 	while (p->p_poolflag & PBWAIT)
2520Sstevel@tonic-gate 		cv_wait(&p->p_poolcv, &p->p_lock);
2530Sstevel@tonic-gate 	p->p_poolcnt++;
2540Sstevel@tonic-gate }
2550Sstevel@tonic-gate 
2560Sstevel@tonic-gate void
2570Sstevel@tonic-gate pool_barrier_exit(void)
2580Sstevel@tonic-gate {
2590Sstevel@tonic-gate 	proc_t *p = curproc;
2600Sstevel@tonic-gate 
2610Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&p->p_lock));
2620Sstevel@tonic-gate 	ASSERT(p->p_poolcnt > 0);
2630Sstevel@tonic-gate 	p->p_poolcnt--;
2640Sstevel@tonic-gate 	if (p->p_poolflag & PBWAIT) {
2650Sstevel@tonic-gate 		mutex_enter(&pool_barrier_lock);
2660Sstevel@tonic-gate 		ASSERT(pool_barrier_count > 0);
2670Sstevel@tonic-gate 		pool_barrier_count--;
2680Sstevel@tonic-gate 		if (pool_barrier_count == 0)
2690Sstevel@tonic-gate 			cv_signal(&pool_barrier_cv);
2700Sstevel@tonic-gate 		mutex_exit(&pool_barrier_lock);
2710Sstevel@tonic-gate 		while (p->p_poolflag & PBWAIT)
2720Sstevel@tonic-gate 			cv_wait(&p->p_poolcv, &p->p_lock);
2730Sstevel@tonic-gate 	}
2740Sstevel@tonic-gate }
2750Sstevel@tonic-gate 
2760Sstevel@tonic-gate /*
2770Sstevel@tonic-gate  * Enable pools facility.
2780Sstevel@tonic-gate  */
2790Sstevel@tonic-gate static int
2800Sstevel@tonic-gate pool_enable(void)
2810Sstevel@tonic-gate {
2820Sstevel@tonic-gate 	int ret;
2830Sstevel@tonic-gate 
2840Sstevel@tonic-gate 	ASSERT(pool_lock_held());
2850Sstevel@tonic-gate 	ASSERT(pool_count == 1);
2860Sstevel@tonic-gate 
2870Sstevel@tonic-gate 	ret = pool_pset_enable();
2880Sstevel@tonic-gate 	if (ret != 0)
2890Sstevel@tonic-gate 		return (ret);
2900Sstevel@tonic-gate 	(void) nvlist_alloc(&pool_sys_prop, NV_UNIQUE_NAME, KM_SLEEP);
2910Sstevel@tonic-gate 	(void) nvlist_add_string(pool_sys_prop, "system.name",
2921336Sgm149974 	    "default");
2930Sstevel@tonic-gate 	(void) nvlist_add_string(pool_sys_prop, "system.comment", "");
2940Sstevel@tonic-gate 	(void) nvlist_add_int64(pool_sys_prop, "system.version", 1);
2950Sstevel@tonic-gate 	(void) nvlist_add_byte(pool_sys_prop, "system.bind-default", 1);
296*3247Sgjelinek 	(void) nvlist_add_string(pool_sys_prop, "system.poold.objectives",
297*3247Sgjelinek 	    "wt-load");
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate 	(void) nvlist_alloc(&pool_default->pool_props,
3000Sstevel@tonic-gate 	    NV_UNIQUE_NAME, KM_SLEEP);
3010Sstevel@tonic-gate 	(void) nvlist_add_string(pool_default->pool_props,
3020Sstevel@tonic-gate 	    "pool.name", "pool_default");
3030Sstevel@tonic-gate 	(void) nvlist_add_string(pool_default->pool_props, "pool.comment", "");
3040Sstevel@tonic-gate 	(void) nvlist_add_byte(pool_default->pool_props, "pool.default", 1);
3050Sstevel@tonic-gate 	(void) nvlist_add_byte(pool_default->pool_props, "pool.active", 1);
3060Sstevel@tonic-gate 	(void) nvlist_add_int64(pool_default->pool_props,
3070Sstevel@tonic-gate 	    "pool.importance", 1);
3080Sstevel@tonic-gate 	(void) nvlist_add_int64(pool_default->pool_props, "pool.sys_id",
3090Sstevel@tonic-gate 	    pool_default->pool_id);
3100Sstevel@tonic-gate 
3110Sstevel@tonic-gate 	pool_sys_mod = pool_pool_mod = gethrtime();
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	return (ret);
3140Sstevel@tonic-gate }
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate /*
3170Sstevel@tonic-gate  * Disable pools facility.
3180Sstevel@tonic-gate  */
3190Sstevel@tonic-gate static int
3200Sstevel@tonic-gate pool_disable(void)
3210Sstevel@tonic-gate {
3220Sstevel@tonic-gate 	int ret;
3230Sstevel@tonic-gate 
3240Sstevel@tonic-gate 	ASSERT(pool_lock_held());
3250Sstevel@tonic-gate 
3260Sstevel@tonic-gate 	if (pool_count > 1)	/* must destroy all pools first */
3270Sstevel@tonic-gate 		return (EBUSY);
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	ret = pool_pset_disable();
3300Sstevel@tonic-gate 	if (ret != 0)
3310Sstevel@tonic-gate 		return (ret);
3320Sstevel@tonic-gate 	if (pool_sys_prop != NULL) {
3330Sstevel@tonic-gate 		nvlist_free(pool_sys_prop);
3340Sstevel@tonic-gate 		pool_sys_prop = NULL;
3350Sstevel@tonic-gate 	}
3360Sstevel@tonic-gate 	if (pool_default->pool_props != NULL) {
3370Sstevel@tonic-gate 		nvlist_free(pool_default->pool_props);
3380Sstevel@tonic-gate 		pool_default->pool_props = NULL;
3390Sstevel@tonic-gate 	}
3400Sstevel@tonic-gate 	return (0);
3410Sstevel@tonic-gate }
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate pool_t *
3440Sstevel@tonic-gate pool_lookup_pool_by_name(char *name)
3450Sstevel@tonic-gate {
3460Sstevel@tonic-gate 	pool_t *pool = pool_default;
3470Sstevel@tonic-gate 	char *p;
3480Sstevel@tonic-gate 
3490Sstevel@tonic-gate 	ASSERT(pool_lock_held());
3500Sstevel@tonic-gate 	for (pool = list_head(&pool_list); pool;
3510Sstevel@tonic-gate 	    pool = list_next(&pool_list, pool)) {
3520Sstevel@tonic-gate 		if (nvlist_lookup_string(pool->pool_props,
3530Sstevel@tonic-gate 		    "pool.name", &p) == 0 && strcmp(name, p) == 0)
3540Sstevel@tonic-gate 			return (pool);
3550Sstevel@tonic-gate 	}
3560Sstevel@tonic-gate 	return (NULL);
3570Sstevel@tonic-gate }
3580Sstevel@tonic-gate 
3590Sstevel@tonic-gate pool_t *
3600Sstevel@tonic-gate pool_lookup_pool_by_id(poolid_t poolid)
3610Sstevel@tonic-gate {
3620Sstevel@tonic-gate 	pool_t *pool = pool_default;
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 	ASSERT(pool_lock_held());
3650Sstevel@tonic-gate 	for (pool = list_head(&pool_list); pool;
3660Sstevel@tonic-gate 	    pool = list_next(&pool_list, pool)) {
3670Sstevel@tonic-gate 		if (pool->pool_id == poolid)
3680Sstevel@tonic-gate 			return (pool);
3690Sstevel@tonic-gate 	}
3700Sstevel@tonic-gate 	return (NULL);
3710Sstevel@tonic-gate }
3720Sstevel@tonic-gate 
3730Sstevel@tonic-gate /*
3740Sstevel@tonic-gate  * Create new pool, associate it with default resource sets, and give
3750Sstevel@tonic-gate  * it a temporary name.
3760Sstevel@tonic-gate  */
3770Sstevel@tonic-gate static int
3780Sstevel@tonic-gate pool_pool_create(poolid_t *poolid)
3790Sstevel@tonic-gate {
3800Sstevel@tonic-gate 	pool_t *pool;
3810Sstevel@tonic-gate 	char pool_name[40];
3820Sstevel@tonic-gate 
3830Sstevel@tonic-gate 	ASSERT(pool_lock_held());
3840Sstevel@tonic-gate 
3850Sstevel@tonic-gate 	pool = kmem_zalloc(sizeof (pool_t), KM_SLEEP);
3860Sstevel@tonic-gate 	pool->pool_id = *poolid = id_alloc(pool_ids);
3870Sstevel@tonic-gate 	pool->pool_pset = pool_pset_default;
3880Sstevel@tonic-gate 	pool_pset_default->pset_npools++;
3890Sstevel@tonic-gate 	list_insert_tail(&pool_list, pool);
3900Sstevel@tonic-gate 	(void) nvlist_alloc(&pool->pool_props, NV_UNIQUE_NAME, KM_SLEEP);
3910Sstevel@tonic-gate 	(void) nvlist_add_int64(pool->pool_props, "pool.sys_id", pool->pool_id);
3920Sstevel@tonic-gate 	(void) nvlist_add_byte(pool->pool_props, "pool.default", 0);
3930Sstevel@tonic-gate 	pool_pool_mod = gethrtime();
3940Sstevel@tonic-gate 	(void) snprintf(pool_name, sizeof (pool_name), "pool_%lld",
3950Sstevel@tonic-gate 	    pool_pool_mod);
3960Sstevel@tonic-gate 	(void) nvlist_add_string(pool->pool_props, "pool.name", pool_name);
3970Sstevel@tonic-gate 	pool_count++;
3980Sstevel@tonic-gate 	return (0);
3990Sstevel@tonic-gate }
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate struct destroy_zone_arg {
4020Sstevel@tonic-gate 	pool_t *old;
4030Sstevel@tonic-gate 	pool_t *new;
4040Sstevel@tonic-gate };
4050Sstevel@tonic-gate 
4060Sstevel@tonic-gate /*
4070Sstevel@tonic-gate  * Update pool pointers for zones that are currently bound to pool "old"
4080Sstevel@tonic-gate  * to be bound to pool "new".
4090Sstevel@tonic-gate  */
4100Sstevel@tonic-gate static int
4110Sstevel@tonic-gate pool_destroy_zone_cb(zone_t *zone, void *arg)
4120Sstevel@tonic-gate {
4130Sstevel@tonic-gate 	struct destroy_zone_arg *dza = arg;
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate 	ASSERT(pool_lock_held());
4160Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate 	if (zone_pool_get(zone) == dza->old)
4190Sstevel@tonic-gate 		zone_pool_set(zone, dza->new);
4200Sstevel@tonic-gate 	return (0);
4210Sstevel@tonic-gate }
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate /*
4240Sstevel@tonic-gate  * Destroy specified pool, and rebind all processes in it
4250Sstevel@tonic-gate  * to the default pool.
4260Sstevel@tonic-gate  */
4270Sstevel@tonic-gate static int
4280Sstevel@tonic-gate pool_pool_destroy(poolid_t poolid)
4290Sstevel@tonic-gate {
4300Sstevel@tonic-gate 	pool_t *pool;
4310Sstevel@tonic-gate 	int ret;
4320Sstevel@tonic-gate 
4330Sstevel@tonic-gate 	ASSERT(pool_lock_held());
4340Sstevel@tonic-gate 
4350Sstevel@tonic-gate 	if (poolid == POOL_DEFAULT)
4360Sstevel@tonic-gate 		return (EINVAL);
4370Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
4380Sstevel@tonic-gate 		return (ESRCH);
4390Sstevel@tonic-gate 	ret = pool_do_bind(pool_default, P_POOLID, poolid, POOL_BIND_ALL);
4400Sstevel@tonic-gate 	if (ret == 0) {
4410Sstevel@tonic-gate 		struct destroy_zone_arg dzarg;
4420Sstevel@tonic-gate 
4430Sstevel@tonic-gate 		dzarg.old = pool;
4440Sstevel@tonic-gate 		dzarg.new = pool_default;
4450Sstevel@tonic-gate 		mutex_enter(&cpu_lock);
4460Sstevel@tonic-gate 		ret = zone_walk(pool_destroy_zone_cb, &dzarg);
4470Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
4480Sstevel@tonic-gate 		ASSERT(ret == 0);
4490Sstevel@tonic-gate 		ASSERT(pool->pool_ref == 0);
4500Sstevel@tonic-gate 		(void) nvlist_free(pool->pool_props);
4510Sstevel@tonic-gate 		id_free(pool_ids, pool->pool_id);
4520Sstevel@tonic-gate 		pool->pool_pset->pset_npools--;
4530Sstevel@tonic-gate 		list_remove(&pool_list, pool);
4540Sstevel@tonic-gate 		pool_count--;
4550Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
4560Sstevel@tonic-gate 		kmem_free(pool, sizeof (pool_t));
4570Sstevel@tonic-gate 	}
4580Sstevel@tonic-gate 	return (ret);
4590Sstevel@tonic-gate }
4600Sstevel@tonic-gate 
4610Sstevel@tonic-gate /*
4620Sstevel@tonic-gate  * Create new pool or resource set.
4630Sstevel@tonic-gate  */
4640Sstevel@tonic-gate int
4650Sstevel@tonic-gate pool_create(int class, int subclass, id_t *id)
4660Sstevel@tonic-gate {
4670Sstevel@tonic-gate 	int ret;
4680Sstevel@tonic-gate 
4690Sstevel@tonic-gate 	ASSERT(pool_lock_held());
4700Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
4710Sstevel@tonic-gate 		return (ENOTACTIVE);
4720Sstevel@tonic-gate 	switch (class) {
4730Sstevel@tonic-gate 	case PEC_POOL:
4740Sstevel@tonic-gate 		ret = pool_pool_create((poolid_t *)id);
4750Sstevel@tonic-gate 		break;
4760Sstevel@tonic-gate 	case PEC_RES_COMP:
4770Sstevel@tonic-gate 		switch (subclass) {
4780Sstevel@tonic-gate 		case PREC_PSET:
4790Sstevel@tonic-gate 			ret = pool_pset_create((psetid_t *)id);
4800Sstevel@tonic-gate 			break;
4810Sstevel@tonic-gate 		default:
4820Sstevel@tonic-gate 			ret = EINVAL;
4830Sstevel@tonic-gate 		}
4840Sstevel@tonic-gate 		break;
4850Sstevel@tonic-gate 	case PEC_RES_AGG:
4860Sstevel@tonic-gate 		ret = ENOTSUP;
4870Sstevel@tonic-gate 		break;
4880Sstevel@tonic-gate 	default:
4890Sstevel@tonic-gate 		ret = EINVAL;
4900Sstevel@tonic-gate 	}
4910Sstevel@tonic-gate 	return (ret);
4920Sstevel@tonic-gate }
4930Sstevel@tonic-gate 
4940Sstevel@tonic-gate /*
4950Sstevel@tonic-gate  * Destroy an existing pool or resource set.
4960Sstevel@tonic-gate  */
4970Sstevel@tonic-gate int
4980Sstevel@tonic-gate pool_destroy(int class, int subclass, id_t id)
4990Sstevel@tonic-gate {
5000Sstevel@tonic-gate 	int ret;
5010Sstevel@tonic-gate 
5020Sstevel@tonic-gate 	ASSERT(pool_lock_held());
5030Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
5040Sstevel@tonic-gate 		return (ENOTACTIVE);
5050Sstevel@tonic-gate 	switch (class) {
5060Sstevel@tonic-gate 	case PEC_POOL:
5070Sstevel@tonic-gate 		ret = pool_pool_destroy((poolid_t)id);
5080Sstevel@tonic-gate 		break;
5090Sstevel@tonic-gate 	case PEC_RES_COMP:
5100Sstevel@tonic-gate 		switch (subclass) {
5110Sstevel@tonic-gate 		case PREC_PSET:
5120Sstevel@tonic-gate 			ret = pool_pset_destroy((psetid_t)id);
5130Sstevel@tonic-gate 			break;
5140Sstevel@tonic-gate 		default:
5150Sstevel@tonic-gate 			ret = EINVAL;
5160Sstevel@tonic-gate 		}
5170Sstevel@tonic-gate 		break;
5180Sstevel@tonic-gate 	case PEC_RES_AGG:
5190Sstevel@tonic-gate 		ret = ENOTSUP;
5200Sstevel@tonic-gate 		break;
5210Sstevel@tonic-gate 	default:
5220Sstevel@tonic-gate 		ret = EINVAL;
5230Sstevel@tonic-gate 	}
5240Sstevel@tonic-gate 	return (ret);
5250Sstevel@tonic-gate }
5260Sstevel@tonic-gate 
5270Sstevel@tonic-gate /*
5280Sstevel@tonic-gate  * Enable or disable pools.
5290Sstevel@tonic-gate  */
5300Sstevel@tonic-gate int
5310Sstevel@tonic-gate pool_status(int status)
5320Sstevel@tonic-gate {
5330Sstevel@tonic-gate 	int ret = 0;
5340Sstevel@tonic-gate 
5350Sstevel@tonic-gate 	ASSERT(pool_lock_held());
5360Sstevel@tonic-gate 
5370Sstevel@tonic-gate 	if (pool_state == status)
5380Sstevel@tonic-gate 		return (0);
5390Sstevel@tonic-gate 	switch (status) {
5400Sstevel@tonic-gate 	case POOL_ENABLED:
5410Sstevel@tonic-gate 		ret = pool_enable();
5420Sstevel@tonic-gate 		if (ret != 0)
5430Sstevel@tonic-gate 			return (ret);
5440Sstevel@tonic-gate 		pool_state = POOL_ENABLED;
5450Sstevel@tonic-gate 		break;
5460Sstevel@tonic-gate 	case POOL_DISABLED:
5470Sstevel@tonic-gate 		ret = pool_disable();
5480Sstevel@tonic-gate 		if (ret != 0)
5490Sstevel@tonic-gate 			return (ret);
5500Sstevel@tonic-gate 		pool_state = POOL_DISABLED;
5510Sstevel@tonic-gate 		break;
5520Sstevel@tonic-gate 	default:
5530Sstevel@tonic-gate 		ret = EINVAL;
5540Sstevel@tonic-gate 	}
5550Sstevel@tonic-gate 	return (ret);
5560Sstevel@tonic-gate }
5570Sstevel@tonic-gate 
5580Sstevel@tonic-gate /*
5590Sstevel@tonic-gate  * Associate pool with resource set.
5600Sstevel@tonic-gate  */
5610Sstevel@tonic-gate int
5620Sstevel@tonic-gate pool_assoc(poolid_t poolid, int idtype, id_t id)
5630Sstevel@tonic-gate {
5640Sstevel@tonic-gate 	int ret;
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate 	ASSERT(pool_lock_held());
5670Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
5680Sstevel@tonic-gate 		return (ENOTACTIVE);
5690Sstevel@tonic-gate 	switch (idtype) {
5700Sstevel@tonic-gate 	case PREC_PSET:
5710Sstevel@tonic-gate 		ret = pool_pset_assoc(poolid, (psetid_t)id);
5720Sstevel@tonic-gate 		break;
5730Sstevel@tonic-gate 	default:
5740Sstevel@tonic-gate 		ret = EINVAL;
5750Sstevel@tonic-gate 	}
5760Sstevel@tonic-gate 	if (ret == 0)
5770Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
5780Sstevel@tonic-gate 	return (ret);
5790Sstevel@tonic-gate }
5800Sstevel@tonic-gate 
5810Sstevel@tonic-gate /*
5820Sstevel@tonic-gate  * Disassociate resource set from pool.
5830Sstevel@tonic-gate  */
5840Sstevel@tonic-gate int
5850Sstevel@tonic-gate pool_dissoc(poolid_t poolid, int idtype)
5860Sstevel@tonic-gate {
5870Sstevel@tonic-gate 	int ret;
5880Sstevel@tonic-gate 
5890Sstevel@tonic-gate 	ASSERT(pool_lock_held());
5900Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
5910Sstevel@tonic-gate 		return (ENOTACTIVE);
5920Sstevel@tonic-gate 	switch (idtype) {
5930Sstevel@tonic-gate 	case PREC_PSET:
5940Sstevel@tonic-gate 		ret = pool_pset_assoc(poolid, PS_NONE);
5950Sstevel@tonic-gate 		break;
5960Sstevel@tonic-gate 	default:
5970Sstevel@tonic-gate 		ret = EINVAL;
5980Sstevel@tonic-gate 	}
5990Sstevel@tonic-gate 	if (ret == 0)
6000Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
6010Sstevel@tonic-gate 	return (ret);
6020Sstevel@tonic-gate }
6030Sstevel@tonic-gate 
6040Sstevel@tonic-gate /*
6050Sstevel@tonic-gate  * Transfer specified quantity of resources between resource sets.
6060Sstevel@tonic-gate  */
6070Sstevel@tonic-gate /*ARGSUSED*/
6080Sstevel@tonic-gate int
6090Sstevel@tonic-gate pool_transfer(int type, id_t src, id_t dst, uint64_t qty)
6100Sstevel@tonic-gate {
6110Sstevel@tonic-gate 	int ret = EINVAL;
6120Sstevel@tonic-gate 	return (ret);
6130Sstevel@tonic-gate }
6140Sstevel@tonic-gate 
6150Sstevel@tonic-gate /*
6160Sstevel@tonic-gate  * Transfer resources specified by their IDs between resource sets.
6170Sstevel@tonic-gate  */
6180Sstevel@tonic-gate int
6190Sstevel@tonic-gate pool_xtransfer(int type, id_t src, id_t dst, uint_t size, id_t *ids)
6200Sstevel@tonic-gate {
6210Sstevel@tonic-gate 	int ret;
6220Sstevel@tonic-gate 
6230Sstevel@tonic-gate 	ASSERT(pool_lock_held());
6240Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
6250Sstevel@tonic-gate 		return (ENOTACTIVE);
6260Sstevel@tonic-gate 	switch (type) {
6270Sstevel@tonic-gate 	case PREC_PSET:
6280Sstevel@tonic-gate 		ret = pool_pset_xtransfer((psetid_t)src, (psetid_t)dst,
6290Sstevel@tonic-gate 		    size, ids);
6300Sstevel@tonic-gate 		break;
6310Sstevel@tonic-gate 	default:
6320Sstevel@tonic-gate 		ret = EINVAL;
6330Sstevel@tonic-gate 	}
6340Sstevel@tonic-gate 	return (ret);
6350Sstevel@tonic-gate }
6360Sstevel@tonic-gate 
6370Sstevel@tonic-gate /*
6380Sstevel@tonic-gate  * Bind processes to pools.
6390Sstevel@tonic-gate  */
6400Sstevel@tonic-gate int
6410Sstevel@tonic-gate pool_bind(poolid_t poolid, idtype_t idtype, id_t id)
6420Sstevel@tonic-gate {
6430Sstevel@tonic-gate 	pool_t *pool;
6440Sstevel@tonic-gate 
6450Sstevel@tonic-gate 	ASSERT(pool_lock_held());
6460Sstevel@tonic-gate 
6470Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
6480Sstevel@tonic-gate 		return (ENOTACTIVE);
6490Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
6500Sstevel@tonic-gate 		return (ESRCH);
6510Sstevel@tonic-gate 
6520Sstevel@tonic-gate 	switch (idtype) {
6530Sstevel@tonic-gate 	case P_PID:
6540Sstevel@tonic-gate 	case P_TASKID:
6550Sstevel@tonic-gate 	case P_PROJID:
6560Sstevel@tonic-gate 	case P_ZONEID:
6570Sstevel@tonic-gate 		break;
6580Sstevel@tonic-gate 	default:
6590Sstevel@tonic-gate 		return (EINVAL);
6600Sstevel@tonic-gate 	}
6610Sstevel@tonic-gate 	return (pool_do_bind(pool, idtype, id, POOL_BIND_ALL));
6620Sstevel@tonic-gate }
6630Sstevel@tonic-gate 
6640Sstevel@tonic-gate /*
6650Sstevel@tonic-gate  * Query pool binding of the specifed process.
6660Sstevel@tonic-gate  */
6670Sstevel@tonic-gate int
6680Sstevel@tonic-gate pool_query_binding(idtype_t idtype, id_t id, id_t *poolid)
6690Sstevel@tonic-gate {
6700Sstevel@tonic-gate 	proc_t *p;
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	if (idtype != P_PID)
6730Sstevel@tonic-gate 		return (ENOTSUP);
6740Sstevel@tonic-gate 	if (id == P_MYID)
6750Sstevel@tonic-gate 		id = curproc->p_pid;
6760Sstevel@tonic-gate 
6770Sstevel@tonic-gate 	ASSERT(pool_lock_held());
6780Sstevel@tonic-gate 
6790Sstevel@tonic-gate 	mutex_enter(&pidlock);
6800Sstevel@tonic-gate 	if ((p = prfind((pid_t)id)) == NULL) {
6810Sstevel@tonic-gate 		mutex_exit(&pidlock);
6820Sstevel@tonic-gate 		return (ESRCH);
6830Sstevel@tonic-gate 	}
6840Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
6850Sstevel@tonic-gate 	/*
6860Sstevel@tonic-gate 	 * In local zones, lie about pool bindings of processes from
6870Sstevel@tonic-gate 	 * the global zone.
6880Sstevel@tonic-gate 	 */
6890Sstevel@tonic-gate 	if (!INGLOBALZONE(curproc) && INGLOBALZONE(p)) {
6900Sstevel@tonic-gate 		pool_t *pool;
6910Sstevel@tonic-gate 
6920Sstevel@tonic-gate 		pool = zone_pool_get(curproc->p_zone);
6930Sstevel@tonic-gate 		*poolid = pool->pool_id;
6940Sstevel@tonic-gate 	} else {
6950Sstevel@tonic-gate 		*poolid = p->p_pool->pool_id;
6960Sstevel@tonic-gate 	}
6970Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
6980Sstevel@tonic-gate 	mutex_exit(&pidlock);
6990Sstevel@tonic-gate 	return (0);
7000Sstevel@tonic-gate }
7010Sstevel@tonic-gate 
7020Sstevel@tonic-gate static ea_object_t *
7030Sstevel@tonic-gate pool_system_pack(void)
7040Sstevel@tonic-gate {
7050Sstevel@tonic-gate 	ea_object_t *eo_system;
7060Sstevel@tonic-gate 	size_t bufsz = 0;
7070Sstevel@tonic-gate 	char *buf = NULL;
7080Sstevel@tonic-gate 
7090Sstevel@tonic-gate 	ASSERT(pool_lock_held());
7100Sstevel@tonic-gate 
7110Sstevel@tonic-gate 	eo_system = ea_alloc_group(EXT_GROUP | EXC_LOCAL | EXD_GROUP_SYSTEM);
7120Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, &pool_sys_mod, sizeof (hrtime_t),
7130Sstevel@tonic-gate 	    EXC_LOCAL | EXD_SYSTEM_TSTAMP | EXT_UINT64);
7140Sstevel@tonic-gate 	if (INGLOBALZONE(curproc))
7150Sstevel@tonic-gate 		(void) ea_attach_item(eo_system, &pool_pool_mod,
7160Sstevel@tonic-gate 		    sizeof (hrtime_t),
7170Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_TSTAMP | EXT_UINT64);
7180Sstevel@tonic-gate 	else
7190Sstevel@tonic-gate 		(void) ea_attach_item(eo_system,
7200Sstevel@tonic-gate 		    &curproc->p_zone->zone_pool_mod,
7210Sstevel@tonic-gate 		    sizeof (hrtime_t),
7220Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_TSTAMP | EXT_UINT64);
7230Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, &pool_pset_mod, sizeof (hrtime_t),
7240Sstevel@tonic-gate 	    EXC_LOCAL | EXD_PSET_TSTAMP | EXT_UINT64);
7250Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, &pool_cpu_mod, sizeof (hrtime_t),
7260Sstevel@tonic-gate 	    EXC_LOCAL | EXD_CPU_TSTAMP | EXT_UINT64);
7270Sstevel@tonic-gate 	(void) nvlist_pack(pool_sys_prop, &buf, &bufsz, NV_ENCODE_NATIVE, 0);
7280Sstevel@tonic-gate 	(void) ea_attach_item(eo_system, buf, bufsz,
7290Sstevel@tonic-gate 	    EXC_LOCAL | EXD_SYSTEM_PROP | EXT_RAW);
7300Sstevel@tonic-gate 	kmem_free(buf, bufsz);
7310Sstevel@tonic-gate 	return (eo_system);
7320Sstevel@tonic-gate }
7330Sstevel@tonic-gate 
7340Sstevel@tonic-gate /*
7350Sstevel@tonic-gate  * Pack information about pools and attach it to specified exacct group.
7360Sstevel@tonic-gate  */
7370Sstevel@tonic-gate static int
7380Sstevel@tonic-gate pool_pool_pack(ea_object_t *eo_system)
7390Sstevel@tonic-gate {
7400Sstevel@tonic-gate 	ea_object_t *eo_pool;
7410Sstevel@tonic-gate 	pool_t *pool;
7420Sstevel@tonic-gate 	size_t bufsz;
7430Sstevel@tonic-gate 	char *buf;
7440Sstevel@tonic-gate 	pool_t *myzonepool;
7450Sstevel@tonic-gate 
7460Sstevel@tonic-gate 	ASSERT(pool_lock_held());
7470Sstevel@tonic-gate 	myzonepool = zone_pool_get(curproc->p_zone);
7480Sstevel@tonic-gate 	for (pool = list_head(&pool_list); pool;
7490Sstevel@tonic-gate 	    pool = list_next(&pool_list, pool)) {
7500Sstevel@tonic-gate 		if (!INGLOBALZONE(curproc) && myzonepool != pool)
7510Sstevel@tonic-gate 			continue;
7520Sstevel@tonic-gate 		bufsz = 0;
7530Sstevel@tonic-gate 		buf = NULL;
7540Sstevel@tonic-gate 		eo_pool = ea_alloc_group(EXT_GROUP |
7550Sstevel@tonic-gate 		    EXC_LOCAL | EXD_GROUP_POOL);
7560Sstevel@tonic-gate 		(void) ea_attach_item(eo_pool, &pool->pool_id, sizeof (id_t),
7570Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_POOLID | EXT_UINT32);
7580Sstevel@tonic-gate 		(void) ea_attach_item(eo_pool, &pool->pool_pset->pset_id,
7590Sstevel@tonic-gate 		    sizeof (id_t), EXC_LOCAL | EXD_POOL_PSETID | EXT_UINT32);
7600Sstevel@tonic-gate 		(void) nvlist_pack(pool->pool_props, &buf, &bufsz,
7610Sstevel@tonic-gate 		    NV_ENCODE_NATIVE, 0);
7620Sstevel@tonic-gate 		(void) ea_attach_item(eo_pool, buf, bufsz,
7630Sstevel@tonic-gate 		    EXC_LOCAL | EXD_POOL_PROP | EXT_RAW);
7640Sstevel@tonic-gate 		kmem_free(buf, bufsz);
7650Sstevel@tonic-gate 		(void) ea_attach_to_group(eo_system, eo_pool);
7660Sstevel@tonic-gate 	}
7670Sstevel@tonic-gate 	return (0);
7680Sstevel@tonic-gate }
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate /*
7710Sstevel@tonic-gate  * Pack the whole pool configuration in the specified buffer.
7720Sstevel@tonic-gate  */
7730Sstevel@tonic-gate int
7740Sstevel@tonic-gate pool_pack_conf(void *kbuf, size_t kbufsz, size_t *asize)
7750Sstevel@tonic-gate {
7760Sstevel@tonic-gate 	ea_object_t *eo_system;
7770Sstevel@tonic-gate 	size_t ksize;
7780Sstevel@tonic-gate 	int ret = 0;
7790Sstevel@tonic-gate 
7800Sstevel@tonic-gate 	ASSERT(pool_lock_held());
7810Sstevel@tonic-gate 
7820Sstevel@tonic-gate 	eo_system = pool_system_pack();		/* 1. pack system */
7830Sstevel@tonic-gate 	(void) pool_pool_pack(eo_system);	/* 2. pack all pools */
7840Sstevel@tonic-gate 	(void) pool_pset_pack(eo_system);	/* 3. pack all psets */
7850Sstevel@tonic-gate 	ksize = ea_pack_object(eo_system, NULL, 0);
7860Sstevel@tonic-gate 	if (kbuf == NULL || kbufsz == 0)
7870Sstevel@tonic-gate 		*asize = ksize;
7880Sstevel@tonic-gate 	else if (ksize > kbufsz)
7890Sstevel@tonic-gate 		ret = ENOMEM;
7900Sstevel@tonic-gate 	else
7910Sstevel@tonic-gate 		*asize = ea_pack_object(eo_system, kbuf, kbufsz);
7920Sstevel@tonic-gate 	ea_free_object(eo_system, EUP_ALLOC);
7930Sstevel@tonic-gate 	return (ret);
7940Sstevel@tonic-gate }
7950Sstevel@tonic-gate 
7960Sstevel@tonic-gate /*
7970Sstevel@tonic-gate  * Start/end the commit transaction.  If commit transaction is currently
7980Sstevel@tonic-gate  * in progress, then all POOL_QUERY ioctls will return pools configuration
7990Sstevel@tonic-gate  * at the beginning of transaction.
8000Sstevel@tonic-gate  */
8010Sstevel@tonic-gate int
8020Sstevel@tonic-gate pool_commit(int state)
8030Sstevel@tonic-gate {
8040Sstevel@tonic-gate 	ea_object_t *eo_system;
8050Sstevel@tonic-gate 	int ret = 0;
8060Sstevel@tonic-gate 
8070Sstevel@tonic-gate 	ASSERT(pool_lock_held());
8080Sstevel@tonic-gate 
8090Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
8100Sstevel@tonic-gate 		return (ENOTACTIVE);
8110Sstevel@tonic-gate 	switch (state) {
8120Sstevel@tonic-gate 	case 1:
8130Sstevel@tonic-gate 		/*
8140Sstevel@tonic-gate 		 * Beginning commit transation.
8150Sstevel@tonic-gate 		 */
8160Sstevel@tonic-gate 		if (pool_buf != NULL)		/* transaction in progress */
8170Sstevel@tonic-gate 			return (EBUSY);
8180Sstevel@tonic-gate 		eo_system = pool_system_pack();		/* 1. pack system */
8190Sstevel@tonic-gate 		(void) pool_pool_pack(eo_system);	/* 2. pack all pools */
8200Sstevel@tonic-gate 		(void) pool_pset_pack(eo_system);	/* 3. pack all psets */
8210Sstevel@tonic-gate 		pool_bufsz = ea_pack_object(eo_system, NULL, 0);
8220Sstevel@tonic-gate 		pool_buf = kmem_alloc(pool_bufsz, KM_SLEEP);
8230Sstevel@tonic-gate 		pool_bufsz = ea_pack_object(eo_system, pool_buf, pool_bufsz);
8240Sstevel@tonic-gate 		ea_free_object(eo_system, EUP_ALLOC);
8250Sstevel@tonic-gate 		break;
8260Sstevel@tonic-gate 	case 0:
8270Sstevel@tonic-gate 		/*
8280Sstevel@tonic-gate 		 * Finishing commit transaction.
8290Sstevel@tonic-gate 		 */
8300Sstevel@tonic-gate 		if (pool_buf != NULL) {
8310Sstevel@tonic-gate 			kmem_free(pool_buf, pool_bufsz);
8320Sstevel@tonic-gate 			pool_buf = NULL;
8330Sstevel@tonic-gate 			pool_bufsz = 0;
8340Sstevel@tonic-gate 		}
8350Sstevel@tonic-gate 		break;
8360Sstevel@tonic-gate 	default:
8370Sstevel@tonic-gate 		ret = EINVAL;
8380Sstevel@tonic-gate 	}
8390Sstevel@tonic-gate 	return (ret);
8400Sstevel@tonic-gate }
8410Sstevel@tonic-gate 
8420Sstevel@tonic-gate /*
8430Sstevel@tonic-gate  * Check is the specified property is special
8440Sstevel@tonic-gate  */
8450Sstevel@tonic-gate static pool_property_t *
8460Sstevel@tonic-gate pool_property_find(char *name, pool_property_t *list)
8470Sstevel@tonic-gate {
8480Sstevel@tonic-gate 	pool_property_t *prop;
8490Sstevel@tonic-gate 
8500Sstevel@tonic-gate 	for (prop = list; prop->pp_name != NULL; prop++)
8510Sstevel@tonic-gate 		if (strcmp(prop->pp_name, name) == 0)
8520Sstevel@tonic-gate 			return (prop);
8530Sstevel@tonic-gate 	return (NULL);
8540Sstevel@tonic-gate }
8550Sstevel@tonic-gate 
/*
 * Special properties of the system element.  Each entry gives the
 * property name, the nvpair data type it must have, and its permission
 * flags.  pool_propput_common() refuses to store an entry that lacks
 * PP_WRITE or whose type does not match; pool_proprm_common() refuses to
 * remove an entry that lacks PP_OPTIONAL.  The table ends with a
 * NULL-named entry, as pool_property_find() requires.
 */
static pool_property_t pool_prop_sys[] = {
	{ "system.name",		DATA_TYPE_STRING,	PP_RDWR },
	{ "system.comment",		DATA_TYPE_STRING,	PP_RDWR },
	{ "system.version",		DATA_TYPE_UINT64,	PP_READ },
	{ "system.bind-default",	DATA_TYPE_BYTE,		PP_RDWR },
	{ "system.allocate-method",	DATA_TYPE_STRING,
	    PP_RDWR | PP_OPTIONAL },
	{ "system.poold.log-level",	DATA_TYPE_STRING,
	    PP_RDWR | PP_OPTIONAL },
	{ "system.poold.log-location",	DATA_TYPE_STRING,
	    PP_RDWR | PP_OPTIONAL },
	{ "system.poold.monitor-interval",	DATA_TYPE_UINT64,
	    PP_RDWR | PP_OPTIONAL },
	{ "system.poold.history-file",	DATA_TYPE_STRING,
	    PP_RDWR | PP_OPTIONAL },
	{ "system.poold.objectives",	DATA_TYPE_STRING,
	    PP_RDWR | PP_OPTIONAL },
	{ NULL,				0,			0 }
};
8750Sstevel@tonic-gate 
/*
 * Special properties of pool elements.  Each entry gives the property
 * name, the nvpair data type it must have, and its permission flags.
 * The table ends with a NULL-named entry, as pool_property_find()
 * requires.
 */
static pool_property_t pool_prop_pool[] = {
	{ "pool.sys_id",		DATA_TYPE_UINT64,	PP_READ },
	{ "pool.name",			DATA_TYPE_STRING,	PP_RDWR },
	{ "pool.default",		DATA_TYPE_BYTE,		PP_READ },
	{ "pool.active",		DATA_TYPE_BYTE,		PP_RDWR },
	{ "pool.importance",		DATA_TYPE_INT64,	PP_RDWR },
	{ "pool.comment",		DATA_TYPE_STRING,	PP_RDWR },
	{ "pool.scheduler",		DATA_TYPE_STRING,
	    PP_RDWR | PP_OPTIONAL },
	{ NULL,				0,			0 }
};
8870Sstevel@tonic-gate 
8880Sstevel@tonic-gate /*
8890Sstevel@tonic-gate  * Common routine to put new property on the specified list
8900Sstevel@tonic-gate  */
8910Sstevel@tonic-gate int
8920Sstevel@tonic-gate pool_propput_common(nvlist_t *nvlist, nvpair_t *pair, pool_property_t *props)
8930Sstevel@tonic-gate {
8940Sstevel@tonic-gate 	pool_property_t *prop;
8950Sstevel@tonic-gate 
8960Sstevel@tonic-gate 	if ((prop = pool_property_find(nvpair_name(pair), props)) != NULL) {
8970Sstevel@tonic-gate 		/*
8980Sstevel@tonic-gate 		 * No read-only properties or properties with bad types
8990Sstevel@tonic-gate 		 */
9000Sstevel@tonic-gate 		if (!(prop->pp_perm & PP_WRITE) ||
9010Sstevel@tonic-gate 		    prop->pp_type != nvpair_type(pair))
9020Sstevel@tonic-gate 			return (EINVAL);
9030Sstevel@tonic-gate 	}
9040Sstevel@tonic-gate 	return (nvlist_add_nvpair(nvlist, pair));
9050Sstevel@tonic-gate }
9060Sstevel@tonic-gate 
9070Sstevel@tonic-gate /*
9080Sstevel@tonic-gate  * Common routine to remove property from the given list
9090Sstevel@tonic-gate  */
9100Sstevel@tonic-gate int
9110Sstevel@tonic-gate pool_proprm_common(nvlist_t *nvlist, char *name, pool_property_t *props)
9120Sstevel@tonic-gate {
9130Sstevel@tonic-gate 	pool_property_t *prop;
9140Sstevel@tonic-gate 
9150Sstevel@tonic-gate 	if ((prop = pool_property_find(name, props)) != NULL) {
9160Sstevel@tonic-gate 		if (!(prop->pp_perm & PP_OPTIONAL))
9170Sstevel@tonic-gate 			return (EINVAL);
9180Sstevel@tonic-gate 	}
9190Sstevel@tonic-gate 	return (nvlist_remove_all(nvlist, name));
9200Sstevel@tonic-gate }
9210Sstevel@tonic-gate 
9220Sstevel@tonic-gate static int
9230Sstevel@tonic-gate pool_system_propput(nvpair_t *pair)
9240Sstevel@tonic-gate {
9250Sstevel@tonic-gate 	int ret;
9260Sstevel@tonic-gate 
9270Sstevel@tonic-gate 	ASSERT(pool_lock_held());
9280Sstevel@tonic-gate 	ret = pool_propput_common(pool_sys_prop, pair, pool_prop_sys);
9290Sstevel@tonic-gate 	if (ret == 0)
9300Sstevel@tonic-gate 		pool_sys_mod = gethrtime();
9310Sstevel@tonic-gate 	return (ret);
9320Sstevel@tonic-gate }
9330Sstevel@tonic-gate 
9340Sstevel@tonic-gate static int
9350Sstevel@tonic-gate pool_system_proprm(char *name)
9360Sstevel@tonic-gate {
9370Sstevel@tonic-gate 	int ret;
9380Sstevel@tonic-gate 
9390Sstevel@tonic-gate 	ASSERT(pool_lock_held());
9400Sstevel@tonic-gate 	ret = pool_proprm_common(pool_sys_prop, name, pool_prop_sys);
9410Sstevel@tonic-gate 	if (ret == 0)
9420Sstevel@tonic-gate 		pool_sys_mod = gethrtime();
9430Sstevel@tonic-gate 	return (ret);
9440Sstevel@tonic-gate }
9450Sstevel@tonic-gate 
9460Sstevel@tonic-gate static int
9470Sstevel@tonic-gate pool_pool_propput(poolid_t poolid, nvpair_t *pair)
9480Sstevel@tonic-gate {
9490Sstevel@tonic-gate 	pool_t *pool;
9500Sstevel@tonic-gate 	int ret;
9510Sstevel@tonic-gate 
9520Sstevel@tonic-gate 	ASSERT(pool_lock_held());
9530Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
9540Sstevel@tonic-gate 		return (ESRCH);
9550Sstevel@tonic-gate 	ret = pool_propput_common(pool->pool_props, pair, pool_prop_pool);
9560Sstevel@tonic-gate 	if (ret == 0)
9570Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
9580Sstevel@tonic-gate 	return (ret);
9590Sstevel@tonic-gate }
9600Sstevel@tonic-gate 
9610Sstevel@tonic-gate static int
9620Sstevel@tonic-gate pool_pool_proprm(poolid_t poolid, char *name)
9630Sstevel@tonic-gate {
9640Sstevel@tonic-gate 	int ret;
9650Sstevel@tonic-gate 	pool_t *pool;
9660Sstevel@tonic-gate 
9670Sstevel@tonic-gate 	ASSERT(pool_lock_held());
9680Sstevel@tonic-gate 	if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
9690Sstevel@tonic-gate 		return (ESRCH);
9700Sstevel@tonic-gate 	ret = pool_proprm_common(pool->pool_props, name, pool_prop_pool);
9710Sstevel@tonic-gate 	if (ret == 0)
9720Sstevel@tonic-gate 		pool_pool_mod = gethrtime();
9730Sstevel@tonic-gate 	return (ret);
9740Sstevel@tonic-gate }
9750Sstevel@tonic-gate 
9760Sstevel@tonic-gate int
9770Sstevel@tonic-gate pool_propput(int class, int subclass, id_t id, nvpair_t *pair)
9780Sstevel@tonic-gate {
9790Sstevel@tonic-gate 	int ret;
9800Sstevel@tonic-gate 
9810Sstevel@tonic-gate 	ASSERT(pool_lock_held());
9820Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
9830Sstevel@tonic-gate 		return (ENOTACTIVE);
9840Sstevel@tonic-gate 	switch (class) {
9850Sstevel@tonic-gate 	case PEC_SYSTEM:
9860Sstevel@tonic-gate 		ret = pool_system_propput(pair);
9870Sstevel@tonic-gate 		break;
9880Sstevel@tonic-gate 	case PEC_POOL:
9890Sstevel@tonic-gate 		ret = pool_pool_propput((poolid_t)id, pair);
9900Sstevel@tonic-gate 		break;
9910Sstevel@tonic-gate 	case PEC_RES_COMP:
9920Sstevel@tonic-gate 		switch (subclass) {
9930Sstevel@tonic-gate 		case PREC_PSET:
9940Sstevel@tonic-gate 			ret = pool_pset_propput((psetid_t)id, pair);
9950Sstevel@tonic-gate 			break;
9960Sstevel@tonic-gate 		default:
9970Sstevel@tonic-gate 			ret = EINVAL;
9980Sstevel@tonic-gate 		}
9990Sstevel@tonic-gate 		break;
10000Sstevel@tonic-gate 	case PEC_RES_AGG:
10010Sstevel@tonic-gate 		ret = ENOTSUP;
10020Sstevel@tonic-gate 		break;
10030Sstevel@tonic-gate 	case PEC_COMP:
10040Sstevel@tonic-gate 		switch (subclass) {
10050Sstevel@tonic-gate 		case PCEC_CPU:
10060Sstevel@tonic-gate 			ret = pool_cpu_propput((processorid_t)id, pair);
10070Sstevel@tonic-gate 			break;
10080Sstevel@tonic-gate 		default:
10090Sstevel@tonic-gate 			ret = EINVAL;
10100Sstevel@tonic-gate 		}
10110Sstevel@tonic-gate 		break;
10120Sstevel@tonic-gate 	default:
10130Sstevel@tonic-gate 		ret = EINVAL;
10140Sstevel@tonic-gate 	}
10150Sstevel@tonic-gate 	return (ret);
10160Sstevel@tonic-gate }
10170Sstevel@tonic-gate 
10180Sstevel@tonic-gate int
10190Sstevel@tonic-gate pool_proprm(int class, int subclass, id_t id, char *name)
10200Sstevel@tonic-gate {
10210Sstevel@tonic-gate 	int ret;
10220Sstevel@tonic-gate 
10230Sstevel@tonic-gate 	ASSERT(pool_lock_held());
10240Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
10250Sstevel@tonic-gate 		return (ENOTACTIVE);
10260Sstevel@tonic-gate 	switch (class) {
10270Sstevel@tonic-gate 	case PEC_SYSTEM:
10280Sstevel@tonic-gate 		ret = pool_system_proprm(name);
10290Sstevel@tonic-gate 		break;
10300Sstevel@tonic-gate 	case PEC_POOL:
10310Sstevel@tonic-gate 		ret = pool_pool_proprm((poolid_t)id, name);
10320Sstevel@tonic-gate 		break;
10330Sstevel@tonic-gate 	case PEC_RES_COMP:
10340Sstevel@tonic-gate 		switch (subclass) {
10350Sstevel@tonic-gate 		case PREC_PSET:
10360Sstevel@tonic-gate 			ret = pool_pset_proprm((psetid_t)id, name);
10370Sstevel@tonic-gate 			break;
10380Sstevel@tonic-gate 		default:
10390Sstevel@tonic-gate 			ret = EINVAL;
10400Sstevel@tonic-gate 		}
10410Sstevel@tonic-gate 		break;
10420Sstevel@tonic-gate 	case PEC_RES_AGG:
10430Sstevel@tonic-gate 		ret = ENOTSUP;
10440Sstevel@tonic-gate 		break;
10450Sstevel@tonic-gate 	case PEC_COMP:
10460Sstevel@tonic-gate 		switch (subclass) {
10470Sstevel@tonic-gate 		case PCEC_CPU:
10480Sstevel@tonic-gate 			ret = pool_cpu_proprm((processorid_t)id, name);
10490Sstevel@tonic-gate 			break;
10500Sstevel@tonic-gate 		default:
10510Sstevel@tonic-gate 			ret = EINVAL;
10520Sstevel@tonic-gate 		}
10530Sstevel@tonic-gate 		break;
10540Sstevel@tonic-gate 	default:
10550Sstevel@tonic-gate 		ret = EINVAL;
10560Sstevel@tonic-gate 	}
10570Sstevel@tonic-gate 	return (ret);
10580Sstevel@tonic-gate }
10590Sstevel@tonic-gate 
10600Sstevel@tonic-gate int
10610Sstevel@tonic-gate pool_propget(char *name, int class, int subclass, id_t id, nvlist_t **nvlp)
10620Sstevel@tonic-gate {
10630Sstevel@tonic-gate 	int ret;
10640Sstevel@tonic-gate 	nvlist_t *nvl;
10650Sstevel@tonic-gate 
10660Sstevel@tonic-gate 	ASSERT(pool_lock_held());
10670Sstevel@tonic-gate 	if (pool_state == POOL_DISABLED)
10680Sstevel@tonic-gate 		return (ENOTACTIVE);
10690Sstevel@tonic-gate 
10700Sstevel@tonic-gate 	(void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
10710Sstevel@tonic-gate 
10720Sstevel@tonic-gate 	switch (class) {
10730Sstevel@tonic-gate 	case PEC_SYSTEM:
10740Sstevel@tonic-gate 	case PEC_POOL:
10750Sstevel@tonic-gate 		ret = EINVAL;
10760Sstevel@tonic-gate 		break;
10770Sstevel@tonic-gate 	case PEC_RES_COMP:
10780Sstevel@tonic-gate 		switch (subclass) {
10790Sstevel@tonic-gate 		case PREC_PSET:
10800Sstevel@tonic-gate 			ret = pool_pset_propget((psetid_t)id, name, nvl);
10810Sstevel@tonic-gate 			break;
10820Sstevel@tonic-gate 		default:
10830Sstevel@tonic-gate 			ret = EINVAL;
10840Sstevel@tonic-gate 		}
10850Sstevel@tonic-gate 		break;
10860Sstevel@tonic-gate 	case PEC_RES_AGG:
10870Sstevel@tonic-gate 		ret = ENOTSUP;
10880Sstevel@tonic-gate 		break;
10890Sstevel@tonic-gate 	case PEC_COMP:
10900Sstevel@tonic-gate 		switch (subclass) {
10910Sstevel@tonic-gate 		case PCEC_CPU:
10920Sstevel@tonic-gate 			ret = pool_cpu_propget((processorid_t)id, name, nvl);
10930Sstevel@tonic-gate 			break;
10940Sstevel@tonic-gate 		default:
10950Sstevel@tonic-gate 			ret = EINVAL;
10960Sstevel@tonic-gate 		}
10970Sstevel@tonic-gate 		break;
10980Sstevel@tonic-gate 	default:
10990Sstevel@tonic-gate 		ret = EINVAL;
11000Sstevel@tonic-gate 	}
11010Sstevel@tonic-gate 	if (ret == 0)
11020Sstevel@tonic-gate 		*nvlp = nvl;
11030Sstevel@tonic-gate 	else
11040Sstevel@tonic-gate 		nvlist_free(nvl);
11050Sstevel@tonic-gate 	return (ret);
11060Sstevel@tonic-gate }
11070Sstevel@tonic-gate 
11080Sstevel@tonic-gate /*
11090Sstevel@tonic-gate  * pool_bind_wake and pool_bind_wakeall are helper functions to undo PBWAITs
11100Sstevel@tonic-gate  * in case of failure in pool_do_bind().
11110Sstevel@tonic-gate  */
11120Sstevel@tonic-gate static void
11130Sstevel@tonic-gate pool_bind_wake(proc_t *p)
11140Sstevel@tonic-gate {
11150Sstevel@tonic-gate 	ASSERT(pool_lock_held());
11160Sstevel@tonic-gate 
11170Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11180Sstevel@tonic-gate 	ASSERT(p->p_poolflag & PBWAIT);
11190Sstevel@tonic-gate 	if (p->p_poolcnt > 0) {
11200Sstevel@tonic-gate 		mutex_enter(&pool_barrier_lock);
11210Sstevel@tonic-gate 		pool_barrier_count -= p->p_poolcnt;
11220Sstevel@tonic-gate 		mutex_exit(&pool_barrier_lock);
11230Sstevel@tonic-gate 	}
11240Sstevel@tonic-gate 	p->p_poolflag &= ~PBWAIT;
11250Sstevel@tonic-gate 	cv_signal(&p->p_poolcv);
11260Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11270Sstevel@tonic-gate }
11280Sstevel@tonic-gate 
11290Sstevel@tonic-gate static void
11300Sstevel@tonic-gate pool_bind_wakeall(proc_t **procs)
11310Sstevel@tonic-gate {
11320Sstevel@tonic-gate 	proc_t *p, **pp;
11330Sstevel@tonic-gate 
11340Sstevel@tonic-gate 	ASSERT(pool_lock_held());
11350Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++)
11360Sstevel@tonic-gate 		pool_bind_wake(p);
11370Sstevel@tonic-gate }
11380Sstevel@tonic-gate 
11390Sstevel@tonic-gate /*
11400Sstevel@tonic-gate  * Return the scheduling class for this pool, or
11410Sstevel@tonic-gate  * 	POOL_CLASS_UNSET if not set
11420Sstevel@tonic-gate  * 	POOL_CLASS_INVAL if set to an invalid class ID.
11430Sstevel@tonic-gate  */
11440Sstevel@tonic-gate id_t
11450Sstevel@tonic-gate pool_get_class(pool_t *pool)
11460Sstevel@tonic-gate {
11470Sstevel@tonic-gate 	char *name;
11480Sstevel@tonic-gate 	id_t cid;
11490Sstevel@tonic-gate 
11500Sstevel@tonic-gate 	ASSERT(pool_lock_held());
11510Sstevel@tonic-gate 
11520Sstevel@tonic-gate 	if (nvlist_lookup_string(pool->pool_props, "pool.scheduler",
11530Sstevel@tonic-gate 	    &name) == 0) {
11540Sstevel@tonic-gate 		if (getcidbyname(name, &cid) == 0)
11550Sstevel@tonic-gate 			return (cid);
11560Sstevel@tonic-gate 		else
11570Sstevel@tonic-gate 			return (POOL_CLASS_INVAL);
11580Sstevel@tonic-gate 	}
11590Sstevel@tonic-gate 	return (POOL_CLASS_UNSET);
11600Sstevel@tonic-gate }
11610Sstevel@tonic-gate 
11620Sstevel@tonic-gate /*
11630Sstevel@tonic-gate  * Move process to the new scheduling class.
11640Sstevel@tonic-gate  */
11650Sstevel@tonic-gate static void
11660Sstevel@tonic-gate pool_change_class(proc_t *p, id_t cid)
11670Sstevel@tonic-gate {
11680Sstevel@tonic-gate 	kthread_t *t;
11690Sstevel@tonic-gate 	void *cldata;
11700Sstevel@tonic-gate 	id_t oldcid;
11710Sstevel@tonic-gate 	void **bufs;
11720Sstevel@tonic-gate 	void **buf;
11730Sstevel@tonic-gate 	int nlwp;
11740Sstevel@tonic-gate 	int ret;
11750Sstevel@tonic-gate 	int i;
11760Sstevel@tonic-gate 
11770Sstevel@tonic-gate 	/*
11780Sstevel@tonic-gate 	 * Do not move kernel processes (such as zsched).
11790Sstevel@tonic-gate 	 */
11800Sstevel@tonic-gate 	if (p->p_flag & SSYS)
11810Sstevel@tonic-gate 		return;
11820Sstevel@tonic-gate 	/*
11830Sstevel@tonic-gate 	 * This process is in the pool barrier, so it can't possibly be
11840Sstevel@tonic-gate 	 * adding new threads and we can use p_lwpcnt + p_zombcnt + 1
11850Sstevel@tonic-gate 	 * (for possible agent LWP which doesn't use pool barrier) as
11860Sstevel@tonic-gate 	 * our upper bound.
11870Sstevel@tonic-gate 	 */
11880Sstevel@tonic-gate 	nlwp = p->p_lwpcnt + p->p_zombcnt + 1;
11890Sstevel@tonic-gate 
11900Sstevel@tonic-gate 	/*
11910Sstevel@tonic-gate 	 * Pre-allocate scheduling class specific buffers before
11920Sstevel@tonic-gate 	 * grabbing p_lock.
11930Sstevel@tonic-gate 	 */
11940Sstevel@tonic-gate 	bufs = kmem_zalloc(nlwp * sizeof (void *), KM_SLEEP);
11950Sstevel@tonic-gate 	for (i = 0, buf = bufs; i < nlwp; i++, buf++) {
11960Sstevel@tonic-gate 		ret = CL_ALLOC(buf, cid, KM_SLEEP);
11970Sstevel@tonic-gate 		ASSERT(ret == 0);
11980Sstevel@tonic-gate 	}
11990Sstevel@tonic-gate 
12000Sstevel@tonic-gate 	/*
12010Sstevel@tonic-gate 	 * Move threads one by one to the new scheduling class.
12020Sstevel@tonic-gate 	 * This never fails because we have all the right
12030Sstevel@tonic-gate 	 * privileges here.
12040Sstevel@tonic-gate 	 */
12050Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
12060Sstevel@tonic-gate 	ASSERT(p->p_poolflag & PBWAIT);
12070Sstevel@tonic-gate 	buf = bufs;
12080Sstevel@tonic-gate 	t = p->p_tlist;
12090Sstevel@tonic-gate 	ASSERT(t != NULL);
12100Sstevel@tonic-gate 	do {
12110Sstevel@tonic-gate 		if (t->t_cid != cid) {
12120Sstevel@tonic-gate 			oldcid = t->t_cid;
12130Sstevel@tonic-gate 			cldata = t->t_cldata;
12140Sstevel@tonic-gate 			ret = CL_ENTERCLASS(t, cid, NULL, NULL, *buf);
12150Sstevel@tonic-gate 			ASSERT(ret == 0);
12160Sstevel@tonic-gate 			CL_EXITCLASS(oldcid, cldata);
12170Sstevel@tonic-gate 			*buf++ = NULL;
12180Sstevel@tonic-gate 		}
12190Sstevel@tonic-gate 	} while ((t = t->t_forw) != p->p_tlist);
12200Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
12210Sstevel@tonic-gate 	/*
12220Sstevel@tonic-gate 	 * Free unused scheduling class specific buffers.
12230Sstevel@tonic-gate 	 */
12240Sstevel@tonic-gate 	for (i = 0, buf = bufs; i < nlwp; i++, buf++) {
12250Sstevel@tonic-gate 		if (*buf != NULL) {
12260Sstevel@tonic-gate 			CL_FREE(cid, *buf);
12270Sstevel@tonic-gate 			*buf = NULL;
12280Sstevel@tonic-gate 		}
12290Sstevel@tonic-gate 	}
12300Sstevel@tonic-gate 	kmem_free(bufs, nlwp * sizeof (void *));
12310Sstevel@tonic-gate }
12320Sstevel@tonic-gate 
12330Sstevel@tonic-gate /*
12340Sstevel@tonic-gate  * The meat of the bind operation.  The steps in pool_do_bind are:
12350Sstevel@tonic-gate  *
12360Sstevel@tonic-gate  * 1) Set PBWAIT in the p_poolflag of any process of interest, and add all
12370Sstevel@tonic-gate  *    such processes to an array.  For any interesting process that has
12380Sstevel@tonic-gate  *    threads inside the pool barrier set, increment a counter by the
12390Sstevel@tonic-gate  *    count of such threads.  Once PBWAIT is set on a process, that process
12400Sstevel@tonic-gate  *    will not disappear.
12410Sstevel@tonic-gate  *
 * 2) Wait for the counter from step 1 to drop to zero.  Any process which
12430Sstevel@tonic-gate  *    calls pool_barrier_exit() and notices that PBWAIT has been set on it
12440Sstevel@tonic-gate  *    will decrement that counter before going to sleep, and the process
12450Sstevel@tonic-gate  *    calling pool_barrier_exit() which does the final decrement will wake us.
12460Sstevel@tonic-gate  *
12470Sstevel@tonic-gate  * 3) For each interesting process, perform a calculation on it to see if
12480Sstevel@tonic-gate  *    the bind will actually succeed.  This uses the following three
12490Sstevel@tonic-gate  *    resource-set-specific functions:
12500Sstevel@tonic-gate  *
12510Sstevel@tonic-gate  *    - int set_bind_start(procs, pool)
12520Sstevel@tonic-gate  *
12530Sstevel@tonic-gate  *      Determine whether the given array of processes can be bound to the
12540Sstevel@tonic-gate  *      resource set associated with the given pool.  If it can, take and hold
12550Sstevel@tonic-gate  *      any locks necessary to ensure that the operation will succeed, and
12560Sstevel@tonic-gate  *      make any necessary reservations in the target resource set.  If it
12570Sstevel@tonic-gate  *      can't, return failure with no reservations made and no new locks held.
12580Sstevel@tonic-gate  *
12590Sstevel@tonic-gate  *    - void set_bind_abort(procs, pool)
12600Sstevel@tonic-gate  *
12610Sstevel@tonic-gate  *      set_bind_start() has completed successfully, but another resource set's
12620Sstevel@tonic-gate  *      set_bind_start() has failed, and we haven't begun the bind yet.  Undo
12630Sstevel@tonic-gate  *      any reservations made and drop any locks acquired by our
12640Sstevel@tonic-gate  *      set_bind_start().
12650Sstevel@tonic-gate  *
12660Sstevel@tonic-gate  *    - void set_bind_finish(void)
12670Sstevel@tonic-gate  *
12680Sstevel@tonic-gate  *      The bind has completed successfully.  The processes have been released,
12690Sstevel@tonic-gate  *      and the reservation acquired in set_bind_start() has been depleted as
12700Sstevel@tonic-gate  *      the processes have finished their bindings.  Drop any locks acquired by
12710Sstevel@tonic-gate  *      set_bind_start().
12720Sstevel@tonic-gate  *
12730Sstevel@tonic-gate  * 4) If we've decided that we can proceed with the bind, iterate through
12740Sstevel@tonic-gate  *    the list of interesting processes, grab the necessary locks (which
12750Sstevel@tonic-gate  *    may differ per resource set), perform the bind, and ASSERT that it
12760Sstevel@tonic-gate  *    succeeds.  Once a process has been rebound, it can be awakened.
12770Sstevel@tonic-gate  *
12780Sstevel@tonic-gate  * The operations from step 4 must be kept in sync with anything which might
12790Sstevel@tonic-gate  * cause the bind operations (e.g., cpupart_bind_thread()) to fail, and
12800Sstevel@tonic-gate  * are thus located in the same source files as the associated bind operations.
12810Sstevel@tonic-gate  */
/*
 * pool_do_bind() interface summary.
 *
 * Rebinds every process selected by (idtype, id) to "pool".  The caller
 * must hold pool_lock (asserted below).
 *
 *   pool    target pool; its scheduling class is fetched with
 *           pool_get_class() and, when POOL_BIND_PSET is set, its
 *           pool_pset->pset_id is the processor set to bind to.
 *   idtype  P_PID, P_TASKID, P_PROJID, P_POOLID and P_ZONEID are the values
 *           given special handling here.
 *   id      identifier of the given type; P_MYID is translated with
 *           getmyid() before the process set is built.
 *   flags   only POOL_BIND_PSET is tested; it enables pset_bind_start(),
 *           pool_pset_bind() and pset_bind_finish().
 *
 * Returns 0 on success, otherwise:
 *   EINVAL  pool_get_class() returned POOL_CLASS_INVAL, or a P_PID/P_TASKID
 *           request matched a process outside the global zone.
 *   ESRCH   the zone or project was not found, or no process was selected
 *           (P_POOLID and P_ZONEID return 0 in the latter case).
 *   EBUSY   the zone status is past ZONE_IS_RUNNING.
 *   EAGAIN  pool_barrier_count was still non-zero after cv_wait_sig()
 *           returned; the caller is expected to retry.
 *   other   the non-zero value returned by pset_bind_start().
 */
12820Sstevel@tonic-gate int
12830Sstevel@tonic-gate pool_do_bind(pool_t *pool, idtype_t idtype, id_t id, int flags)
12840Sstevel@tonic-gate {
12850Sstevel@tonic-gate 	extern uint_t nproc;
12860Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(curthread);
12870Sstevel@tonic-gate 	proc_t **pp, **procs;
12880Sstevel@tonic-gate 	proc_t *prstart;
12890Sstevel@tonic-gate 	int procs_count = 0;
12900Sstevel@tonic-gate 	kproject_t *kpj;
12910Sstevel@tonic-gate 	procset_t set;
12920Sstevel@tonic-gate 	zone_t *zone;
12930Sstevel@tonic-gate 	int procs_size;
12940Sstevel@tonic-gate 	int rv = 0;
12950Sstevel@tonic-gate 	proc_t *p;
12960Sstevel@tonic-gate 	id_t cid = -1;
12970Sstevel@tonic-gate 
12980Sstevel@tonic-gate 	ASSERT(pool_lock_held());
12990Sstevel@tonic-gate 
	/*
	 * The early returns below all happen before procs is allocated and
	 * before pidlock is taken, so each one only undoes what was acquired
	 * in its own branch.
	 */
13000Sstevel@tonic-gate 	if ((cid = pool_get_class(pool)) == POOL_CLASS_INVAL)
13010Sstevel@tonic-gate 		return (EINVAL);
13020Sstevel@tonic-gate 
	/*
	 * For P_ZONEID, look the zone up now.  On success the zone pointer
	 * is kept until the out: label, where zone_rele() is called.
	 */
13030Sstevel@tonic-gate 	if (idtype == P_ZONEID) {
13040Sstevel@tonic-gate 		zone = zone_find_by_id(id);
13050Sstevel@tonic-gate 		if (zone == NULL)
13060Sstevel@tonic-gate 			return (ESRCH);
13070Sstevel@tonic-gate 		if (zone_status_get(zone) > ZONE_IS_RUNNING) {
13080Sstevel@tonic-gate 			zone_rele(zone);
13090Sstevel@tonic-gate 			return (EBUSY);
13100Sstevel@tonic-gate 		}
13110Sstevel@tonic-gate 	}
13120Sstevel@tonic-gate 
	/*
	 * For P_PROJID, look the project up (with global_zone as the zone
	 * argument) and take its kpj_poolbind mutex.  The mutex is dropped
	 * and project_rele() is called at the out: label.
	 */
13130Sstevel@tonic-gate 	if (idtype == P_PROJID) {
1314*3247Sgjelinek 		kpj = project_hold_by_id(id, global_zone, PROJECT_HOLD_FIND);
13150Sstevel@tonic-gate 		if (kpj == NULL)
13160Sstevel@tonic-gate 			return (ESRCH);
13170Sstevel@tonic-gate 		mutex_enter(&kpj->kpj_poolbind);
13180Sstevel@tonic-gate 	}
13190Sstevel@tonic-gate 
	/*
	 * Allocate the zero-filled procs array and enter pidlock.  Both
	 * branches leave this block with pidlock held.
	 */
13200Sstevel@tonic-gate 	if (idtype == P_PID) {
13210Sstevel@tonic-gate 		/*
13220Sstevel@tonic-gate 		 * Fast-path for a single process case.
13230Sstevel@tonic-gate 		 */
13240Sstevel@tonic-gate 		procs_size = 2;	/* procs is NULL-terminated */
13250Sstevel@tonic-gate 		procs = kmem_zalloc(procs_size * sizeof (proc_t *), KM_SLEEP);
13260Sstevel@tonic-gate 		mutex_enter(&pidlock);
13270Sstevel@tonic-gate 	} else {
13280Sstevel@tonic-gate 		/*
13290Sstevel@tonic-gate 		 * We will need enough slots for proc_t pointers for as many as
13300Sstevel@tonic-gate 		 * twice the number of currently running processes (assuming
13310Sstevel@tonic-gate 		 * that each one could be in fork() creating a new child).
13320Sstevel@tonic-gate 		 */
13330Sstevel@tonic-gate 		for (;;) {
13340Sstevel@tonic-gate 			procs_size = nproc * 2;
13350Sstevel@tonic-gate 			procs = kmem_zalloc(procs_size * sizeof (proc_t *),
13360Sstevel@tonic-gate 			    KM_SLEEP);
13370Sstevel@tonic-gate 			mutex_enter(&pidlock);
13380Sstevel@tonic-gate 
			/*
			 * nproc was sampled above without pidlock; re-check
			 * it now that pidlock is held.
			 */
13390Sstevel@tonic-gate 			if (nproc * 2 <= procs_size)
13400Sstevel@tonic-gate 				break;
13410Sstevel@tonic-gate 			/*
13420Sstevel@tonic-gate 			 * If nproc has changed, try again.
13430Sstevel@tonic-gate 			 */
13440Sstevel@tonic-gate 			mutex_exit(&pidlock);
			/* procs_size still holds the size of this allocation */
13450Sstevel@tonic-gate 			kmem_free(procs, procs_size * sizeof (proc_t *));
13460Sstevel@tonic-gate 		}
13470Sstevel@tonic-gate 	}
13480Sstevel@tonic-gate 
	/*
	 * NOTE(review): P_MYID is translated only here, after the zone and
	 * project lookups above have already used the raw id.  Confirm that
	 * callers never pass P_MYID together with P_ZONEID or P_PROJID.
	 */
13490Sstevel@tonic-gate 	if (id == P_MYID)
13500Sstevel@tonic-gate 		id = getmyid(idtype);
13510Sstevel@tonic-gate 	setprocset(&set, POP_AND, idtype, id, P_ALL, 0);
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate 	/*
13540Sstevel@tonic-gate 	 * Do a first scan, and select target processes.
13550Sstevel@tonic-gate 	 */
	/*
	 * For P_PID the walk starts at the process returned by prfind();
	 * if that is NULL the loop body never runs, procs_count stays 0 and
	 * ESRCH is returned after the skip: label.  For every other idtype
	 * the walk starts at practive and follows p_next.
	 */
13560Sstevel@tonic-gate 	if (idtype == P_PID)
13570Sstevel@tonic-gate 		prstart = prfind(id);
13580Sstevel@tonic-gate 	else
13590Sstevel@tonic-gate 		prstart = practive;
13600Sstevel@tonic-gate 	for (p = prstart, pp = procs; p != NULL; p = p->p_next) {
13610Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
13620Sstevel@tonic-gate 		/*
13630Sstevel@tonic-gate 		 * Skip processes that don't match our (id, idtype) set or
13640Sstevel@tonic-gate 		 * are on their way to becoming zombies.  Skip kernel processes
13650Sstevel@tonic-gate 		 * from the global zone.
13660Sstevel@tonic-gate 		 */
13670Sstevel@tonic-gate 		if (procinset(p, &set) == 0 ||
13680Sstevel@tonic-gate 		    p->p_poolflag & PEXITED ||
13690Sstevel@tonic-gate 		    ((p->p_flag & SSYS) && INGLOBALZONE(p))) {
13700Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
13710Sstevel@tonic-gate 			continue;
13720Sstevel@tonic-gate 		}
		/*
		 * Extra rules for processes outside the global zone.  The
		 * switch has no default case, so any idtype not listed
		 * (P_ZONEID, for example) falls out of it and the process is
		 * selected like a global-zone one.
		 */
13730Sstevel@tonic-gate 		if (!INGLOBALZONE(p)) {
13740Sstevel@tonic-gate 			switch (idtype) {
13750Sstevel@tonic-gate 			case P_PID:
13760Sstevel@tonic-gate 			case P_TASKID:
13770Sstevel@tonic-gate 				/*
13780Sstevel@tonic-gate 				 * Can't bind processes or tasks
13790Sstevel@tonic-gate 				 * in local zones to pools.
13800Sstevel@tonic-gate 				 */
				/*
				 * Drop both locks, then wake whatever has
				 * been collected so far.  procs came from
				 * kmem_zalloc(), so it is NULL-terminated
				 * even though the cap after the loop has not
				 * been written yet.
				 */
13810Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
13820Sstevel@tonic-gate 				mutex_exit(&pidlock);
13830Sstevel@tonic-gate 				pool_bind_wakeall(procs);
13840Sstevel@tonic-gate 				rv = EINVAL;
13850Sstevel@tonic-gate 				goto out;
13860Sstevel@tonic-gate 			case P_PROJID:
13870Sstevel@tonic-gate 				/*
13880Sstevel@tonic-gate 				 * Only projects in the global
13890Sstevel@tonic-gate 				 * zone can be rebound.
13900Sstevel@tonic-gate 				 */
13910Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
13920Sstevel@tonic-gate 				continue;
13930Sstevel@tonic-gate 			case P_POOLID:
13940Sstevel@tonic-gate 				/*
13950Sstevel@tonic-gate 				 * When rebinding pools, processes can be
13960Sstevel@tonic-gate 				 * in different zones.
13970Sstevel@tonic-gate 				 */
13980Sstevel@tonic-gate 				break;
13990Sstevel@tonic-gate 			}
14000Sstevel@tonic-gate 		}
14010Sstevel@tonic-gate 
		/*
		 * The process is selected: flag it with PBWAIT (still under
		 * p_lock) and record it in the procs array below.
		 */
14020Sstevel@tonic-gate 		p->p_poolflag |= PBWAIT;
14030Sstevel@tonic-gate 		/*
14040Sstevel@tonic-gate 		 * If some threads in this process are inside the pool
14050Sstevel@tonic-gate 		 * barrier, add them to pool_barrier_count, as we have
14060Sstevel@tonic-gate 		 * to wait for all of them to exit the barrier.
14070Sstevel@tonic-gate 		 */
14080Sstevel@tonic-gate 		if (p->p_poolcnt > 0) {
14090Sstevel@tonic-gate 			mutex_enter(&pool_barrier_lock);
14100Sstevel@tonic-gate 			pool_barrier_count += p->p_poolcnt;
14110Sstevel@tonic-gate 			mutex_exit(&pool_barrier_lock);
14120Sstevel@tonic-gate 		}
		/*
		 * NOTE(review): the assertion only covers this store; the
		 * terminating NULL written after the loop needs one further
		 * slot, which relies on the sizing chosen at allocation time
		 * (2 for P_PID, 2 * nproc otherwise).
		 */
14130Sstevel@tonic-gate 		ASSERT(pp < &procs[procs_size]);
14140Sstevel@tonic-gate 		*pp++ = p;
14150Sstevel@tonic-gate 		procs_count++;
14160Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
14170Sstevel@tonic-gate 
14180Sstevel@tonic-gate 		/*
14190Sstevel@tonic-gate 		 * We just found our process, so if we're only rebinding a
14200Sstevel@tonic-gate 		 * single process then get out of this loop.
14210Sstevel@tonic-gate 		 */
14220Sstevel@tonic-gate 		if (idtype == P_PID)
14230Sstevel@tonic-gate 			break;
14240Sstevel@tonic-gate 	}
14250Sstevel@tonic-gate 	*pp = NULL;	/* cap off the end of the array */
14260Sstevel@tonic-gate 	mutex_exit(&pidlock);
14270Sstevel@tonic-gate 
14280Sstevel@tonic-gate 	/*
14290Sstevel@tonic-gate 	 * Wait for relevant processes to stop before they try to enter the
14300Sstevel@tonic-gate 	 * barrier or at the exit from the barrier.  Make sure that we do
14310Sstevel@tonic-gate 	 * not get stopped here while we're holding pool_lock.  If we were
14320Sstevel@tonic-gate 	 * requested to stop, or got a signal then return EAGAIN to let the
14330Sstevel@tonic-gate 	 * library know that it needs to retry.
14340Sstevel@tonic-gate 	 */
	/*
	 * The return value of cv_wait_sig() is deliberately discarded: after
	 * any wakeup pool_barrier_count is re-read, and if it is still
	 * non-zero the request is abandoned.  The loop therefore waits at
	 * most once.  lwp_nostop is raised for the duration of the wait and
	 * lowered again on both the success and the EAGAIN path.
	 */
14350Sstevel@tonic-gate 	mutex_enter(&pool_barrier_lock);
14360Sstevel@tonic-gate 	lwp->lwp_nostop++;
14370Sstevel@tonic-gate 	while (pool_barrier_count > 0) {
14380Sstevel@tonic-gate 		(void) cv_wait_sig(&pool_barrier_cv, &pool_barrier_lock);
14390Sstevel@tonic-gate 		if (pool_barrier_count > 0) {
14400Sstevel@tonic-gate 			/*
14410Sstevel@tonic-gate 			 * We either got a signal or were requested to
14420Sstevel@tonic-gate 			 * stop by /proc.  Bail out with EAGAIN.  If we were
14430Sstevel@tonic-gate 			 * requested to stop, we'll stop in post_syscall()
14440Sstevel@tonic-gate 			 * on our way back to userland.
14450Sstevel@tonic-gate 			 */
			/* pool_barrier_lock is dropped before the wakeups */
14460Sstevel@tonic-gate 			mutex_exit(&pool_barrier_lock);
14470Sstevel@tonic-gate 			pool_bind_wakeall(procs);
14480Sstevel@tonic-gate 			lwp->lwp_nostop--;
14490Sstevel@tonic-gate 			rv = EAGAIN;
14500Sstevel@tonic-gate 			goto out;
14510Sstevel@tonic-gate 		}
14520Sstevel@tonic-gate 	}
14530Sstevel@tonic-gate 	lwp->lwp_nostop--;
14540Sstevel@tonic-gate 	mutex_exit(&pool_barrier_lock);
14550Sstevel@tonic-gate 
	/* A single-process bind does not need the second scan. */
14560Sstevel@tonic-gate 	if (idtype == P_PID)
14570Sstevel@tonic-gate 		goto skip;
14580Sstevel@tonic-gate 
14590Sstevel@tonic-gate 	/*
14600Sstevel@tonic-gate 	 * Do another run, and drop processes that were inside the barrier
14610Sstevel@tonic-gate 	 * in exit(): by the time they have reached pool_barrier_exit
14620Sstevel@tonic-gate 	 * they are of no further interest to us.  Pick up child processes that
14630Sstevel@tonic-gate 	 * were created by fork() but didn't exist during our first scan.
14640Sstevel@tonic-gate 	 * Their parents are now stopped at pool_barrier_exit in cfork().
14650Sstevel@tonic-gate 	 */
14660Sstevel@tonic-gate 	mutex_enter(&pidlock);
14670Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++) {
		/*
		 * p_poolflag is read here with pidlock held but without
		 * p->p_lock.
		 */
14680Sstevel@tonic-gate 		if (p->p_poolflag & PEXITED) {
14690Sstevel@tonic-gate 			ASSERT(p->p_lwpcnt == 0);
14700Sstevel@tonic-gate 			pool_bind_wake(p);
			/*
			 * Remove the entry by overwriting it with the last
			 * live entry and shrinking the array by one.  If this
			 * entry is itself the last one, the slot ends up NULL
			 * and the loop terminates on the next test.
			 */
14710Sstevel@tonic-gate 			/* swap with the last non-NULL slot */
14720Sstevel@tonic-gate 			*pp = procs[procs_count - 1];
14730Sstevel@tonic-gate 			procs[procs_count - 1] = NULL;
14740Sstevel@tonic-gate 			procs_count--;
14750Sstevel@tonic-gate 			pp--;			/* the loop's pp++ revisits this slot */
14760Sstevel@tonic-gate 			continue;
14770Sstevel@tonic-gate 		}
14780Sstevel@tonic-gate 		/*
14790Sstevel@tonic-gate 		 * Look at the child and check if it should be rebound also.
14800Sstevel@tonic-gate 		 * We're holding pidlock, so it is safe to reference p_child.
14810Sstevel@tonic-gate 		 */
		/*
		 * Only the p_child link itself is followed; no sibling list
		 * is walked here.  From this point on p refers to the child.
		 */
14820Sstevel@tonic-gate 		if ((p = p->p_child) == NULL)
14830Sstevel@tonic-gate 			continue;
14840Sstevel@tonic-gate 
14850Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
14860Sstevel@tonic-gate 		/*
14870Sstevel@tonic-gate 		 * Skip processes in local zones if we're not binding
14880Sstevel@tonic-gate 		 * zones to pools (P_ZONEID).  Skip kernel processes also.
14890Sstevel@tonic-gate 		 */
14900Sstevel@tonic-gate 		if ((!INGLOBALZONE(p) && idtype != P_ZONEID) ||
14910Sstevel@tonic-gate 		    p->p_flag & SSYS) {
14920Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
14930Sstevel@tonic-gate 			continue;
14940Sstevel@tonic-gate 		}
14950Sstevel@tonic-gate 
14960Sstevel@tonic-gate 		/*
14970Sstevel@tonic-gate 		 * If the child process has been already created by fork(), has
14980Sstevel@tonic-gate 		 * not exited, and has not been added to the list already,
14990Sstevel@tonic-gate 		 * then add it now.  We will hit this process again (since we
15000Sstevel@tonic-gate 		 * stick it at the end of the procs list) but it will be ignored
15010Sstevel@tonic-gate 		 * because it will have the PBWAIT flag set.
15020Sstevel@tonic-gate 		 */
		/*
		 * NOTE(review): unlike the first scan there is no bounds
		 * assertion on these two stores; they rely on the 2 * nproc
		 * sizing done at allocation time.
		 */
15030Sstevel@tonic-gate 		if (procinset(p, &set) &&
15040Sstevel@tonic-gate 		    !(p->p_poolflag & PEXITED) &&
15050Sstevel@tonic-gate 		    !(p->p_poolflag & PBWAIT)) {
15060Sstevel@tonic-gate 			ASSERT(p->p_child == NULL); /* no child of a child */
15070Sstevel@tonic-gate 			procs[procs_count] = p;
15080Sstevel@tonic-gate 			procs[procs_count + 1] = NULL;
15090Sstevel@tonic-gate 			procs_count++;
15100Sstevel@tonic-gate 			p->p_poolflag |= PBWAIT;
15110Sstevel@tonic-gate 		}
15120Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
15130Sstevel@tonic-gate 	}
15140Sstevel@tonic-gate 	mutex_exit(&pidlock);
15150Sstevel@tonic-gate skip:
15160Sstevel@tonic-gate 	/*
15170Sstevel@tonic-gate 	 * If there are no processes to rebind then return ESRCH, unless
15180Sstevel@tonic-gate 	 * we're associating a pool with new resource set, destroying it,
15190Sstevel@tonic-gate 	 * or binding a zone to a pool.
15200Sstevel@tonic-gate 	 */
15210Sstevel@tonic-gate 	if (procs_count == 0) {
15220Sstevel@tonic-gate 		if (idtype == P_POOLID || idtype == P_ZONEID)
15230Sstevel@tonic-gate 			rv = 0;
15240Sstevel@tonic-gate 		else
15250Sstevel@tonic-gate 			rv = ESRCH;
15260Sstevel@tonic-gate 		goto out;
15270Sstevel@tonic-gate 	}
15280Sstevel@tonic-gate 
15290Sstevel@tonic-gate #ifdef DEBUG
15300Sstevel@tonic-gate 	/*
15310Sstevel@tonic-gate 	 * All processes in the array should have PBWAIT set, and none should
15320Sstevel@tonic-gate 	 * be in the critical section.  Even though p_poolflag is protected by
15330Sstevel@tonic-gate 	 * the p_lock, these assertions should be stable across the dropping of
15340Sstevel@tonic-gate 	 * p_lock.
15350Sstevel@tonic-gate 	 */
15360Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++) {
15370Sstevel@tonic-gate 		ASSERT(p->p_poolflag & PBWAIT);
15380Sstevel@tonic-gate 		ASSERT(p->p_poolcnt == 0);
15390Sstevel@tonic-gate 		ASSERT(procinset(p, &set));
15400Sstevel@tonic-gate 	}
15410Sstevel@tonic-gate #endif
15420Sstevel@tonic-gate 
15430Sstevel@tonic-gate 	/*
15440Sstevel@tonic-gate 	 * Do the check if processor set rebinding is going to succeed or not.
15450Sstevel@tonic-gate 	 */
	/*
	 * On failure every selected process is woken, rv carries the value
	 * returned by pset_bind_start(), and pset_bind_finish() is not
	 * called on that path.
	 */
15460Sstevel@tonic-gate 	if ((flags & POOL_BIND_PSET) &&
15470Sstevel@tonic-gate 	    (rv = pset_bind_start(procs, pool)) != 0) {
15480Sstevel@tonic-gate 		pool_bind_wakeall(procs);
15490Sstevel@tonic-gate 		goto out;
15500Sstevel@tonic-gate 	}
15510Sstevel@tonic-gate 
15520Sstevel@tonic-gate 	/*
15530Sstevel@tonic-gate 	 * At this point, all bind operations should succeed.
15540Sstevel@tonic-gate 	 */
15550Sstevel@tonic-gate 	for (pp = procs; (p = *pp) != NULL; pp++) {
15560Sstevel@tonic-gate 		if (flags & POOL_BIND_PSET) {
15570Sstevel@tonic-gate 			psetid_t psetid = pool->pool_pset->pset_id;
15580Sstevel@tonic-gate 			void *zonebuf;
15590Sstevel@tonic-gate 			void *projbuf;
15600Sstevel@tonic-gate 
15610Sstevel@tonic-gate 			/*
15620Sstevel@tonic-gate 			 * Pre-allocate one buffer for FSS (per-project
15630Sstevel@tonic-gate 			 * buffer for a new pset) in case this is the
15640Sstevel@tonic-gate 			 * first thread from its current project getting
15650Sstevel@tonic-gate 			 * bound to this processor set.
15660Sstevel@tonic-gate 			 */
			/*
			 * Both buffers are obtained before pidlock and
			 * p_lock are taken (presumably because the
			 * allocation may block; confirm against
			 * fss_allocbuf()).
			 */
15670Sstevel@tonic-gate 			projbuf = fss_allocbuf(FSS_ONE_BUF, FSS_ALLOC_PROJ);
15680Sstevel@tonic-gate 			zonebuf = fss_allocbuf(FSS_ONE_BUF, FSS_ALLOC_ZONE);
15690Sstevel@tonic-gate 
15700Sstevel@tonic-gate 			mutex_enter(&pidlock);
15710Sstevel@tonic-gate 			mutex_enter(&p->p_lock);
15720Sstevel@tonic-gate 			pool_pset_bind(p, psetid, projbuf, zonebuf);
15730Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
15740Sstevel@tonic-gate 			mutex_exit(&pidlock);
15750Sstevel@tonic-gate 			/*
15760Sstevel@tonic-gate 			 * Free the buffers pre-allocated above if they
15770Sstevel@tonic-gate 			 * weren't actually used.
15780Sstevel@tonic-gate 			 */
15790Sstevel@tonic-gate 			fss_freebuf(projbuf, FSS_ALLOC_PROJ);
15800Sstevel@tonic-gate 			fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
15810Sstevel@tonic-gate 		}
15820Sstevel@tonic-gate 		/*
15830Sstevel@tonic-gate 		 * Now let's change the scheduling class of this
15840Sstevel@tonic-gate 		 * process if our target pool has it defined.
15850Sstevel@tonic-gate 		 */
15860Sstevel@tonic-gate 		if (cid != POOL_CLASS_UNSET)
15870Sstevel@tonic-gate 			pool_change_class(p, cid);
15880Sstevel@tonic-gate 
15890Sstevel@tonic-gate 		/*
15900Sstevel@tonic-gate 		 * It is safe to reference p_pool here without holding
15910Sstevel@tonic-gate 		 * p_lock because it cannot change underneath us.
15920Sstevel@tonic-gate 		 * We're holding pool_lock here, so nobody else can be
15930Sstevel@tonic-gate 		 * moving this process between pools.  If process "p"
15940Sstevel@tonic-gate 		 * would be exiting, we're guaranteed that it would be blocked
15950Sstevel@tonic-gate 		 * at pool_barrier_enter() in exit().  Otherwise, it would've
15960Sstevel@tonic-gate 		 * been skipped by one of our scans of the practive list
15970Sstevel@tonic-gate 		 * as a process with PEXITED flag set.
15980Sstevel@tonic-gate 		 */
		/*
		 * Move one reference from the old pool's pool_ref to the new
		 * pool's pool_ref; nothing is done when the process is
		 * already in the target pool.
		 */
15990Sstevel@tonic-gate 		if (p->p_pool != pool) {
16000Sstevel@tonic-gate 			ASSERT(p->p_pool->pool_ref > 0);
16010Sstevel@tonic-gate 			atomic_add_32(&p->p_pool->pool_ref, -1);
16020Sstevel@tonic-gate 			p->p_pool = pool;
16030Sstevel@tonic-gate 			atomic_add_32(&p->p_pool->pool_ref, 1);
16040Sstevel@tonic-gate 		}
16050Sstevel@tonic-gate 		/*
16060Sstevel@tonic-gate 		 * Okay, we've tortured this guy enough.
16070Sstevel@tonic-gate 		 * Let this poor process go now.
16080Sstevel@tonic-gate 		 */
16090Sstevel@tonic-gate 		pool_bind_wake(p);
16100Sstevel@tonic-gate 	}
16110Sstevel@tonic-gate 	if (flags & POOL_BIND_PSET)
16120Sstevel@tonic-gate 		pset_bind_finish();
16130Sstevel@tonic-gate 
	/*
	 * Common exit.  Every path that reaches this label has already
	 * allocated procs.  kpj and zone are assigned only for P_PROJID and
	 * P_ZONEID respectively, and are read here only under that same
	 * idtype.
	 */
16140Sstevel@tonic-gate out:	switch (idtype) {
16150Sstevel@tonic-gate 	case P_PROJID:
16160Sstevel@tonic-gate 		ASSERT(kpj != NULL);
16170Sstevel@tonic-gate 		mutex_exit(&kpj->kpj_poolbind);
16180Sstevel@tonic-gate 		project_rele(kpj);
16190Sstevel@tonic-gate 		break;
16200Sstevel@tonic-gate 	case P_ZONEID:
		/* The zone is pointed at the new pool only on success. */
16210Sstevel@tonic-gate 		if (rv == 0) {
16220Sstevel@tonic-gate 			mutex_enter(&cpu_lock);
16230Sstevel@tonic-gate 			zone_pool_set(zone, pool);
16240Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
16250Sstevel@tonic-gate 		}
		/* The timestamp is refreshed regardless of rv. */
16260Sstevel@tonic-gate 		zone->zone_pool_mod = gethrtime();
16270Sstevel@tonic-gate 		zone_rele(zone);
16280Sstevel@tonic-gate 		break;
16290Sstevel@tonic-gate 	}
16300Sstevel@tonic-gate 
	/* procs_size is unchanged since the allocation that succeeded. */
16310Sstevel@tonic-gate 	kmem_free(procs, procs_size * sizeof (proc_t *));
16320Sstevel@tonic-gate 	ASSERT(pool_barrier_count == 0);
16330Sstevel@tonic-gate 	return (rv);
16340Sstevel@tonic-gate }
1635