10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
51336Sgm149974 * Common Development and Distribution License (the "License").
61336Sgm149974 * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
216247Sraf
220Sstevel@tonic-gate /*
23*12376SMichael.Lim@Sun.COM * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
260Sstevel@tonic-gate #include <sys/pool.h>
270Sstevel@tonic-gate #include <sys/pool_impl.h>
280Sstevel@tonic-gate #include <sys/pool_pset.h>
290Sstevel@tonic-gate #include <sys/id_space.h>
300Sstevel@tonic-gate #include <sys/mutex.h>
310Sstevel@tonic-gate #include <sys/nvpair.h>
320Sstevel@tonic-gate #include <sys/cpuvar.h>
330Sstevel@tonic-gate #include <sys/errno.h>
340Sstevel@tonic-gate #include <sys/cmn_err.h>
350Sstevel@tonic-gate #include <sys/systm.h>
360Sstevel@tonic-gate #include <sys/proc.h>
370Sstevel@tonic-gate #include <sys/fss.h>
380Sstevel@tonic-gate #include <sys/class.h>
390Sstevel@tonic-gate #include <sys/exacct.h>
400Sstevel@tonic-gate #include <sys/utsname.h>
410Sstevel@tonic-gate #include <sys/procset.h>
420Sstevel@tonic-gate #include <sys/atomic.h>
430Sstevel@tonic-gate #include <sys/zone.h>
440Sstevel@tonic-gate #include <sys/policy.h>
456247Sraf #include <sys/schedctl.h>
4611878SVenu.Iyer@Sun.COM #include <sys/taskq.h>
470Sstevel@tonic-gate
480Sstevel@tonic-gate /*
490Sstevel@tonic-gate * RESOURCE POOLS
500Sstevel@tonic-gate *
 * The resource pools facility brings together process-bindable resources into
 * a common abstraction called a pool. Processor sets and other entities can
 * be configured, grouped, and labelled such that workload components can be
 * associated with a subset of a system's total resources.
550Sstevel@tonic-gate *
560Sstevel@tonic-gate * When disabled, the pools facility is "invisible". All processes belong
570Sstevel@tonic-gate * to the same pool (pool_default), and processor sets can be managed through
580Sstevel@tonic-gate * the old pset() system call. When enabled, processor sets can only be
590Sstevel@tonic-gate * managed via the pools facility. New pools can be created and associated
600Sstevel@tonic-gate * with processor sets. Processes can be bound to pools which have non-empty
610Sstevel@tonic-gate * resource sets.
620Sstevel@tonic-gate *
630Sstevel@tonic-gate * Locking: pool_lock() protects global pools state and must be called
640Sstevel@tonic-gate * before modifying the configuration, or when taking a snapshot of the
650Sstevel@tonic-gate * configuration. If pool_lock_intr() is used, the operation may be
660Sstevel@tonic-gate * interrupted by a signal or a request.
670Sstevel@tonic-gate *
 * To prevent processes from being rebound between pools while they are in
 * the middle of an operation which affects resource set bindings, such
 * operations must be surrounded by calls to pool_barrier_enter() and
 * pool_barrier_exit().  This mechanism guarantees that such processes will
 * be stopped either at the beginning or at the end of the barrier so that
 * the rebind operation can atomically bind the process and its threads
 * to new resource sets, and then let the process run again.
750Sstevel@tonic-gate *
760Sstevel@tonic-gate * Lock ordering with respect to other locks is as follows:
770Sstevel@tonic-gate *
780Sstevel@tonic-gate * pool_lock() -> cpu_lock -> pidlock -> p_lock -> pool_barrier_lock
790Sstevel@tonic-gate *
800Sstevel@tonic-gate * Most static and global variables defined in this file are protected
810Sstevel@tonic-gate * by calling pool_lock().
820Sstevel@tonic-gate *
830Sstevel@tonic-gate * The operation that binds tasks and projects to pools is atomic. That is,
840Sstevel@tonic-gate * either all processes in a given task or a project will be bound to a
850Sstevel@tonic-gate * new pool, or (in case of an error) they will be all left bound to the
860Sstevel@tonic-gate * old pool. Processes in a given task or a given project can only be bound to
870Sstevel@tonic-gate * different pools if they were rebound individually one by one as single
880Sstevel@tonic-gate * processes. Threads or LWPs of the same process do not have pool bindings,
890Sstevel@tonic-gate * and are bound to the same resource sets associated with the resource pool
900Sstevel@tonic-gate * of that process.
910Sstevel@tonic-gate *
920Sstevel@tonic-gate * The following picture shows one possible pool configuration with three
930Sstevel@tonic-gate * pools and three processor sets. Note that processor set "foo" is not
940Sstevel@tonic-gate * associated with any pools and therefore cannot have any processes
950Sstevel@tonic-gate * bound to it. Two pools (default and foo) are associated with the
960Sstevel@tonic-gate * same processor set (default). Also, note that processes in Task 2
970Sstevel@tonic-gate * are bound to different pools.
980Sstevel@tonic-gate *
990Sstevel@tonic-gate *
 *                                                           Processor Sets
 *                                                             +---------+
 *                    +--------------+========================>| default |
 *                   a|              |                         +---------+
 *                   s|              |                              ||
 *                   s|              |                         +---------+
 *                   o|              |                         |   foo   |
 *                   c|              |                         +---------+
 *                   i|              |                              ||
 *                   a|              |                         +---------+
 *                   t|              |                 +------>|   bar   |
 *                   e|              |                 |       +---------+
 *                   d|              |                 |
 *                    |              |                 |
 *              +---------+      +---------+      +---------+
 *      Pools   | default |======|   foo   |======|   bar   |
 *              +---------+      +---------+      +---------+
 *                  @  @            @              @ @   @
 *                 b|  |            |              | |   |
 *                 o|  |            |              | |   |
 *                 u|  +-----+      |      +-------+ |   +---+
 *                 n|        |      |      |         |       |
 *             ....d|........|......|......|.........|.......|....
 *             :    |   ::   |      |      |    ::   |       |   :
 *             :  +---+ :: +---+  +---+  +---+  :: +---+   +---+ :
 *   Processes :  | p | :: | p |  | p |  | p |  :: | p |...| p | :
 *             :  +---+ :: +---+  +---+  +---+  :: +---+   +---+ :
 *             :........::......................::...............:
 *               Task 1           Task 2              Task N
 *                 |                 |                   |
 *                 |                 |                   |
 *                 |  +-----------+  |             +-----------+
 *                 +--| Project 1 |--+             | Project N |
 *                    +-----------+                +-----------+
1340Sstevel@tonic-gate *
1350Sstevel@tonic-gate * This is just an illustration of relationships between processes, tasks,
1360Sstevel@tonic-gate * projects, pools, and processor sets. New types of resource sets will be
1370Sstevel@tonic-gate * added in the future.
1380Sstevel@tonic-gate */
1390Sstevel@tonic-gate
1400Sstevel@tonic-gate pool_t *pool_default; /* default pool which always exists */
1410Sstevel@tonic-gate int pool_count; /* number of pools created on this system */
1420Sstevel@tonic-gate int pool_state; /* pools state -- enabled/disabled */
1430Sstevel@tonic-gate void *pool_buf; /* pre-commit snapshot of the pools state */
1440Sstevel@tonic-gate size_t pool_bufsz; /* size of pool_buf */
1450Sstevel@tonic-gate static hrtime_t pool_pool_mod; /* last modification time for pools */
1460Sstevel@tonic-gate static hrtime_t pool_sys_mod; /* last modification time for system */
1470Sstevel@tonic-gate static nvlist_t *pool_sys_prop; /* system properties */
1480Sstevel@tonic-gate static id_space_t *pool_ids; /* pool ID space */
1490Sstevel@tonic-gate static list_t pool_list; /* doubly-linked list of pools */
1500Sstevel@tonic-gate static kmutex_t pool_mutex; /* protects pool_busy_* */
1510Sstevel@tonic-gate static kcondvar_t pool_busy_cv; /* waiting for "pool_lock" */
1520Sstevel@tonic-gate static kthread_t *pool_busy_thread; /* thread holding "pool_lock" */
1530Sstevel@tonic-gate static kmutex_t pool_barrier_lock; /* synch. with pool_barrier_* */
1540Sstevel@tonic-gate static kcondvar_t pool_barrier_cv; /* synch. with pool_barrier_* */
1550Sstevel@tonic-gate static int pool_barrier_count; /* synch. with pool_barrier_* */
15611878SVenu.Iyer@Sun.COM static list_t pool_event_cb_list; /* pool event callbacks */
15711878SVenu.Iyer@Sun.COM static boolean_t pool_event_cb_init = B_FALSE;
15811878SVenu.Iyer@Sun.COM static kmutex_t pool_event_cb_lock;
15911878SVenu.Iyer@Sun.COM static taskq_t *pool_event_cb_taskq = NULL;
16011878SVenu.Iyer@Sun.COM
16111878SVenu.Iyer@Sun.COM void pool_event_dispatch(pool_event_t, poolid_t);
1620Sstevel@tonic-gate
1630Sstevel@tonic-gate /*
1640Sstevel@tonic-gate * Boot-time pool initialization.
1650Sstevel@tonic-gate */
1660Sstevel@tonic-gate void
pool_init(void)1670Sstevel@tonic-gate pool_init(void)
1680Sstevel@tonic-gate {
1690Sstevel@tonic-gate pool_ids = id_space_create("pool_ids", POOL_DEFAULT + 1, POOL_MAXID);
1700Sstevel@tonic-gate
1710Sstevel@tonic-gate /*
1720Sstevel@tonic-gate * Initialize default pool.
1730Sstevel@tonic-gate */
1740Sstevel@tonic-gate pool_default = kmem_zalloc(sizeof (pool_t), KM_SLEEP);
1750Sstevel@tonic-gate pool_default->pool_id = POOL_DEFAULT;
1760Sstevel@tonic-gate list_create(&pool_list, sizeof (pool_t), offsetof(pool_t, pool_link));
1770Sstevel@tonic-gate list_insert_head(&pool_list, pool_default);
1780Sstevel@tonic-gate
1790Sstevel@tonic-gate /*
1800Sstevel@tonic-gate * Initialize plugins for resource sets.
1810Sstevel@tonic-gate */
1820Sstevel@tonic-gate pool_pset_init();
1830Sstevel@tonic-gate pool_count = 1;
1840Sstevel@tonic-gate p0.p_pool = pool_default;
1850Sstevel@tonic-gate global_zone->zone_pool = pool_default;
1860Sstevel@tonic-gate pool_default->pool_ref = 1;
1870Sstevel@tonic-gate }
1880Sstevel@tonic-gate
1890Sstevel@tonic-gate /*
1900Sstevel@tonic-gate * Synchronization routines.
1910Sstevel@tonic-gate *
1920Sstevel@tonic-gate * pool_lock is only called from syscall-level routines (processor_bind(),
1930Sstevel@tonic-gate * pset_*(), and /dev/pool ioctls). The pool "lock" may be held for long
1940Sstevel@tonic-gate * periods of time, including across sleeping operations, so we allow its
1950Sstevel@tonic-gate * acquisition to be interruptible.
1960Sstevel@tonic-gate *
1970Sstevel@tonic-gate * The current thread that owns the "lock" is stored in the variable
1980Sstevel@tonic-gate * pool_busy_thread, both to let pool_lock_held() work and to aid debugging.
1990Sstevel@tonic-gate */
2000Sstevel@tonic-gate void
pool_lock(void)2010Sstevel@tonic-gate pool_lock(void)
2020Sstevel@tonic-gate {
2030Sstevel@tonic-gate mutex_enter(&pool_mutex);
2044664Sacruz ASSERT(!pool_lock_held());
2050Sstevel@tonic-gate while (pool_busy_thread != NULL)
2060Sstevel@tonic-gate cv_wait(&pool_busy_cv, &pool_mutex);
2070Sstevel@tonic-gate pool_busy_thread = curthread;
2080Sstevel@tonic-gate mutex_exit(&pool_mutex);
2090Sstevel@tonic-gate }
2100Sstevel@tonic-gate
2110Sstevel@tonic-gate int
pool_lock_intr(void)2120Sstevel@tonic-gate pool_lock_intr(void)
2130Sstevel@tonic-gate {
2140Sstevel@tonic-gate mutex_enter(&pool_mutex);
2154664Sacruz ASSERT(!pool_lock_held());
2160Sstevel@tonic-gate while (pool_busy_thread != NULL) {
2170Sstevel@tonic-gate if (cv_wait_sig(&pool_busy_cv, &pool_mutex) == 0) {
2180Sstevel@tonic-gate cv_signal(&pool_busy_cv);
2190Sstevel@tonic-gate mutex_exit(&pool_mutex);
2200Sstevel@tonic-gate return (1);
2210Sstevel@tonic-gate }
2220Sstevel@tonic-gate }
2230Sstevel@tonic-gate pool_busy_thread = curthread;
2240Sstevel@tonic-gate mutex_exit(&pool_mutex);
2250Sstevel@tonic-gate return (0);
2260Sstevel@tonic-gate }
2270Sstevel@tonic-gate
2280Sstevel@tonic-gate int
pool_lock_held(void)2290Sstevel@tonic-gate pool_lock_held(void)
2300Sstevel@tonic-gate {
2310Sstevel@tonic-gate return (pool_busy_thread == curthread);
2320Sstevel@tonic-gate }
2330Sstevel@tonic-gate
2340Sstevel@tonic-gate void
pool_unlock(void)2350Sstevel@tonic-gate pool_unlock(void)
2360Sstevel@tonic-gate {
2370Sstevel@tonic-gate mutex_enter(&pool_mutex);
2384664Sacruz ASSERT(pool_lock_held());
2390Sstevel@tonic-gate pool_busy_thread = NULL;
2400Sstevel@tonic-gate cv_signal(&pool_busy_cv);
2410Sstevel@tonic-gate mutex_exit(&pool_mutex);
2420Sstevel@tonic-gate }
2430Sstevel@tonic-gate
2440Sstevel@tonic-gate /*
2450Sstevel@tonic-gate * Routines allowing fork(), exec(), exit(), and lwp_create() to synchronize
2460Sstevel@tonic-gate * with pool_do_bind().
2470Sstevel@tonic-gate *
2480Sstevel@tonic-gate * Calls to pool_barrier_enter() and pool_barrier_exit() must bracket all
2490Sstevel@tonic-gate * operations which modify pool or pset associations. They can be called
2500Sstevel@tonic-gate * while the process is multi-threaded. In the common case, when current
2510Sstevel@tonic-gate * process is not being rebound (PBWAIT flag is not set), these functions
2520Sstevel@tonic-gate * will be just incrementing and decrementing reference counts.
2530Sstevel@tonic-gate */
2540Sstevel@tonic-gate void
pool_barrier_enter(void)2550Sstevel@tonic-gate pool_barrier_enter(void)
2560Sstevel@tonic-gate {
2570Sstevel@tonic-gate proc_t *p = curproc;
2580Sstevel@tonic-gate
2590Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock));
2600Sstevel@tonic-gate while (p->p_poolflag & PBWAIT)
2610Sstevel@tonic-gate cv_wait(&p->p_poolcv, &p->p_lock);
2620Sstevel@tonic-gate p->p_poolcnt++;
2630Sstevel@tonic-gate }
2640Sstevel@tonic-gate
2650Sstevel@tonic-gate void
pool_barrier_exit(void)2660Sstevel@tonic-gate pool_barrier_exit(void)
2670Sstevel@tonic-gate {
2680Sstevel@tonic-gate proc_t *p = curproc;
2690Sstevel@tonic-gate
2700Sstevel@tonic-gate ASSERT(MUTEX_HELD(&p->p_lock));
2710Sstevel@tonic-gate ASSERT(p->p_poolcnt > 0);
2720Sstevel@tonic-gate p->p_poolcnt--;
2730Sstevel@tonic-gate if (p->p_poolflag & PBWAIT) {
2740Sstevel@tonic-gate mutex_enter(&pool_barrier_lock);
2750Sstevel@tonic-gate ASSERT(pool_barrier_count > 0);
2760Sstevel@tonic-gate pool_barrier_count--;
2770Sstevel@tonic-gate if (pool_barrier_count == 0)
2780Sstevel@tonic-gate cv_signal(&pool_barrier_cv);
2790Sstevel@tonic-gate mutex_exit(&pool_barrier_lock);
2800Sstevel@tonic-gate while (p->p_poolflag & PBWAIT)
2810Sstevel@tonic-gate cv_wait(&p->p_poolcv, &p->p_lock);
2820Sstevel@tonic-gate }
2830Sstevel@tonic-gate }
2840Sstevel@tonic-gate
2850Sstevel@tonic-gate /*
2860Sstevel@tonic-gate * Enable pools facility.
2870Sstevel@tonic-gate */
2880Sstevel@tonic-gate static int
pool_enable(void)2890Sstevel@tonic-gate pool_enable(void)
2900Sstevel@tonic-gate {
2910Sstevel@tonic-gate int ret;
2920Sstevel@tonic-gate
2930Sstevel@tonic-gate ASSERT(pool_lock_held());
2940Sstevel@tonic-gate ASSERT(pool_count == 1);
2950Sstevel@tonic-gate
2960Sstevel@tonic-gate ret = pool_pset_enable();
2970Sstevel@tonic-gate if (ret != 0)
2980Sstevel@tonic-gate return (ret);
2990Sstevel@tonic-gate (void) nvlist_alloc(&pool_sys_prop, NV_UNIQUE_NAME, KM_SLEEP);
3000Sstevel@tonic-gate (void) nvlist_add_string(pool_sys_prop, "system.name",
3011336Sgm149974 "default");
3020Sstevel@tonic-gate (void) nvlist_add_string(pool_sys_prop, "system.comment", "");
3030Sstevel@tonic-gate (void) nvlist_add_int64(pool_sys_prop, "system.version", 1);
3040Sstevel@tonic-gate (void) nvlist_add_byte(pool_sys_prop, "system.bind-default", 1);
3053247Sgjelinek (void) nvlist_add_string(pool_sys_prop, "system.poold.objectives",
3063247Sgjelinek "wt-load");
3070Sstevel@tonic-gate
3080Sstevel@tonic-gate (void) nvlist_alloc(&pool_default->pool_props,
3090Sstevel@tonic-gate NV_UNIQUE_NAME, KM_SLEEP);
3100Sstevel@tonic-gate (void) nvlist_add_string(pool_default->pool_props,
3110Sstevel@tonic-gate "pool.name", "pool_default");
3120Sstevel@tonic-gate (void) nvlist_add_string(pool_default->pool_props, "pool.comment", "");
3130Sstevel@tonic-gate (void) nvlist_add_byte(pool_default->pool_props, "pool.default", 1);
3140Sstevel@tonic-gate (void) nvlist_add_byte(pool_default->pool_props, "pool.active", 1);
3150Sstevel@tonic-gate (void) nvlist_add_int64(pool_default->pool_props,
3160Sstevel@tonic-gate "pool.importance", 1);
3170Sstevel@tonic-gate (void) nvlist_add_int64(pool_default->pool_props, "pool.sys_id",
3180Sstevel@tonic-gate pool_default->pool_id);
3190Sstevel@tonic-gate
3200Sstevel@tonic-gate pool_sys_mod = pool_pool_mod = gethrtime();
3210Sstevel@tonic-gate
3220Sstevel@tonic-gate return (ret);
3230Sstevel@tonic-gate }
3240Sstevel@tonic-gate
3250Sstevel@tonic-gate /*
3260Sstevel@tonic-gate * Disable pools facility.
3270Sstevel@tonic-gate */
3280Sstevel@tonic-gate static int
pool_disable(void)3290Sstevel@tonic-gate pool_disable(void)
3300Sstevel@tonic-gate {
3310Sstevel@tonic-gate int ret;
3320Sstevel@tonic-gate
3330Sstevel@tonic-gate ASSERT(pool_lock_held());
3340Sstevel@tonic-gate
3350Sstevel@tonic-gate if (pool_count > 1) /* must destroy all pools first */
3360Sstevel@tonic-gate return (EBUSY);
3370Sstevel@tonic-gate
3380Sstevel@tonic-gate ret = pool_pset_disable();
3390Sstevel@tonic-gate if (ret != 0)
3400Sstevel@tonic-gate return (ret);
3410Sstevel@tonic-gate if (pool_sys_prop != NULL) {
3420Sstevel@tonic-gate nvlist_free(pool_sys_prop);
3430Sstevel@tonic-gate pool_sys_prop = NULL;
3440Sstevel@tonic-gate }
3450Sstevel@tonic-gate if (pool_default->pool_props != NULL) {
3460Sstevel@tonic-gate nvlist_free(pool_default->pool_props);
3470Sstevel@tonic-gate pool_default->pool_props = NULL;
3480Sstevel@tonic-gate }
3490Sstevel@tonic-gate return (0);
3500Sstevel@tonic-gate }
3510Sstevel@tonic-gate
3520Sstevel@tonic-gate pool_t *
pool_lookup_pool_by_name(char * name)3530Sstevel@tonic-gate pool_lookup_pool_by_name(char *name)
3540Sstevel@tonic-gate {
3550Sstevel@tonic-gate pool_t *pool = pool_default;
3560Sstevel@tonic-gate char *p;
3570Sstevel@tonic-gate
3580Sstevel@tonic-gate ASSERT(pool_lock_held());
3590Sstevel@tonic-gate for (pool = list_head(&pool_list); pool;
3600Sstevel@tonic-gate pool = list_next(&pool_list, pool)) {
3610Sstevel@tonic-gate if (nvlist_lookup_string(pool->pool_props,
3620Sstevel@tonic-gate "pool.name", &p) == 0 && strcmp(name, p) == 0)
3630Sstevel@tonic-gate return (pool);
3640Sstevel@tonic-gate }
3650Sstevel@tonic-gate return (NULL);
3660Sstevel@tonic-gate }
3670Sstevel@tonic-gate
3680Sstevel@tonic-gate pool_t *
pool_lookup_pool_by_id(poolid_t poolid)3690Sstevel@tonic-gate pool_lookup_pool_by_id(poolid_t poolid)
3700Sstevel@tonic-gate {
3710Sstevel@tonic-gate pool_t *pool = pool_default;
3720Sstevel@tonic-gate
3730Sstevel@tonic-gate ASSERT(pool_lock_held());
3740Sstevel@tonic-gate for (pool = list_head(&pool_list); pool;
3750Sstevel@tonic-gate pool = list_next(&pool_list, pool)) {
3760Sstevel@tonic-gate if (pool->pool_id == poolid)
3770Sstevel@tonic-gate return (pool);
3780Sstevel@tonic-gate }
3790Sstevel@tonic-gate return (NULL);
3800Sstevel@tonic-gate }
3810Sstevel@tonic-gate
38211878SVenu.Iyer@Sun.COM pool_t *
pool_lookup_pool_by_pset(int id)38311878SVenu.Iyer@Sun.COM pool_lookup_pool_by_pset(int id)
38411878SVenu.Iyer@Sun.COM {
38511878SVenu.Iyer@Sun.COM pool_t *pool = pool_default;
38611878SVenu.Iyer@Sun.COM psetid_t psetid = (psetid_t)id;
38711878SVenu.Iyer@Sun.COM
38811878SVenu.Iyer@Sun.COM ASSERT(pool_lock_held());
38911878SVenu.Iyer@Sun.COM for (pool = list_head(&pool_list); pool != NULL;
39011878SVenu.Iyer@Sun.COM pool = list_next(&pool_list, pool)) {
39111878SVenu.Iyer@Sun.COM if (pool->pool_pset->pset_id == psetid)
39211878SVenu.Iyer@Sun.COM return (pool);
39311878SVenu.Iyer@Sun.COM }
39411878SVenu.Iyer@Sun.COM return (NULL);
39511878SVenu.Iyer@Sun.COM }
39611878SVenu.Iyer@Sun.COM
3970Sstevel@tonic-gate /*
3980Sstevel@tonic-gate * Create new pool, associate it with default resource sets, and give
3990Sstevel@tonic-gate * it a temporary name.
4000Sstevel@tonic-gate */
4010Sstevel@tonic-gate static int
pool_pool_create(poolid_t * poolid)4020Sstevel@tonic-gate pool_pool_create(poolid_t *poolid)
4030Sstevel@tonic-gate {
4040Sstevel@tonic-gate pool_t *pool;
4050Sstevel@tonic-gate char pool_name[40];
4060Sstevel@tonic-gate
4070Sstevel@tonic-gate ASSERT(pool_lock_held());
4080Sstevel@tonic-gate
4090Sstevel@tonic-gate pool = kmem_zalloc(sizeof (pool_t), KM_SLEEP);
4100Sstevel@tonic-gate pool->pool_id = *poolid = id_alloc(pool_ids);
4110Sstevel@tonic-gate pool->pool_pset = pool_pset_default;
4120Sstevel@tonic-gate pool_pset_default->pset_npools++;
4130Sstevel@tonic-gate list_insert_tail(&pool_list, pool);
4140Sstevel@tonic-gate (void) nvlist_alloc(&pool->pool_props, NV_UNIQUE_NAME, KM_SLEEP);
4150Sstevel@tonic-gate (void) nvlist_add_int64(pool->pool_props, "pool.sys_id", pool->pool_id);
4160Sstevel@tonic-gate (void) nvlist_add_byte(pool->pool_props, "pool.default", 0);
4170Sstevel@tonic-gate pool_pool_mod = gethrtime();
4180Sstevel@tonic-gate (void) snprintf(pool_name, sizeof (pool_name), "pool_%lld",
4190Sstevel@tonic-gate pool_pool_mod);
4200Sstevel@tonic-gate (void) nvlist_add_string(pool->pool_props, "pool.name", pool_name);
4210Sstevel@tonic-gate pool_count++;
4220Sstevel@tonic-gate return (0);
4230Sstevel@tonic-gate }
4240Sstevel@tonic-gate
4250Sstevel@tonic-gate struct destroy_zone_arg {
4260Sstevel@tonic-gate pool_t *old;
4270Sstevel@tonic-gate pool_t *new;
4280Sstevel@tonic-gate };
4290Sstevel@tonic-gate
4300Sstevel@tonic-gate /*
4310Sstevel@tonic-gate * Update pool pointers for zones that are currently bound to pool "old"
4320Sstevel@tonic-gate * to be bound to pool "new".
4330Sstevel@tonic-gate */
4340Sstevel@tonic-gate static int
pool_destroy_zone_cb(zone_t * zone,void * arg)4350Sstevel@tonic-gate pool_destroy_zone_cb(zone_t *zone, void *arg)
4360Sstevel@tonic-gate {
4370Sstevel@tonic-gate struct destroy_zone_arg *dza = arg;
4380Sstevel@tonic-gate
4390Sstevel@tonic-gate ASSERT(pool_lock_held());
4400Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock));
4410Sstevel@tonic-gate
4420Sstevel@tonic-gate if (zone_pool_get(zone) == dza->old)
4430Sstevel@tonic-gate zone_pool_set(zone, dza->new);
4440Sstevel@tonic-gate return (0);
4450Sstevel@tonic-gate }
4460Sstevel@tonic-gate
4470Sstevel@tonic-gate /*
4480Sstevel@tonic-gate * Destroy specified pool, and rebind all processes in it
4490Sstevel@tonic-gate * to the default pool.
4500Sstevel@tonic-gate */
4510Sstevel@tonic-gate static int
pool_pool_destroy(poolid_t poolid)4520Sstevel@tonic-gate pool_pool_destroy(poolid_t poolid)
4530Sstevel@tonic-gate {
4540Sstevel@tonic-gate pool_t *pool;
4550Sstevel@tonic-gate int ret;
4560Sstevel@tonic-gate
4570Sstevel@tonic-gate ASSERT(pool_lock_held());
4580Sstevel@tonic-gate
4590Sstevel@tonic-gate if (poolid == POOL_DEFAULT)
4600Sstevel@tonic-gate return (EINVAL);
4610Sstevel@tonic-gate if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
4620Sstevel@tonic-gate return (ESRCH);
4630Sstevel@tonic-gate ret = pool_do_bind(pool_default, P_POOLID, poolid, POOL_BIND_ALL);
4640Sstevel@tonic-gate if (ret == 0) {
4650Sstevel@tonic-gate struct destroy_zone_arg dzarg;
4660Sstevel@tonic-gate
4670Sstevel@tonic-gate dzarg.old = pool;
4680Sstevel@tonic-gate dzarg.new = pool_default;
4690Sstevel@tonic-gate mutex_enter(&cpu_lock);
4700Sstevel@tonic-gate ret = zone_walk(pool_destroy_zone_cb, &dzarg);
4710Sstevel@tonic-gate mutex_exit(&cpu_lock);
4720Sstevel@tonic-gate ASSERT(ret == 0);
4730Sstevel@tonic-gate ASSERT(pool->pool_ref == 0);
4740Sstevel@tonic-gate (void) nvlist_free(pool->pool_props);
4750Sstevel@tonic-gate id_free(pool_ids, pool->pool_id);
4760Sstevel@tonic-gate pool->pool_pset->pset_npools--;
4770Sstevel@tonic-gate list_remove(&pool_list, pool);
4780Sstevel@tonic-gate pool_count--;
4790Sstevel@tonic-gate pool_pool_mod = gethrtime();
4800Sstevel@tonic-gate kmem_free(pool, sizeof (pool_t));
4810Sstevel@tonic-gate }
4820Sstevel@tonic-gate return (ret);
4830Sstevel@tonic-gate }
4840Sstevel@tonic-gate
4850Sstevel@tonic-gate /*
4860Sstevel@tonic-gate * Create new pool or resource set.
4870Sstevel@tonic-gate */
4880Sstevel@tonic-gate int
pool_create(int class,int subclass,id_t * id)4890Sstevel@tonic-gate pool_create(int class, int subclass, id_t *id)
4900Sstevel@tonic-gate {
4910Sstevel@tonic-gate int ret;
4920Sstevel@tonic-gate
4930Sstevel@tonic-gate ASSERT(pool_lock_held());
4940Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
4950Sstevel@tonic-gate return (ENOTACTIVE);
4960Sstevel@tonic-gate switch (class) {
4970Sstevel@tonic-gate case PEC_POOL:
4980Sstevel@tonic-gate ret = pool_pool_create((poolid_t *)id);
4990Sstevel@tonic-gate break;
5000Sstevel@tonic-gate case PEC_RES_COMP:
5010Sstevel@tonic-gate switch (subclass) {
5020Sstevel@tonic-gate case PREC_PSET:
5030Sstevel@tonic-gate ret = pool_pset_create((psetid_t *)id);
5040Sstevel@tonic-gate break;
5050Sstevel@tonic-gate default:
5060Sstevel@tonic-gate ret = EINVAL;
5070Sstevel@tonic-gate }
5080Sstevel@tonic-gate break;
5090Sstevel@tonic-gate case PEC_RES_AGG:
5100Sstevel@tonic-gate ret = ENOTSUP;
5110Sstevel@tonic-gate break;
5120Sstevel@tonic-gate default:
5130Sstevel@tonic-gate ret = EINVAL;
5140Sstevel@tonic-gate }
5150Sstevel@tonic-gate return (ret);
5160Sstevel@tonic-gate }
5170Sstevel@tonic-gate
5180Sstevel@tonic-gate /*
5190Sstevel@tonic-gate * Destroy an existing pool or resource set.
5200Sstevel@tonic-gate */
5210Sstevel@tonic-gate int
pool_destroy(int class,int subclass,id_t id)5220Sstevel@tonic-gate pool_destroy(int class, int subclass, id_t id)
5230Sstevel@tonic-gate {
5240Sstevel@tonic-gate int ret;
5250Sstevel@tonic-gate
5260Sstevel@tonic-gate ASSERT(pool_lock_held());
5270Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
5280Sstevel@tonic-gate return (ENOTACTIVE);
5290Sstevel@tonic-gate switch (class) {
5300Sstevel@tonic-gate case PEC_POOL:
5310Sstevel@tonic-gate ret = pool_pool_destroy((poolid_t)id);
5320Sstevel@tonic-gate break;
5330Sstevel@tonic-gate case PEC_RES_COMP:
5340Sstevel@tonic-gate switch (subclass) {
5350Sstevel@tonic-gate case PREC_PSET:
5360Sstevel@tonic-gate ret = pool_pset_destroy((psetid_t)id);
5370Sstevel@tonic-gate break;
5380Sstevel@tonic-gate default:
5390Sstevel@tonic-gate ret = EINVAL;
5400Sstevel@tonic-gate }
5410Sstevel@tonic-gate break;
5420Sstevel@tonic-gate case PEC_RES_AGG:
5430Sstevel@tonic-gate ret = ENOTSUP;
5440Sstevel@tonic-gate break;
5450Sstevel@tonic-gate default:
5460Sstevel@tonic-gate ret = EINVAL;
5470Sstevel@tonic-gate }
5480Sstevel@tonic-gate return (ret);
5490Sstevel@tonic-gate }
5500Sstevel@tonic-gate
5510Sstevel@tonic-gate /*
5520Sstevel@tonic-gate * Enable or disable pools.
5530Sstevel@tonic-gate */
5540Sstevel@tonic-gate int
pool_status(int status)5550Sstevel@tonic-gate pool_status(int status)
5560Sstevel@tonic-gate {
5570Sstevel@tonic-gate int ret = 0;
5580Sstevel@tonic-gate
5590Sstevel@tonic-gate ASSERT(pool_lock_held());
5600Sstevel@tonic-gate
5610Sstevel@tonic-gate if (pool_state == status)
5620Sstevel@tonic-gate return (0);
5630Sstevel@tonic-gate switch (status) {
5640Sstevel@tonic-gate case POOL_ENABLED:
5650Sstevel@tonic-gate ret = pool_enable();
5660Sstevel@tonic-gate if (ret != 0)
5670Sstevel@tonic-gate return (ret);
5680Sstevel@tonic-gate pool_state = POOL_ENABLED;
56911878SVenu.Iyer@Sun.COM pool_event_dispatch(POOL_E_ENABLE, NULL);
5700Sstevel@tonic-gate break;
5710Sstevel@tonic-gate case POOL_DISABLED:
5720Sstevel@tonic-gate ret = pool_disable();
5730Sstevel@tonic-gate if (ret != 0)
5740Sstevel@tonic-gate return (ret);
5750Sstevel@tonic-gate pool_state = POOL_DISABLED;
57611878SVenu.Iyer@Sun.COM pool_event_dispatch(POOL_E_DISABLE, NULL);
5770Sstevel@tonic-gate break;
5780Sstevel@tonic-gate default:
5790Sstevel@tonic-gate ret = EINVAL;
5800Sstevel@tonic-gate }
5810Sstevel@tonic-gate return (ret);
5820Sstevel@tonic-gate }
5830Sstevel@tonic-gate
5840Sstevel@tonic-gate /*
5850Sstevel@tonic-gate * Associate pool with resource set.
5860Sstevel@tonic-gate */
5870Sstevel@tonic-gate int
pool_assoc(poolid_t poolid,int idtype,id_t id)5880Sstevel@tonic-gate pool_assoc(poolid_t poolid, int idtype, id_t id)
5890Sstevel@tonic-gate {
5900Sstevel@tonic-gate int ret;
5910Sstevel@tonic-gate
5920Sstevel@tonic-gate ASSERT(pool_lock_held());
5930Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
5940Sstevel@tonic-gate return (ENOTACTIVE);
5950Sstevel@tonic-gate switch (idtype) {
5960Sstevel@tonic-gate case PREC_PSET:
5970Sstevel@tonic-gate ret = pool_pset_assoc(poolid, (psetid_t)id);
59811878SVenu.Iyer@Sun.COM if (ret == 0)
59911878SVenu.Iyer@Sun.COM pool_event_dispatch(POOL_E_CHANGE, poolid);
6000Sstevel@tonic-gate break;
6010Sstevel@tonic-gate default:
6020Sstevel@tonic-gate ret = EINVAL;
6030Sstevel@tonic-gate }
6040Sstevel@tonic-gate if (ret == 0)
6050Sstevel@tonic-gate pool_pool_mod = gethrtime();
6060Sstevel@tonic-gate return (ret);
6070Sstevel@tonic-gate }
6080Sstevel@tonic-gate
6090Sstevel@tonic-gate /*
6100Sstevel@tonic-gate * Disassociate resource set from pool.
6110Sstevel@tonic-gate */
6120Sstevel@tonic-gate int
pool_dissoc(poolid_t poolid,int idtype)6130Sstevel@tonic-gate pool_dissoc(poolid_t poolid, int idtype)
6140Sstevel@tonic-gate {
6150Sstevel@tonic-gate int ret;
6160Sstevel@tonic-gate
6170Sstevel@tonic-gate ASSERT(pool_lock_held());
6180Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
6190Sstevel@tonic-gate return (ENOTACTIVE);
6200Sstevel@tonic-gate switch (idtype) {
6210Sstevel@tonic-gate case PREC_PSET:
6220Sstevel@tonic-gate ret = pool_pset_assoc(poolid, PS_NONE);
62311878SVenu.Iyer@Sun.COM if (ret == 0)
62411878SVenu.Iyer@Sun.COM pool_event_dispatch(POOL_E_CHANGE, poolid);
6250Sstevel@tonic-gate break;
6260Sstevel@tonic-gate default:
6270Sstevel@tonic-gate ret = EINVAL;
6280Sstevel@tonic-gate }
6290Sstevel@tonic-gate if (ret == 0)
6300Sstevel@tonic-gate pool_pool_mod = gethrtime();
6310Sstevel@tonic-gate return (ret);
6320Sstevel@tonic-gate }
6330Sstevel@tonic-gate
/*
 * Transfer specified quantity of resources between resource sets.
 *
 * All arguments are ignored; this routine unconditionally returns EINVAL.
 */
/*ARGSUSED*/
int
pool_transfer(int type, id_t src, id_t dst, uint64_t qty)
{
	return (EINVAL);
}
6450Sstevel@tonic-gate
64611878SVenu.Iyer@Sun.COM static poolid_t
pool_lookup_id_by_pset(int id)64711878SVenu.Iyer@Sun.COM pool_lookup_id_by_pset(int id)
64811878SVenu.Iyer@Sun.COM {
64911878SVenu.Iyer@Sun.COM pool_t *pool = pool_default;
65011878SVenu.Iyer@Sun.COM psetid_t psetid = (psetid_t)id;
65111878SVenu.Iyer@Sun.COM
65211878SVenu.Iyer@Sun.COM ASSERT(pool_lock_held());
65311878SVenu.Iyer@Sun.COM for (pool = list_head(&pool_list); pool != NULL;
65411878SVenu.Iyer@Sun.COM pool = list_next(&pool_list, pool)) {
65511878SVenu.Iyer@Sun.COM if (pool->pool_pset->pset_id == psetid)
65611878SVenu.Iyer@Sun.COM return (pool->pool_id);
65711878SVenu.Iyer@Sun.COM }
65811878SVenu.Iyer@Sun.COM return (POOL_INVALID);
65911878SVenu.Iyer@Sun.COM }
66011878SVenu.Iyer@Sun.COM
6610Sstevel@tonic-gate /*
6620Sstevel@tonic-gate * Transfer resources specified by their IDs between resource sets.
6630Sstevel@tonic-gate */
6640Sstevel@tonic-gate int
pool_xtransfer(int type,id_t src_pset,id_t dst_pset,uint_t size,id_t * ids)66511878SVenu.Iyer@Sun.COM pool_xtransfer(int type, id_t src_pset, id_t dst_pset, uint_t size, id_t *ids)
6660Sstevel@tonic-gate {
6670Sstevel@tonic-gate int ret;
66811878SVenu.Iyer@Sun.COM poolid_t src_pool, dst_pool;
6690Sstevel@tonic-gate
6700Sstevel@tonic-gate ASSERT(pool_lock_held());
6710Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
6720Sstevel@tonic-gate return (ENOTACTIVE);
6730Sstevel@tonic-gate switch (type) {
6740Sstevel@tonic-gate case PREC_PSET:
67511878SVenu.Iyer@Sun.COM ret = pool_pset_xtransfer((psetid_t)src_pset,
67611878SVenu.Iyer@Sun.COM (psetid_t)dst_pset, size, ids);
677*12376SMichael.Lim@Sun.COM if (ret == 0) {
678*12376SMichael.Lim@Sun.COM if ((src_pool = pool_lookup_id_by_pset(src_pset)) !=
679*12376SMichael.Lim@Sun.COM POOL_INVALID)
680*12376SMichael.Lim@Sun.COM pool_event_dispatch(POOL_E_CHANGE, src_pool);
681*12376SMichael.Lim@Sun.COM if ((dst_pool = pool_lookup_id_by_pset(dst_pset)) !=
682*12376SMichael.Lim@Sun.COM POOL_INVALID)
683*12376SMichael.Lim@Sun.COM pool_event_dispatch(POOL_E_CHANGE, dst_pool);
684*12376SMichael.Lim@Sun.COM }
6850Sstevel@tonic-gate break;
6860Sstevel@tonic-gate default:
6870Sstevel@tonic-gate ret = EINVAL;
6880Sstevel@tonic-gate }
6890Sstevel@tonic-gate return (ret);
6900Sstevel@tonic-gate }
6910Sstevel@tonic-gate
6920Sstevel@tonic-gate /*
6930Sstevel@tonic-gate * Bind processes to pools.
6940Sstevel@tonic-gate */
6950Sstevel@tonic-gate int
pool_bind(poolid_t poolid,idtype_t idtype,id_t id)6960Sstevel@tonic-gate pool_bind(poolid_t poolid, idtype_t idtype, id_t id)
6970Sstevel@tonic-gate {
69811878SVenu.Iyer@Sun.COM pool_t *pool;
6990Sstevel@tonic-gate
7000Sstevel@tonic-gate ASSERT(pool_lock_held());
7010Sstevel@tonic-gate
7020Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
7030Sstevel@tonic-gate return (ENOTACTIVE);
7040Sstevel@tonic-gate if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
7050Sstevel@tonic-gate return (ESRCH);
7060Sstevel@tonic-gate
7070Sstevel@tonic-gate switch (idtype) {
7080Sstevel@tonic-gate case P_PID:
7090Sstevel@tonic-gate case P_TASKID:
7100Sstevel@tonic-gate case P_PROJID:
7110Sstevel@tonic-gate case P_ZONEID:
7120Sstevel@tonic-gate break;
7130Sstevel@tonic-gate default:
7140Sstevel@tonic-gate return (EINVAL);
7150Sstevel@tonic-gate }
7160Sstevel@tonic-gate return (pool_do_bind(pool, idtype, id, POOL_BIND_ALL));
7170Sstevel@tonic-gate }
7180Sstevel@tonic-gate
7190Sstevel@tonic-gate /*
7200Sstevel@tonic-gate * Query pool binding of the specifed process.
7210Sstevel@tonic-gate */
7220Sstevel@tonic-gate int
pool_query_binding(idtype_t idtype,id_t id,id_t * poolid)7230Sstevel@tonic-gate pool_query_binding(idtype_t idtype, id_t id, id_t *poolid)
7240Sstevel@tonic-gate {
7250Sstevel@tonic-gate proc_t *p;
7260Sstevel@tonic-gate
7270Sstevel@tonic-gate if (idtype != P_PID)
7280Sstevel@tonic-gate return (ENOTSUP);
7290Sstevel@tonic-gate if (id == P_MYID)
7300Sstevel@tonic-gate id = curproc->p_pid;
7310Sstevel@tonic-gate
7320Sstevel@tonic-gate ASSERT(pool_lock_held());
7330Sstevel@tonic-gate
7340Sstevel@tonic-gate mutex_enter(&pidlock);
7350Sstevel@tonic-gate if ((p = prfind((pid_t)id)) == NULL) {
7360Sstevel@tonic-gate mutex_exit(&pidlock);
7370Sstevel@tonic-gate return (ESRCH);
7380Sstevel@tonic-gate }
7390Sstevel@tonic-gate mutex_enter(&p->p_lock);
7400Sstevel@tonic-gate /*
7410Sstevel@tonic-gate * In local zones, lie about pool bindings of processes from
7420Sstevel@tonic-gate * the global zone.
7430Sstevel@tonic-gate */
7440Sstevel@tonic-gate if (!INGLOBALZONE(curproc) && INGLOBALZONE(p)) {
7450Sstevel@tonic-gate pool_t *pool;
7460Sstevel@tonic-gate
7470Sstevel@tonic-gate pool = zone_pool_get(curproc->p_zone);
7480Sstevel@tonic-gate *poolid = pool->pool_id;
7490Sstevel@tonic-gate } else {
7500Sstevel@tonic-gate *poolid = p->p_pool->pool_id;
7510Sstevel@tonic-gate }
7520Sstevel@tonic-gate mutex_exit(&p->p_lock);
7530Sstevel@tonic-gate mutex_exit(&pidlock);
7540Sstevel@tonic-gate return (0);
7550Sstevel@tonic-gate }
7560Sstevel@tonic-gate
7570Sstevel@tonic-gate static ea_object_t *
pool_system_pack(void)7580Sstevel@tonic-gate pool_system_pack(void)
7590Sstevel@tonic-gate {
7600Sstevel@tonic-gate ea_object_t *eo_system;
7610Sstevel@tonic-gate size_t bufsz = 0;
7620Sstevel@tonic-gate char *buf = NULL;
7630Sstevel@tonic-gate
7640Sstevel@tonic-gate ASSERT(pool_lock_held());
7650Sstevel@tonic-gate
7660Sstevel@tonic-gate eo_system = ea_alloc_group(EXT_GROUP | EXC_LOCAL | EXD_GROUP_SYSTEM);
7670Sstevel@tonic-gate (void) ea_attach_item(eo_system, &pool_sys_mod, sizeof (hrtime_t),
7680Sstevel@tonic-gate EXC_LOCAL | EXD_SYSTEM_TSTAMP | EXT_UINT64);
7690Sstevel@tonic-gate if (INGLOBALZONE(curproc))
7700Sstevel@tonic-gate (void) ea_attach_item(eo_system, &pool_pool_mod,
7710Sstevel@tonic-gate sizeof (hrtime_t),
7720Sstevel@tonic-gate EXC_LOCAL | EXD_POOL_TSTAMP | EXT_UINT64);
7730Sstevel@tonic-gate else
7740Sstevel@tonic-gate (void) ea_attach_item(eo_system,
7750Sstevel@tonic-gate &curproc->p_zone->zone_pool_mod,
7760Sstevel@tonic-gate sizeof (hrtime_t),
7770Sstevel@tonic-gate EXC_LOCAL | EXD_POOL_TSTAMP | EXT_UINT64);
7780Sstevel@tonic-gate (void) ea_attach_item(eo_system, &pool_pset_mod, sizeof (hrtime_t),
7790Sstevel@tonic-gate EXC_LOCAL | EXD_PSET_TSTAMP | EXT_UINT64);
7800Sstevel@tonic-gate (void) ea_attach_item(eo_system, &pool_cpu_mod, sizeof (hrtime_t),
7810Sstevel@tonic-gate EXC_LOCAL | EXD_CPU_TSTAMP | EXT_UINT64);
7820Sstevel@tonic-gate (void) nvlist_pack(pool_sys_prop, &buf, &bufsz, NV_ENCODE_NATIVE, 0);
7830Sstevel@tonic-gate (void) ea_attach_item(eo_system, buf, bufsz,
7840Sstevel@tonic-gate EXC_LOCAL | EXD_SYSTEM_PROP | EXT_RAW);
7850Sstevel@tonic-gate kmem_free(buf, bufsz);
7860Sstevel@tonic-gate return (eo_system);
7870Sstevel@tonic-gate }
7880Sstevel@tonic-gate
7890Sstevel@tonic-gate /*
7900Sstevel@tonic-gate * Pack information about pools and attach it to specified exacct group.
7910Sstevel@tonic-gate */
7920Sstevel@tonic-gate static int
pool_pool_pack(ea_object_t * eo_system)7930Sstevel@tonic-gate pool_pool_pack(ea_object_t *eo_system)
7940Sstevel@tonic-gate {
7950Sstevel@tonic-gate ea_object_t *eo_pool;
7960Sstevel@tonic-gate pool_t *pool;
7970Sstevel@tonic-gate size_t bufsz;
7980Sstevel@tonic-gate char *buf;
7990Sstevel@tonic-gate pool_t *myzonepool;
8000Sstevel@tonic-gate
8010Sstevel@tonic-gate ASSERT(pool_lock_held());
8020Sstevel@tonic-gate myzonepool = zone_pool_get(curproc->p_zone);
8030Sstevel@tonic-gate for (pool = list_head(&pool_list); pool;
8040Sstevel@tonic-gate pool = list_next(&pool_list, pool)) {
8050Sstevel@tonic-gate if (!INGLOBALZONE(curproc) && myzonepool != pool)
8060Sstevel@tonic-gate continue;
8070Sstevel@tonic-gate bufsz = 0;
8080Sstevel@tonic-gate buf = NULL;
8090Sstevel@tonic-gate eo_pool = ea_alloc_group(EXT_GROUP |
8100Sstevel@tonic-gate EXC_LOCAL | EXD_GROUP_POOL);
8110Sstevel@tonic-gate (void) ea_attach_item(eo_pool, &pool->pool_id, sizeof (id_t),
8120Sstevel@tonic-gate EXC_LOCAL | EXD_POOL_POOLID | EXT_UINT32);
8130Sstevel@tonic-gate (void) ea_attach_item(eo_pool, &pool->pool_pset->pset_id,
8140Sstevel@tonic-gate sizeof (id_t), EXC_LOCAL | EXD_POOL_PSETID | EXT_UINT32);
8150Sstevel@tonic-gate (void) nvlist_pack(pool->pool_props, &buf, &bufsz,
8160Sstevel@tonic-gate NV_ENCODE_NATIVE, 0);
8170Sstevel@tonic-gate (void) ea_attach_item(eo_pool, buf, bufsz,
8180Sstevel@tonic-gate EXC_LOCAL | EXD_POOL_PROP | EXT_RAW);
8190Sstevel@tonic-gate kmem_free(buf, bufsz);
8200Sstevel@tonic-gate (void) ea_attach_to_group(eo_system, eo_pool);
8210Sstevel@tonic-gate }
8220Sstevel@tonic-gate return (0);
8230Sstevel@tonic-gate }
8240Sstevel@tonic-gate
8250Sstevel@tonic-gate /*
8260Sstevel@tonic-gate * Pack the whole pool configuration in the specified buffer.
8270Sstevel@tonic-gate */
8280Sstevel@tonic-gate int
pool_pack_conf(void * kbuf,size_t kbufsz,size_t * asize)8290Sstevel@tonic-gate pool_pack_conf(void *kbuf, size_t kbufsz, size_t *asize)
8300Sstevel@tonic-gate {
8310Sstevel@tonic-gate ea_object_t *eo_system;
8320Sstevel@tonic-gate size_t ksize;
8330Sstevel@tonic-gate int ret = 0;
8340Sstevel@tonic-gate
8350Sstevel@tonic-gate ASSERT(pool_lock_held());
8360Sstevel@tonic-gate
8370Sstevel@tonic-gate eo_system = pool_system_pack(); /* 1. pack system */
8380Sstevel@tonic-gate (void) pool_pool_pack(eo_system); /* 2. pack all pools */
8390Sstevel@tonic-gate (void) pool_pset_pack(eo_system); /* 3. pack all psets */
8400Sstevel@tonic-gate ksize = ea_pack_object(eo_system, NULL, 0);
8410Sstevel@tonic-gate if (kbuf == NULL || kbufsz == 0)
8420Sstevel@tonic-gate *asize = ksize;
8430Sstevel@tonic-gate else if (ksize > kbufsz)
8440Sstevel@tonic-gate ret = ENOMEM;
8450Sstevel@tonic-gate else
8460Sstevel@tonic-gate *asize = ea_pack_object(eo_system, kbuf, kbufsz);
8470Sstevel@tonic-gate ea_free_object(eo_system, EUP_ALLOC);
8480Sstevel@tonic-gate return (ret);
8490Sstevel@tonic-gate }
8500Sstevel@tonic-gate
8510Sstevel@tonic-gate /*
8520Sstevel@tonic-gate * Start/end the commit transaction. If commit transaction is currently
8530Sstevel@tonic-gate * in progress, then all POOL_QUERY ioctls will return pools configuration
8540Sstevel@tonic-gate * at the beginning of transaction.
8550Sstevel@tonic-gate */
8560Sstevel@tonic-gate int
pool_commit(int state)8570Sstevel@tonic-gate pool_commit(int state)
8580Sstevel@tonic-gate {
8590Sstevel@tonic-gate ea_object_t *eo_system;
8600Sstevel@tonic-gate int ret = 0;
8610Sstevel@tonic-gate
8620Sstevel@tonic-gate ASSERT(pool_lock_held());
8630Sstevel@tonic-gate
8640Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
8650Sstevel@tonic-gate return (ENOTACTIVE);
8660Sstevel@tonic-gate switch (state) {
8670Sstevel@tonic-gate case 1:
8680Sstevel@tonic-gate /*
8690Sstevel@tonic-gate * Beginning commit transation.
8700Sstevel@tonic-gate */
8710Sstevel@tonic-gate if (pool_buf != NULL) /* transaction in progress */
8720Sstevel@tonic-gate return (EBUSY);
8730Sstevel@tonic-gate eo_system = pool_system_pack(); /* 1. pack system */
8740Sstevel@tonic-gate (void) pool_pool_pack(eo_system); /* 2. pack all pools */
8750Sstevel@tonic-gate (void) pool_pset_pack(eo_system); /* 3. pack all psets */
8760Sstevel@tonic-gate pool_bufsz = ea_pack_object(eo_system, NULL, 0);
8770Sstevel@tonic-gate pool_buf = kmem_alloc(pool_bufsz, KM_SLEEP);
8780Sstevel@tonic-gate pool_bufsz = ea_pack_object(eo_system, pool_buf, pool_bufsz);
8790Sstevel@tonic-gate ea_free_object(eo_system, EUP_ALLOC);
8800Sstevel@tonic-gate break;
8810Sstevel@tonic-gate case 0:
8820Sstevel@tonic-gate /*
8830Sstevel@tonic-gate * Finishing commit transaction.
8840Sstevel@tonic-gate */
8850Sstevel@tonic-gate if (pool_buf != NULL) {
8860Sstevel@tonic-gate kmem_free(pool_buf, pool_bufsz);
8870Sstevel@tonic-gate pool_buf = NULL;
8880Sstevel@tonic-gate pool_bufsz = 0;
8890Sstevel@tonic-gate }
8900Sstevel@tonic-gate break;
8910Sstevel@tonic-gate default:
8920Sstevel@tonic-gate ret = EINVAL;
8930Sstevel@tonic-gate }
8940Sstevel@tonic-gate return (ret);
8950Sstevel@tonic-gate }
8960Sstevel@tonic-gate
8970Sstevel@tonic-gate /*
8980Sstevel@tonic-gate * Check is the specified property is special
8990Sstevel@tonic-gate */
9000Sstevel@tonic-gate static pool_property_t *
pool_property_find(char * name,pool_property_t * list)9010Sstevel@tonic-gate pool_property_find(char *name, pool_property_t *list)
9020Sstevel@tonic-gate {
9030Sstevel@tonic-gate pool_property_t *prop;
9040Sstevel@tonic-gate
9050Sstevel@tonic-gate for (prop = list; prop->pp_name != NULL; prop++)
9060Sstevel@tonic-gate if (strcmp(prop->pp_name, name) == 0)
9070Sstevel@tonic-gate return (prop);
9080Sstevel@tonic-gate return (NULL);
9090Sstevel@tonic-gate }
9100Sstevel@tonic-gate
9110Sstevel@tonic-gate static pool_property_t pool_prop_sys[] = {
9120Sstevel@tonic-gate { "system.name", DATA_TYPE_STRING, PP_RDWR },
9130Sstevel@tonic-gate { "system.comment", DATA_TYPE_STRING, PP_RDWR },
9140Sstevel@tonic-gate { "system.version", DATA_TYPE_UINT64, PP_READ },
9150Sstevel@tonic-gate { "system.bind-default", DATA_TYPE_BYTE, PP_RDWR },
9160Sstevel@tonic-gate { "system.allocate-method", DATA_TYPE_STRING,
9170Sstevel@tonic-gate PP_RDWR | PP_OPTIONAL },
9180Sstevel@tonic-gate { "system.poold.log-level", DATA_TYPE_STRING,
9190Sstevel@tonic-gate PP_RDWR | PP_OPTIONAL },
9200Sstevel@tonic-gate { "system.poold.log-location", DATA_TYPE_STRING,
9210Sstevel@tonic-gate PP_RDWR | PP_OPTIONAL },
9220Sstevel@tonic-gate { "system.poold.monitor-interval", DATA_TYPE_UINT64,
9230Sstevel@tonic-gate PP_RDWR | PP_OPTIONAL },
9240Sstevel@tonic-gate { "system.poold.history-file", DATA_TYPE_STRING,
9250Sstevel@tonic-gate PP_RDWR | PP_OPTIONAL },
9260Sstevel@tonic-gate { "system.poold.objectives", DATA_TYPE_STRING,
9270Sstevel@tonic-gate PP_RDWR | PP_OPTIONAL },
9280Sstevel@tonic-gate { NULL, 0, 0 }
9290Sstevel@tonic-gate };
9300Sstevel@tonic-gate
9310Sstevel@tonic-gate static pool_property_t pool_prop_pool[] = {
9320Sstevel@tonic-gate { "pool.sys_id", DATA_TYPE_UINT64, PP_READ },
9330Sstevel@tonic-gate { "pool.name", DATA_TYPE_STRING, PP_RDWR },
9340Sstevel@tonic-gate { "pool.default", DATA_TYPE_BYTE, PP_READ },
9350Sstevel@tonic-gate { "pool.active", DATA_TYPE_BYTE, PP_RDWR },
9360Sstevel@tonic-gate { "pool.importance", DATA_TYPE_INT64, PP_RDWR },
9370Sstevel@tonic-gate { "pool.comment", DATA_TYPE_STRING, PP_RDWR },
9380Sstevel@tonic-gate { "pool.scheduler", DATA_TYPE_STRING,
9390Sstevel@tonic-gate PP_RDWR | PP_OPTIONAL },
9400Sstevel@tonic-gate { NULL, 0, 0 }
9410Sstevel@tonic-gate };
9420Sstevel@tonic-gate
9430Sstevel@tonic-gate /*
9440Sstevel@tonic-gate * Common routine to put new property on the specified list
9450Sstevel@tonic-gate */
9460Sstevel@tonic-gate int
pool_propput_common(nvlist_t * nvlist,nvpair_t * pair,pool_property_t * props)9470Sstevel@tonic-gate pool_propput_common(nvlist_t *nvlist, nvpair_t *pair, pool_property_t *props)
9480Sstevel@tonic-gate {
9490Sstevel@tonic-gate pool_property_t *prop;
9500Sstevel@tonic-gate
9510Sstevel@tonic-gate if ((prop = pool_property_find(nvpair_name(pair), props)) != NULL) {
9520Sstevel@tonic-gate /*
9530Sstevel@tonic-gate * No read-only properties or properties with bad types
9540Sstevel@tonic-gate */
9550Sstevel@tonic-gate if (!(prop->pp_perm & PP_WRITE) ||
9560Sstevel@tonic-gate prop->pp_type != nvpair_type(pair))
9570Sstevel@tonic-gate return (EINVAL);
9580Sstevel@tonic-gate }
9590Sstevel@tonic-gate return (nvlist_add_nvpair(nvlist, pair));
9600Sstevel@tonic-gate }
9610Sstevel@tonic-gate
9620Sstevel@tonic-gate /*
9630Sstevel@tonic-gate * Common routine to remove property from the given list
9640Sstevel@tonic-gate */
9650Sstevel@tonic-gate int
pool_proprm_common(nvlist_t * nvlist,char * name,pool_property_t * props)9660Sstevel@tonic-gate pool_proprm_common(nvlist_t *nvlist, char *name, pool_property_t *props)
9670Sstevel@tonic-gate {
9680Sstevel@tonic-gate pool_property_t *prop;
9690Sstevel@tonic-gate
9700Sstevel@tonic-gate if ((prop = pool_property_find(name, props)) != NULL) {
9710Sstevel@tonic-gate if (!(prop->pp_perm & PP_OPTIONAL))
9720Sstevel@tonic-gate return (EINVAL);
9730Sstevel@tonic-gate }
9740Sstevel@tonic-gate return (nvlist_remove_all(nvlist, name));
9750Sstevel@tonic-gate }
9760Sstevel@tonic-gate
9770Sstevel@tonic-gate static int
pool_system_propput(nvpair_t * pair)9780Sstevel@tonic-gate pool_system_propput(nvpair_t *pair)
9790Sstevel@tonic-gate {
9800Sstevel@tonic-gate int ret;
9810Sstevel@tonic-gate
9820Sstevel@tonic-gate ASSERT(pool_lock_held());
9830Sstevel@tonic-gate ret = pool_propput_common(pool_sys_prop, pair, pool_prop_sys);
9840Sstevel@tonic-gate if (ret == 0)
9850Sstevel@tonic-gate pool_sys_mod = gethrtime();
9860Sstevel@tonic-gate return (ret);
9870Sstevel@tonic-gate }
9880Sstevel@tonic-gate
9890Sstevel@tonic-gate static int
pool_system_proprm(char * name)9900Sstevel@tonic-gate pool_system_proprm(char *name)
9910Sstevel@tonic-gate {
9920Sstevel@tonic-gate int ret;
9930Sstevel@tonic-gate
9940Sstevel@tonic-gate ASSERT(pool_lock_held());
9950Sstevel@tonic-gate ret = pool_proprm_common(pool_sys_prop, name, pool_prop_sys);
9960Sstevel@tonic-gate if (ret == 0)
9970Sstevel@tonic-gate pool_sys_mod = gethrtime();
9980Sstevel@tonic-gate return (ret);
9990Sstevel@tonic-gate }
10000Sstevel@tonic-gate
10010Sstevel@tonic-gate static int
pool_pool_propput(poolid_t poolid,nvpair_t * pair)10020Sstevel@tonic-gate pool_pool_propput(poolid_t poolid, nvpair_t *pair)
10030Sstevel@tonic-gate {
10040Sstevel@tonic-gate pool_t *pool;
10050Sstevel@tonic-gate int ret;
10060Sstevel@tonic-gate
10070Sstevel@tonic-gate ASSERT(pool_lock_held());
10080Sstevel@tonic-gate if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
10090Sstevel@tonic-gate return (ESRCH);
10100Sstevel@tonic-gate ret = pool_propput_common(pool->pool_props, pair, pool_prop_pool);
10110Sstevel@tonic-gate if (ret == 0)
10120Sstevel@tonic-gate pool_pool_mod = gethrtime();
10130Sstevel@tonic-gate return (ret);
10140Sstevel@tonic-gate }
10150Sstevel@tonic-gate
10160Sstevel@tonic-gate static int
pool_pool_proprm(poolid_t poolid,char * name)10170Sstevel@tonic-gate pool_pool_proprm(poolid_t poolid, char *name)
10180Sstevel@tonic-gate {
10190Sstevel@tonic-gate int ret;
10200Sstevel@tonic-gate pool_t *pool;
10210Sstevel@tonic-gate
10220Sstevel@tonic-gate ASSERT(pool_lock_held());
10230Sstevel@tonic-gate if ((pool = pool_lookup_pool_by_id(poolid)) == NULL)
10240Sstevel@tonic-gate return (ESRCH);
10250Sstevel@tonic-gate ret = pool_proprm_common(pool->pool_props, name, pool_prop_pool);
10260Sstevel@tonic-gate if (ret == 0)
10270Sstevel@tonic-gate pool_pool_mod = gethrtime();
10280Sstevel@tonic-gate return (ret);
10290Sstevel@tonic-gate }
10300Sstevel@tonic-gate
10310Sstevel@tonic-gate int
pool_propput(int class,int subclass,id_t id,nvpair_t * pair)10320Sstevel@tonic-gate pool_propput(int class, int subclass, id_t id, nvpair_t *pair)
10330Sstevel@tonic-gate {
10340Sstevel@tonic-gate int ret;
10350Sstevel@tonic-gate
10360Sstevel@tonic-gate ASSERT(pool_lock_held());
10370Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
10380Sstevel@tonic-gate return (ENOTACTIVE);
10390Sstevel@tonic-gate switch (class) {
10400Sstevel@tonic-gate case PEC_SYSTEM:
10410Sstevel@tonic-gate ret = pool_system_propput(pair);
10420Sstevel@tonic-gate break;
10430Sstevel@tonic-gate case PEC_POOL:
10440Sstevel@tonic-gate ret = pool_pool_propput((poolid_t)id, pair);
10450Sstevel@tonic-gate break;
10460Sstevel@tonic-gate case PEC_RES_COMP:
10470Sstevel@tonic-gate switch (subclass) {
10480Sstevel@tonic-gate case PREC_PSET:
10490Sstevel@tonic-gate ret = pool_pset_propput((psetid_t)id, pair);
10500Sstevel@tonic-gate break;
10510Sstevel@tonic-gate default:
10520Sstevel@tonic-gate ret = EINVAL;
10530Sstevel@tonic-gate }
10540Sstevel@tonic-gate break;
10550Sstevel@tonic-gate case PEC_RES_AGG:
10560Sstevel@tonic-gate ret = ENOTSUP;
10570Sstevel@tonic-gate break;
10580Sstevel@tonic-gate case PEC_COMP:
10590Sstevel@tonic-gate switch (subclass) {
10600Sstevel@tonic-gate case PCEC_CPU:
10610Sstevel@tonic-gate ret = pool_cpu_propput((processorid_t)id, pair);
10620Sstevel@tonic-gate break;
10630Sstevel@tonic-gate default:
10640Sstevel@tonic-gate ret = EINVAL;
10650Sstevel@tonic-gate }
10660Sstevel@tonic-gate break;
10670Sstevel@tonic-gate default:
10680Sstevel@tonic-gate ret = EINVAL;
10690Sstevel@tonic-gate }
10700Sstevel@tonic-gate return (ret);
10710Sstevel@tonic-gate }
10720Sstevel@tonic-gate
10730Sstevel@tonic-gate int
pool_proprm(int class,int subclass,id_t id,char * name)10740Sstevel@tonic-gate pool_proprm(int class, int subclass, id_t id, char *name)
10750Sstevel@tonic-gate {
10760Sstevel@tonic-gate int ret;
10770Sstevel@tonic-gate
10780Sstevel@tonic-gate ASSERT(pool_lock_held());
10790Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
10800Sstevel@tonic-gate return (ENOTACTIVE);
10810Sstevel@tonic-gate switch (class) {
10820Sstevel@tonic-gate case PEC_SYSTEM:
10830Sstevel@tonic-gate ret = pool_system_proprm(name);
10840Sstevel@tonic-gate break;
10850Sstevel@tonic-gate case PEC_POOL:
10860Sstevel@tonic-gate ret = pool_pool_proprm((poolid_t)id, name);
10870Sstevel@tonic-gate break;
10880Sstevel@tonic-gate case PEC_RES_COMP:
10890Sstevel@tonic-gate switch (subclass) {
10900Sstevel@tonic-gate case PREC_PSET:
10910Sstevel@tonic-gate ret = pool_pset_proprm((psetid_t)id, name);
10920Sstevel@tonic-gate break;
10930Sstevel@tonic-gate default:
10940Sstevel@tonic-gate ret = EINVAL;
10950Sstevel@tonic-gate }
10960Sstevel@tonic-gate break;
10970Sstevel@tonic-gate case PEC_RES_AGG:
10980Sstevel@tonic-gate ret = ENOTSUP;
10990Sstevel@tonic-gate break;
11000Sstevel@tonic-gate case PEC_COMP:
11010Sstevel@tonic-gate switch (subclass) {
11020Sstevel@tonic-gate case PCEC_CPU:
11030Sstevel@tonic-gate ret = pool_cpu_proprm((processorid_t)id, name);
11040Sstevel@tonic-gate break;
11050Sstevel@tonic-gate default:
11060Sstevel@tonic-gate ret = EINVAL;
11070Sstevel@tonic-gate }
11080Sstevel@tonic-gate break;
11090Sstevel@tonic-gate default:
11100Sstevel@tonic-gate ret = EINVAL;
11110Sstevel@tonic-gate }
11120Sstevel@tonic-gate return (ret);
11130Sstevel@tonic-gate }
11140Sstevel@tonic-gate
11150Sstevel@tonic-gate int
pool_propget(char * name,int class,int subclass,id_t id,nvlist_t ** nvlp)11160Sstevel@tonic-gate pool_propget(char *name, int class, int subclass, id_t id, nvlist_t **nvlp)
11170Sstevel@tonic-gate {
11180Sstevel@tonic-gate int ret;
11190Sstevel@tonic-gate nvlist_t *nvl;
11200Sstevel@tonic-gate
11210Sstevel@tonic-gate ASSERT(pool_lock_held());
11220Sstevel@tonic-gate if (pool_state == POOL_DISABLED)
11230Sstevel@tonic-gate return (ENOTACTIVE);
11240Sstevel@tonic-gate
11250Sstevel@tonic-gate (void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
11260Sstevel@tonic-gate
11270Sstevel@tonic-gate switch (class) {
11280Sstevel@tonic-gate case PEC_SYSTEM:
11290Sstevel@tonic-gate case PEC_POOL:
11300Sstevel@tonic-gate ret = EINVAL;
11310Sstevel@tonic-gate break;
11320Sstevel@tonic-gate case PEC_RES_COMP:
11330Sstevel@tonic-gate switch (subclass) {
11340Sstevel@tonic-gate case PREC_PSET:
11350Sstevel@tonic-gate ret = pool_pset_propget((psetid_t)id, name, nvl);
11360Sstevel@tonic-gate break;
11370Sstevel@tonic-gate default:
11380Sstevel@tonic-gate ret = EINVAL;
11390Sstevel@tonic-gate }
11400Sstevel@tonic-gate break;
11410Sstevel@tonic-gate case PEC_RES_AGG:
11420Sstevel@tonic-gate ret = ENOTSUP;
11430Sstevel@tonic-gate break;
11440Sstevel@tonic-gate case PEC_COMP:
11450Sstevel@tonic-gate switch (subclass) {
11460Sstevel@tonic-gate case PCEC_CPU:
11470Sstevel@tonic-gate ret = pool_cpu_propget((processorid_t)id, name, nvl);
11480Sstevel@tonic-gate break;
11490Sstevel@tonic-gate default:
11500Sstevel@tonic-gate ret = EINVAL;
11510Sstevel@tonic-gate }
11520Sstevel@tonic-gate break;
11530Sstevel@tonic-gate default:
11540Sstevel@tonic-gate ret = EINVAL;
11550Sstevel@tonic-gate }
11560Sstevel@tonic-gate if (ret == 0)
11570Sstevel@tonic-gate *nvlp = nvl;
11580Sstevel@tonic-gate else
11590Sstevel@tonic-gate nvlist_free(nvl);
11600Sstevel@tonic-gate return (ret);
11610Sstevel@tonic-gate }
11620Sstevel@tonic-gate
11630Sstevel@tonic-gate /*
11640Sstevel@tonic-gate * pool_bind_wake and pool_bind_wakeall are helper functions to undo PBWAITs
11650Sstevel@tonic-gate * in case of failure in pool_do_bind().
11660Sstevel@tonic-gate */
11670Sstevel@tonic-gate static void
pool_bind_wake(proc_t * p)11680Sstevel@tonic-gate pool_bind_wake(proc_t *p)
11690Sstevel@tonic-gate {
11700Sstevel@tonic-gate ASSERT(pool_lock_held());
11710Sstevel@tonic-gate
11720Sstevel@tonic-gate mutex_enter(&p->p_lock);
11730Sstevel@tonic-gate ASSERT(p->p_poolflag & PBWAIT);
11740Sstevel@tonic-gate if (p->p_poolcnt > 0) {
11750Sstevel@tonic-gate mutex_enter(&pool_barrier_lock);
11760Sstevel@tonic-gate pool_barrier_count -= p->p_poolcnt;
11770Sstevel@tonic-gate mutex_exit(&pool_barrier_lock);
11780Sstevel@tonic-gate }
11790Sstevel@tonic-gate p->p_poolflag &= ~PBWAIT;
11800Sstevel@tonic-gate cv_signal(&p->p_poolcv);
11810Sstevel@tonic-gate mutex_exit(&p->p_lock);
11820Sstevel@tonic-gate }
11830Sstevel@tonic-gate
11840Sstevel@tonic-gate static void
pool_bind_wakeall(proc_t ** procs)11850Sstevel@tonic-gate pool_bind_wakeall(proc_t **procs)
11860Sstevel@tonic-gate {
11870Sstevel@tonic-gate proc_t *p, **pp;
11880Sstevel@tonic-gate
11890Sstevel@tonic-gate ASSERT(pool_lock_held());
11900Sstevel@tonic-gate for (pp = procs; (p = *pp) != NULL; pp++)
11910Sstevel@tonic-gate pool_bind_wake(p);
11920Sstevel@tonic-gate }
11930Sstevel@tonic-gate
11940Sstevel@tonic-gate /*
11950Sstevel@tonic-gate * Return the scheduling class for this pool, or
11960Sstevel@tonic-gate * POOL_CLASS_UNSET if not set
11970Sstevel@tonic-gate * POOL_CLASS_INVAL if set to an invalid class ID.
11980Sstevel@tonic-gate */
11990Sstevel@tonic-gate id_t
pool_get_class(pool_t * pool)12000Sstevel@tonic-gate pool_get_class(pool_t *pool)
12010Sstevel@tonic-gate {
12020Sstevel@tonic-gate char *name;
12030Sstevel@tonic-gate id_t cid;
12040Sstevel@tonic-gate
12050Sstevel@tonic-gate ASSERT(pool_lock_held());
12060Sstevel@tonic-gate
12070Sstevel@tonic-gate if (nvlist_lookup_string(pool->pool_props, "pool.scheduler",
12080Sstevel@tonic-gate &name) == 0) {
12090Sstevel@tonic-gate if (getcidbyname(name, &cid) == 0)
12100Sstevel@tonic-gate return (cid);
12110Sstevel@tonic-gate else
12120Sstevel@tonic-gate return (POOL_CLASS_INVAL);
12130Sstevel@tonic-gate }
12140Sstevel@tonic-gate return (POOL_CLASS_UNSET);
12150Sstevel@tonic-gate }
12160Sstevel@tonic-gate
12170Sstevel@tonic-gate /*
12180Sstevel@tonic-gate * Move process to the new scheduling class.
12190Sstevel@tonic-gate */
12200Sstevel@tonic-gate static void
pool_change_class(proc_t * p,id_t cid)12210Sstevel@tonic-gate pool_change_class(proc_t *p, id_t cid)
12220Sstevel@tonic-gate {
12230Sstevel@tonic-gate kthread_t *t;
12240Sstevel@tonic-gate void *cldata;
12250Sstevel@tonic-gate id_t oldcid;
12260Sstevel@tonic-gate void **bufs;
12270Sstevel@tonic-gate void **buf;
12280Sstevel@tonic-gate int nlwp;
12290Sstevel@tonic-gate int ret;
12300Sstevel@tonic-gate int i;
12310Sstevel@tonic-gate
12320Sstevel@tonic-gate /*
12330Sstevel@tonic-gate * Do not move kernel processes (such as zsched).
12340Sstevel@tonic-gate */
12350Sstevel@tonic-gate if (p->p_flag & SSYS)
12360Sstevel@tonic-gate return;
12370Sstevel@tonic-gate /*
12380Sstevel@tonic-gate * This process is in the pool barrier, so it can't possibly be
12390Sstevel@tonic-gate * adding new threads and we can use p_lwpcnt + p_zombcnt + 1
12400Sstevel@tonic-gate * (for possible agent LWP which doesn't use pool barrier) as
12410Sstevel@tonic-gate * our upper bound.
12420Sstevel@tonic-gate */
12430Sstevel@tonic-gate nlwp = p->p_lwpcnt + p->p_zombcnt + 1;
12440Sstevel@tonic-gate
12450Sstevel@tonic-gate /*
12460Sstevel@tonic-gate * Pre-allocate scheduling class specific buffers before
12470Sstevel@tonic-gate * grabbing p_lock.
12480Sstevel@tonic-gate */
12490Sstevel@tonic-gate bufs = kmem_zalloc(nlwp * sizeof (void *), KM_SLEEP);
12500Sstevel@tonic-gate for (i = 0, buf = bufs; i < nlwp; i++, buf++) {
12510Sstevel@tonic-gate ret = CL_ALLOC(buf, cid, KM_SLEEP);
12520Sstevel@tonic-gate ASSERT(ret == 0);
12530Sstevel@tonic-gate }
12540Sstevel@tonic-gate
12550Sstevel@tonic-gate /*
12560Sstevel@tonic-gate * Move threads one by one to the new scheduling class.
12570Sstevel@tonic-gate * This never fails because we have all the right
12580Sstevel@tonic-gate * privileges here.
12590Sstevel@tonic-gate */
12600Sstevel@tonic-gate mutex_enter(&p->p_lock);
12610Sstevel@tonic-gate ASSERT(p->p_poolflag & PBWAIT);
12620Sstevel@tonic-gate buf = bufs;
12630Sstevel@tonic-gate t = p->p_tlist;
12640Sstevel@tonic-gate ASSERT(t != NULL);
12650Sstevel@tonic-gate do {
12660Sstevel@tonic-gate if (t->t_cid != cid) {
12670Sstevel@tonic-gate oldcid = t->t_cid;
12680Sstevel@tonic-gate cldata = t->t_cldata;
12690Sstevel@tonic-gate ret = CL_ENTERCLASS(t, cid, NULL, NULL, *buf);
12700Sstevel@tonic-gate ASSERT(ret == 0);
12710Sstevel@tonic-gate CL_EXITCLASS(oldcid, cldata);
12726247Sraf schedctl_set_cidpri(t);
12730Sstevel@tonic-gate *buf++ = NULL;
12740Sstevel@tonic-gate }
12750Sstevel@tonic-gate } while ((t = t->t_forw) != p->p_tlist);
12760Sstevel@tonic-gate mutex_exit(&p->p_lock);
12770Sstevel@tonic-gate /*
12780Sstevel@tonic-gate * Free unused scheduling class specific buffers.
12790Sstevel@tonic-gate */
12800Sstevel@tonic-gate for (i = 0, buf = bufs; i < nlwp; i++, buf++) {
12810Sstevel@tonic-gate if (*buf != NULL) {
12820Sstevel@tonic-gate CL_FREE(cid, *buf);
12830Sstevel@tonic-gate *buf = NULL;
12840Sstevel@tonic-gate }
12850Sstevel@tonic-gate }
12860Sstevel@tonic-gate kmem_free(bufs, nlwp * sizeof (void *));
12870Sstevel@tonic-gate }
12880Sstevel@tonic-gate
128911878SVenu.Iyer@Sun.COM void
pool_get_name(pool_t * pool,char ** name)129011878SVenu.Iyer@Sun.COM pool_get_name(pool_t *pool, char **name)
129111878SVenu.Iyer@Sun.COM {
129211878SVenu.Iyer@Sun.COM ASSERT(pool_lock_held());
129311878SVenu.Iyer@Sun.COM
129411878SVenu.Iyer@Sun.COM (void) nvlist_lookup_string(pool->pool_props, "pool.name", name);
129511878SVenu.Iyer@Sun.COM
129611878SVenu.Iyer@Sun.COM ASSERT(strlen(*name) != 0);
129711878SVenu.Iyer@Sun.COM }
129811878SVenu.Iyer@Sun.COM
129911878SVenu.Iyer@Sun.COM
13000Sstevel@tonic-gate /*
13010Sstevel@tonic-gate * The meat of the bind operation. The steps in pool_do_bind are:
13020Sstevel@tonic-gate *
13030Sstevel@tonic-gate * 1) Set PBWAIT in the p_poolflag of any process of interest, and add all
13040Sstevel@tonic-gate * such processes to an array. For any interesting process that has
13050Sstevel@tonic-gate * threads inside the pool barrier set, increment a counter by the
13060Sstevel@tonic-gate * count of such threads. Once PBWAIT is set on a process, that process
13070Sstevel@tonic-gate * will not disappear.
13080Sstevel@tonic-gate *
13090Sstevel@tonic-gate * 2) Wait for the counter from step 1 to drop to zero. Any process which
13100Sstevel@tonic-gate * calls pool_barrier_exit() and notices that PBWAIT has been set on it
13110Sstevel@tonic-gate * will decrement that counter before going to sleep, and the process
13120Sstevel@tonic-gate * calling pool_barrier_exit() which does the final decrement will wake us.
13130Sstevel@tonic-gate *
13140Sstevel@tonic-gate * 3) For each interesting process, perform a calculation on it to see if
13150Sstevel@tonic-gate * the bind will actually succeed. This uses the following three
13160Sstevel@tonic-gate * resource-set-specific functions:
13170Sstevel@tonic-gate *
13180Sstevel@tonic-gate * - int set_bind_start(procs, pool)
13190Sstevel@tonic-gate *
13200Sstevel@tonic-gate * Determine whether the given array of processes can be bound to the
13210Sstevel@tonic-gate * resource set associated with the given pool. If it can, take and hold
13220Sstevel@tonic-gate * any locks necessary to ensure that the operation will succeed, and
13230Sstevel@tonic-gate * make any necessary reservations in the target resource set. If it
13240Sstevel@tonic-gate * can't, return failure with no reservations made and no new locks held.
13250Sstevel@tonic-gate *
13260Sstevel@tonic-gate * - void set_bind_abort(procs, pool)
13270Sstevel@tonic-gate *
13280Sstevel@tonic-gate * set_bind_start() has completed successfully, but another resource set's
13290Sstevel@tonic-gate * set_bind_start() has failed, and we haven't begun the bind yet. Undo
13300Sstevel@tonic-gate * any reservations made and drop any locks acquired by our
13310Sstevel@tonic-gate * set_bind_start().
13320Sstevel@tonic-gate *
13330Sstevel@tonic-gate * - void set_bind_finish(void)
13340Sstevel@tonic-gate *
13350Sstevel@tonic-gate * The bind has completed successfully. The processes have been released,
13360Sstevel@tonic-gate * and the reservation acquired in set_bind_start() has been depleted as
13370Sstevel@tonic-gate * the processes have finished their bindings. Drop any locks acquired by
13380Sstevel@tonic-gate * set_bind_start().
13390Sstevel@tonic-gate *
13400Sstevel@tonic-gate * 4) If we've decided that we can proceed with the bind, iterate through
13410Sstevel@tonic-gate * the list of interesting processes, grab the necessary locks (which
13420Sstevel@tonic-gate * may differ per resource set), perform the bind, and ASSERT that it
13430Sstevel@tonic-gate * succeeds. Once a process has been rebound, it can be awakened.
13440Sstevel@tonic-gate *
13450Sstevel@tonic-gate * The operations from step 4 must be kept in sync with anything which might
13460Sstevel@tonic-gate * cause the bind operations (e.g., cpupart_bind_thread()) to fail, and
13470Sstevel@tonic-gate * are thus located in the same source files as the associated bind operations.
13480Sstevel@tonic-gate */
13490Sstevel@tonic-gate int
pool_do_bind(pool_t * pool,idtype_t idtype,id_t id,int flags)13500Sstevel@tonic-gate pool_do_bind(pool_t *pool, idtype_t idtype, id_t id, int flags)
13510Sstevel@tonic-gate {
13520Sstevel@tonic-gate extern uint_t nproc;
13530Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread);
13540Sstevel@tonic-gate proc_t **pp, **procs;
13550Sstevel@tonic-gate proc_t *prstart;
13560Sstevel@tonic-gate int procs_count = 0;
13570Sstevel@tonic-gate kproject_t *kpj;
13580Sstevel@tonic-gate procset_t set;
13590Sstevel@tonic-gate zone_t *zone;
13600Sstevel@tonic-gate int procs_size;
13610Sstevel@tonic-gate int rv = 0;
13620Sstevel@tonic-gate proc_t *p;
13630Sstevel@tonic-gate id_t cid = -1;
13640Sstevel@tonic-gate
13650Sstevel@tonic-gate ASSERT(pool_lock_held());
13660Sstevel@tonic-gate
13670Sstevel@tonic-gate if ((cid = pool_get_class(pool)) == POOL_CLASS_INVAL)
13680Sstevel@tonic-gate return (EINVAL);
13690Sstevel@tonic-gate
13700Sstevel@tonic-gate if (idtype == P_ZONEID) {
13710Sstevel@tonic-gate zone = zone_find_by_id(id);
13720Sstevel@tonic-gate if (zone == NULL)
13730Sstevel@tonic-gate return (ESRCH);
13740Sstevel@tonic-gate if (zone_status_get(zone) > ZONE_IS_RUNNING) {
13750Sstevel@tonic-gate zone_rele(zone);
13760Sstevel@tonic-gate return (EBUSY);
13770Sstevel@tonic-gate }
13780Sstevel@tonic-gate }
13790Sstevel@tonic-gate
13800Sstevel@tonic-gate if (idtype == P_PROJID) {
13813247Sgjelinek kpj = project_hold_by_id(id, global_zone, PROJECT_HOLD_FIND);
13820Sstevel@tonic-gate if (kpj == NULL)
13830Sstevel@tonic-gate return (ESRCH);
13840Sstevel@tonic-gate mutex_enter(&kpj->kpj_poolbind);
13850Sstevel@tonic-gate }
13860Sstevel@tonic-gate
13870Sstevel@tonic-gate if (idtype == P_PID) {
13880Sstevel@tonic-gate /*
13890Sstevel@tonic-gate * Fast-path for a single process case.
13900Sstevel@tonic-gate */
13910Sstevel@tonic-gate procs_size = 2; /* procs is NULL-terminated */
13920Sstevel@tonic-gate procs = kmem_zalloc(procs_size * sizeof (proc_t *), KM_SLEEP);
13930Sstevel@tonic-gate mutex_enter(&pidlock);
13940Sstevel@tonic-gate } else {
13950Sstevel@tonic-gate /*
13960Sstevel@tonic-gate * We will need enough slots for proc_t pointers for as many as
13970Sstevel@tonic-gate * twice the number of currently running processes (assuming
13980Sstevel@tonic-gate * that each one could be in fork() creating a new child).
13990Sstevel@tonic-gate */
14000Sstevel@tonic-gate for (;;) {
14010Sstevel@tonic-gate procs_size = nproc * 2;
14020Sstevel@tonic-gate procs = kmem_zalloc(procs_size * sizeof (proc_t *),
14030Sstevel@tonic-gate KM_SLEEP);
14040Sstevel@tonic-gate mutex_enter(&pidlock);
14050Sstevel@tonic-gate
14060Sstevel@tonic-gate if (nproc * 2 <= procs_size)
14070Sstevel@tonic-gate break;
14080Sstevel@tonic-gate /*
14090Sstevel@tonic-gate * If nproc has changed, try again.
14100Sstevel@tonic-gate */
14110Sstevel@tonic-gate mutex_exit(&pidlock);
14120Sstevel@tonic-gate kmem_free(procs, procs_size * sizeof (proc_t *));
14130Sstevel@tonic-gate }
14140Sstevel@tonic-gate }
14150Sstevel@tonic-gate
14160Sstevel@tonic-gate if (id == P_MYID)
14170Sstevel@tonic-gate id = getmyid(idtype);
14180Sstevel@tonic-gate setprocset(&set, POP_AND, idtype, id, P_ALL, 0);
14190Sstevel@tonic-gate
14200Sstevel@tonic-gate /*
14210Sstevel@tonic-gate * Do a first scan, and select target processes.
14220Sstevel@tonic-gate */
14230Sstevel@tonic-gate if (idtype == P_PID)
14240Sstevel@tonic-gate prstart = prfind(id);
14250Sstevel@tonic-gate else
14260Sstevel@tonic-gate prstart = practive;
14270Sstevel@tonic-gate for (p = prstart, pp = procs; p != NULL; p = p->p_next) {
14280Sstevel@tonic-gate mutex_enter(&p->p_lock);
14290Sstevel@tonic-gate /*
14300Sstevel@tonic-gate * Skip processes that don't match our (id, idtype) set or
14310Sstevel@tonic-gate * on the way of becoming zombies. Skip kernel processes
14320Sstevel@tonic-gate * from the global zone.
14330Sstevel@tonic-gate */
14340Sstevel@tonic-gate if (procinset(p, &set) == 0 ||
14350Sstevel@tonic-gate p->p_poolflag & PEXITED ||
14360Sstevel@tonic-gate ((p->p_flag & SSYS) && INGLOBALZONE(p))) {
14370Sstevel@tonic-gate mutex_exit(&p->p_lock);
14380Sstevel@tonic-gate continue;
14390Sstevel@tonic-gate }
14400Sstevel@tonic-gate if (!INGLOBALZONE(p)) {
14410Sstevel@tonic-gate switch (idtype) {
14420Sstevel@tonic-gate case P_PID:
14430Sstevel@tonic-gate case P_TASKID:
14440Sstevel@tonic-gate /*
14450Sstevel@tonic-gate * Can't bind processes or tasks
14460Sstevel@tonic-gate * in local zones to pools.
14470Sstevel@tonic-gate */
14480Sstevel@tonic-gate mutex_exit(&p->p_lock);
14490Sstevel@tonic-gate mutex_exit(&pidlock);
14500Sstevel@tonic-gate pool_bind_wakeall(procs);
14510Sstevel@tonic-gate rv = EINVAL;
14520Sstevel@tonic-gate goto out;
14530Sstevel@tonic-gate case P_PROJID:
14540Sstevel@tonic-gate /*
14550Sstevel@tonic-gate * Only projects in the global
14560Sstevel@tonic-gate * zone can be rebound.
14570Sstevel@tonic-gate */
14580Sstevel@tonic-gate mutex_exit(&p->p_lock);
14590Sstevel@tonic-gate continue;
14600Sstevel@tonic-gate case P_POOLID:
14610Sstevel@tonic-gate /*
14620Sstevel@tonic-gate * When rebinding pools, processes can be
14630Sstevel@tonic-gate * in different zones.
14640Sstevel@tonic-gate */
14650Sstevel@tonic-gate break;
14660Sstevel@tonic-gate }
14670Sstevel@tonic-gate }
14680Sstevel@tonic-gate
14690Sstevel@tonic-gate p->p_poolflag |= PBWAIT;
14700Sstevel@tonic-gate /*
14710Sstevel@tonic-gate * If some threads in this process are inside the pool
14720Sstevel@tonic-gate * barrier, add them to pool_barrier_count, as we have
14730Sstevel@tonic-gate * to wait for all of them to exit the barrier.
14740Sstevel@tonic-gate */
14750Sstevel@tonic-gate if (p->p_poolcnt > 0) {
14760Sstevel@tonic-gate mutex_enter(&pool_barrier_lock);
14770Sstevel@tonic-gate pool_barrier_count += p->p_poolcnt;
14780Sstevel@tonic-gate mutex_exit(&pool_barrier_lock);
14790Sstevel@tonic-gate }
14800Sstevel@tonic-gate ASSERT(pp < &procs[procs_size]);
14810Sstevel@tonic-gate *pp++ = p;
14820Sstevel@tonic-gate procs_count++;
14830Sstevel@tonic-gate mutex_exit(&p->p_lock);
14840Sstevel@tonic-gate
14850Sstevel@tonic-gate /*
14860Sstevel@tonic-gate * We just found our process, so if we're only rebinding a
14870Sstevel@tonic-gate * single process then get out of this loop.
14880Sstevel@tonic-gate */
14890Sstevel@tonic-gate if (idtype == P_PID)
14900Sstevel@tonic-gate break;
14910Sstevel@tonic-gate }
14920Sstevel@tonic-gate *pp = NULL; /* cap off the end of the array */
14930Sstevel@tonic-gate mutex_exit(&pidlock);
14940Sstevel@tonic-gate
14950Sstevel@tonic-gate /*
14960Sstevel@tonic-gate * Wait for relevant processes to stop before they try to enter the
14970Sstevel@tonic-gate * barrier or at the exit from the barrier. Make sure that we do
14980Sstevel@tonic-gate * not get stopped here while we're holding pool_lock. If we were
14990Sstevel@tonic-gate * requested to stop, or got a signal then return EAGAIN to let the
15000Sstevel@tonic-gate * library know that it needs to retry.
15010Sstevel@tonic-gate */
15020Sstevel@tonic-gate mutex_enter(&pool_barrier_lock);
15030Sstevel@tonic-gate lwp->lwp_nostop++;
15040Sstevel@tonic-gate while (pool_barrier_count > 0) {
15050Sstevel@tonic-gate (void) cv_wait_sig(&pool_barrier_cv, &pool_barrier_lock);
15060Sstevel@tonic-gate if (pool_barrier_count > 0) {
15070Sstevel@tonic-gate /*
15080Sstevel@tonic-gate * We either got a signal or were requested to
15090Sstevel@tonic-gate * stop by /proc. Bail out with EAGAIN. If we were
15100Sstevel@tonic-gate * requested to stop, we'll stop in post_syscall()
15110Sstevel@tonic-gate * on our way back to userland.
15120Sstevel@tonic-gate */
15130Sstevel@tonic-gate mutex_exit(&pool_barrier_lock);
15140Sstevel@tonic-gate pool_bind_wakeall(procs);
15150Sstevel@tonic-gate lwp->lwp_nostop--;
15160Sstevel@tonic-gate rv = EAGAIN;
15170Sstevel@tonic-gate goto out;
15180Sstevel@tonic-gate }
15190Sstevel@tonic-gate }
15200Sstevel@tonic-gate lwp->lwp_nostop--;
15210Sstevel@tonic-gate mutex_exit(&pool_barrier_lock);
15220Sstevel@tonic-gate
15239418SSurya.Prakki@Sun.COM if (idtype == P_PID) {
15249418SSurya.Prakki@Sun.COM if ((p = *procs) == NULL)
15259418SSurya.Prakki@Sun.COM goto skip;
15269418SSurya.Prakki@Sun.COM mutex_enter(&p->p_lock);
15279418SSurya.Prakki@Sun.COM /* Drop the process if it is exiting */
15289418SSurya.Prakki@Sun.COM if (p->p_poolflag & PEXITED) {
15299418SSurya.Prakki@Sun.COM mutex_exit(&p->p_lock);
15309418SSurya.Prakki@Sun.COM pool_bind_wake(p);
15319418SSurya.Prakki@Sun.COM procs_count--;
15329418SSurya.Prakki@Sun.COM } else
15339418SSurya.Prakki@Sun.COM mutex_exit(&p->p_lock);
15340Sstevel@tonic-gate goto skip;
15359418SSurya.Prakki@Sun.COM }
15360Sstevel@tonic-gate
15370Sstevel@tonic-gate /*
15380Sstevel@tonic-gate * Do another run, and drop processes that were inside the barrier
15390Sstevel@tonic-gate * in exit(), but when they have dropped to pool_barrier_exit
15400Sstevel@tonic-gate * they have become of no interest to us. Pick up child processes that
15410Sstevel@tonic-gate * were created by fork() but didn't exist during our first scan.
15420Sstevel@tonic-gate * Their parents are now stopped at pool_barrier_exit in cfork().
15430Sstevel@tonic-gate */
15440Sstevel@tonic-gate mutex_enter(&pidlock);
15450Sstevel@tonic-gate for (pp = procs; (p = *pp) != NULL; pp++) {
15469418SSurya.Prakki@Sun.COM mutex_enter(&p->p_lock);
15470Sstevel@tonic-gate if (p->p_poolflag & PEXITED) {
15480Sstevel@tonic-gate ASSERT(p->p_lwpcnt == 0);
15499418SSurya.Prakki@Sun.COM mutex_exit(&p->p_lock);
15500Sstevel@tonic-gate pool_bind_wake(p);
15510Sstevel@tonic-gate /* flip w/last non-NULL slot */
15520Sstevel@tonic-gate *pp = procs[procs_count - 1];
15530Sstevel@tonic-gate procs[procs_count - 1] = NULL;
15540Sstevel@tonic-gate procs_count--;
15550Sstevel@tonic-gate pp--; /* try this slot again */
15560Sstevel@tonic-gate continue;
15579418SSurya.Prakki@Sun.COM } else
15589418SSurya.Prakki@Sun.COM mutex_exit(&p->p_lock);
15590Sstevel@tonic-gate /*
15600Sstevel@tonic-gate * Look at the child and check if it should be rebound also.
15610Sstevel@tonic-gate * We're holding pidlock, so it is safe to reference p_child.
15620Sstevel@tonic-gate */
15630Sstevel@tonic-gate if ((p = p->p_child) == NULL)
15640Sstevel@tonic-gate continue;
15650Sstevel@tonic-gate
15660Sstevel@tonic-gate mutex_enter(&p->p_lock);
15678067SJordan.Vaughan@Sun.com
15680Sstevel@tonic-gate /*
15698067SJordan.Vaughan@Sun.com * Skip system processes and make sure that the child is in
15708067SJordan.Vaughan@Sun.com * the same task/project/pool/zone as the parent.
15710Sstevel@tonic-gate */
15728067SJordan.Vaughan@Sun.com if ((!INGLOBALZONE(p) && idtype != P_ZONEID &&
15738067SJordan.Vaughan@Sun.com idtype != P_POOLID) || p->p_flag & SSYS) {
15740Sstevel@tonic-gate mutex_exit(&p->p_lock);
15750Sstevel@tonic-gate continue;
15760Sstevel@tonic-gate }
15770Sstevel@tonic-gate
15780Sstevel@tonic-gate /*
15790Sstevel@tonic-gate * If the child process has been already created by fork(), has
15800Sstevel@tonic-gate * not exited, and has not been added to the list already,
15810Sstevel@tonic-gate * then add it now. We will hit this process again (since we
15820Sstevel@tonic-gate * stick it at the end of the procs list) but it will ignored
15830Sstevel@tonic-gate * because it will have the PBWAIT flag set.
15840Sstevel@tonic-gate */
15850Sstevel@tonic-gate if (procinset(p, &set) &&
15860Sstevel@tonic-gate !(p->p_poolflag & PEXITED) &&
15870Sstevel@tonic-gate !(p->p_poolflag & PBWAIT)) {
15880Sstevel@tonic-gate ASSERT(p->p_child == NULL); /* no child of a child */
15890Sstevel@tonic-gate procs[procs_count] = p;
15900Sstevel@tonic-gate procs[procs_count + 1] = NULL;
15910Sstevel@tonic-gate procs_count++;
15920Sstevel@tonic-gate p->p_poolflag |= PBWAIT;
15930Sstevel@tonic-gate }
15940Sstevel@tonic-gate mutex_exit(&p->p_lock);
15950Sstevel@tonic-gate }
15960Sstevel@tonic-gate mutex_exit(&pidlock);
15970Sstevel@tonic-gate skip:
15980Sstevel@tonic-gate /*
15990Sstevel@tonic-gate * If there's no processes to rebind then return ESRCH, unless
16000Sstevel@tonic-gate * we're associating a pool with new resource set, destroying it,
16010Sstevel@tonic-gate * or binding a zone to a pool.
16020Sstevel@tonic-gate */
16030Sstevel@tonic-gate if (procs_count == 0) {
16040Sstevel@tonic-gate if (idtype == P_POOLID || idtype == P_ZONEID)
16050Sstevel@tonic-gate rv = 0;
16060Sstevel@tonic-gate else
16070Sstevel@tonic-gate rv = ESRCH;
16080Sstevel@tonic-gate goto out;
16090Sstevel@tonic-gate }
16100Sstevel@tonic-gate
16110Sstevel@tonic-gate #ifdef DEBUG
16120Sstevel@tonic-gate /*
16136618Srh87107 * All processes in the array should have PBWAIT set, and none
16146618Srh87107 * should be in the critical section. Thus, although p_poolflag
16156618Srh87107 * and p_poolcnt are protected by p_lock, their ASSERTions below
16166618Srh87107 * should be stable without it. procinset(), however, ASSERTs that
16176618Srh87107 * the p_lock is held upon entry.
16180Sstevel@tonic-gate */
16190Sstevel@tonic-gate for (pp = procs; (p = *pp) != NULL; pp++) {
16206618Srh87107 int in_set;
16216618Srh87107
16226618Srh87107 mutex_enter(&p->p_lock);
16236618Srh87107 in_set = procinset(p, &set);
16246618Srh87107 mutex_exit(&p->p_lock);
16256618Srh87107
16266618Srh87107 ASSERT(in_set);
16270Sstevel@tonic-gate ASSERT(p->p_poolflag & PBWAIT);
16280Sstevel@tonic-gate ASSERT(p->p_poolcnt == 0);
16290Sstevel@tonic-gate }
16300Sstevel@tonic-gate #endif
16310Sstevel@tonic-gate
16320Sstevel@tonic-gate /*
16330Sstevel@tonic-gate * Do the check if processor set rebinding is going to succeed or not.
16340Sstevel@tonic-gate */
16350Sstevel@tonic-gate if ((flags & POOL_BIND_PSET) &&
16360Sstevel@tonic-gate (rv = pset_bind_start(procs, pool)) != 0) {
16370Sstevel@tonic-gate pool_bind_wakeall(procs);
16380Sstevel@tonic-gate goto out;
16390Sstevel@tonic-gate }
16400Sstevel@tonic-gate
16410Sstevel@tonic-gate /*
16420Sstevel@tonic-gate * At this point, all bind operations should succeed.
16430Sstevel@tonic-gate */
16440Sstevel@tonic-gate for (pp = procs; (p = *pp) != NULL; pp++) {
16450Sstevel@tonic-gate if (flags & POOL_BIND_PSET) {
16460Sstevel@tonic-gate psetid_t psetid = pool->pool_pset->pset_id;
16470Sstevel@tonic-gate void *zonebuf;
16480Sstevel@tonic-gate void *projbuf;
16490Sstevel@tonic-gate
16500Sstevel@tonic-gate /*
16510Sstevel@tonic-gate * Pre-allocate one buffer for FSS (per-project
16520Sstevel@tonic-gate * buffer for a new pset) in case if this is the
16530Sstevel@tonic-gate * first thread from its current project getting
16540Sstevel@tonic-gate * bound to this processor set.
16550Sstevel@tonic-gate */
16560Sstevel@tonic-gate projbuf = fss_allocbuf(FSS_ONE_BUF, FSS_ALLOC_PROJ);
16570Sstevel@tonic-gate zonebuf = fss_allocbuf(FSS_ONE_BUF, FSS_ALLOC_ZONE);
16580Sstevel@tonic-gate
16590Sstevel@tonic-gate mutex_enter(&pidlock);
16600Sstevel@tonic-gate mutex_enter(&p->p_lock);
16610Sstevel@tonic-gate pool_pset_bind(p, psetid, projbuf, zonebuf);
16620Sstevel@tonic-gate mutex_exit(&p->p_lock);
16630Sstevel@tonic-gate mutex_exit(&pidlock);
16640Sstevel@tonic-gate /*
16650Sstevel@tonic-gate * Free buffers pre-allocated above if it
16660Sstevel@tonic-gate * wasn't actually used.
16670Sstevel@tonic-gate */
16680Sstevel@tonic-gate fss_freebuf(projbuf, FSS_ALLOC_PROJ);
16690Sstevel@tonic-gate fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
16700Sstevel@tonic-gate }
16710Sstevel@tonic-gate /*
16720Sstevel@tonic-gate * Now let's change the scheduling class of this
16730Sstevel@tonic-gate * process if our target pool has it defined.
16740Sstevel@tonic-gate */
16750Sstevel@tonic-gate if (cid != POOL_CLASS_UNSET)
16760Sstevel@tonic-gate pool_change_class(p, cid);
16770Sstevel@tonic-gate
16780Sstevel@tonic-gate /*
16790Sstevel@tonic-gate * It is safe to reference p_pool here without holding
16800Sstevel@tonic-gate * p_lock because it cannot change underneath of us.
16810Sstevel@tonic-gate * We're holding pool_lock here, so nobody else can be
16820Sstevel@tonic-gate * moving this process between pools. If process "p"
16830Sstevel@tonic-gate * would be exiting, we're guaranteed that it would be blocked
16840Sstevel@tonic-gate * at pool_barrier_enter() in exit(). Otherwise, it would've
16850Sstevel@tonic-gate * been skipped by one of our scans of the practive list
16860Sstevel@tonic-gate * as a process with PEXITED flag set.
16870Sstevel@tonic-gate */
16880Sstevel@tonic-gate if (p->p_pool != pool) {
16890Sstevel@tonic-gate ASSERT(p->p_pool->pool_ref > 0);
16900Sstevel@tonic-gate atomic_add_32(&p->p_pool->pool_ref, -1);
16910Sstevel@tonic-gate p->p_pool = pool;
16920Sstevel@tonic-gate atomic_add_32(&p->p_pool->pool_ref, 1);
16930Sstevel@tonic-gate }
16940Sstevel@tonic-gate /*
16950Sstevel@tonic-gate * Okay, we've tortured this guy enough.
16960Sstevel@tonic-gate * Let this poor process go now.
16970Sstevel@tonic-gate */
16980Sstevel@tonic-gate pool_bind_wake(p);
16990Sstevel@tonic-gate }
17000Sstevel@tonic-gate if (flags & POOL_BIND_PSET)
17010Sstevel@tonic-gate pset_bind_finish();
17020Sstevel@tonic-gate
17030Sstevel@tonic-gate out: switch (idtype) {
17040Sstevel@tonic-gate case P_PROJID:
17050Sstevel@tonic-gate ASSERT(kpj != NULL);
17060Sstevel@tonic-gate mutex_exit(&kpj->kpj_poolbind);
17070Sstevel@tonic-gate project_rele(kpj);
17080Sstevel@tonic-gate break;
17090Sstevel@tonic-gate case P_ZONEID:
17100Sstevel@tonic-gate if (rv == 0) {
17110Sstevel@tonic-gate mutex_enter(&cpu_lock);
17120Sstevel@tonic-gate zone_pool_set(zone, pool);
17130Sstevel@tonic-gate mutex_exit(&cpu_lock);
17140Sstevel@tonic-gate }
17150Sstevel@tonic-gate zone->zone_pool_mod = gethrtime();
17160Sstevel@tonic-gate zone_rele(zone);
17170Sstevel@tonic-gate break;
17180Sstevel@tonic-gate }
17190Sstevel@tonic-gate
17200Sstevel@tonic-gate kmem_free(procs, procs_size * sizeof (proc_t *));
17210Sstevel@tonic-gate ASSERT(pool_barrier_count == 0);
17220Sstevel@tonic-gate return (rv);
17230Sstevel@tonic-gate }
172411878SVenu.Iyer@Sun.COM
172511878SVenu.Iyer@Sun.COM void
pool_event_cb_register(pool_event_cb_t * cb)172611878SVenu.Iyer@Sun.COM pool_event_cb_register(pool_event_cb_t *cb)
172711878SVenu.Iyer@Sun.COM {
172811878SVenu.Iyer@Sun.COM ASSERT(!pool_lock_held() || panicstr);
172911878SVenu.Iyer@Sun.COM ASSERT(cb->pec_func != NULL);
173011878SVenu.Iyer@Sun.COM
173111878SVenu.Iyer@Sun.COM mutex_enter(&pool_event_cb_lock);
173211878SVenu.Iyer@Sun.COM if (!pool_event_cb_init) {
173311878SVenu.Iyer@Sun.COM list_create(&pool_event_cb_list, sizeof (pool_event_cb_t),
173411878SVenu.Iyer@Sun.COM offsetof(pool_event_cb_t, pec_list));
173511878SVenu.Iyer@Sun.COM pool_event_cb_init = B_TRUE;
173611878SVenu.Iyer@Sun.COM }
173711878SVenu.Iyer@Sun.COM list_insert_tail(&pool_event_cb_list, cb);
173811878SVenu.Iyer@Sun.COM mutex_exit(&pool_event_cb_lock);
173911878SVenu.Iyer@Sun.COM }
174011878SVenu.Iyer@Sun.COM
174111878SVenu.Iyer@Sun.COM void
pool_event_cb_unregister(pool_event_cb_t * cb)174211878SVenu.Iyer@Sun.COM pool_event_cb_unregister(pool_event_cb_t *cb)
174311878SVenu.Iyer@Sun.COM {
174411878SVenu.Iyer@Sun.COM ASSERT(!pool_lock_held() || panicstr);
174511878SVenu.Iyer@Sun.COM
174611878SVenu.Iyer@Sun.COM mutex_enter(&pool_event_cb_lock);
174711878SVenu.Iyer@Sun.COM list_remove(&pool_event_cb_list, cb);
174811878SVenu.Iyer@Sun.COM mutex_exit(&pool_event_cb_lock);
174911878SVenu.Iyer@Sun.COM }
175011878SVenu.Iyer@Sun.COM
175111878SVenu.Iyer@Sun.COM typedef struct {
175211878SVenu.Iyer@Sun.COM pool_event_t tqd_what;
175311878SVenu.Iyer@Sun.COM poolid_t tqd_id;
175411878SVenu.Iyer@Sun.COM } pool_tqd_t;
175511878SVenu.Iyer@Sun.COM
175611878SVenu.Iyer@Sun.COM void
pool_event_notify(void * arg)175711878SVenu.Iyer@Sun.COM pool_event_notify(void *arg)
175811878SVenu.Iyer@Sun.COM {
175911878SVenu.Iyer@Sun.COM pool_tqd_t *tqd = (pool_tqd_t *)arg;
176011878SVenu.Iyer@Sun.COM pool_event_cb_t *cb;
176111878SVenu.Iyer@Sun.COM
176211878SVenu.Iyer@Sun.COM ASSERT(!pool_lock_held() || panicstr);
176311878SVenu.Iyer@Sun.COM
176411878SVenu.Iyer@Sun.COM mutex_enter(&pool_event_cb_lock);
176511878SVenu.Iyer@Sun.COM for (cb = list_head(&pool_event_cb_list); cb != NULL;
176611878SVenu.Iyer@Sun.COM cb = list_next(&pool_event_cb_list, cb)) {
176711878SVenu.Iyer@Sun.COM cb->pec_func(tqd->tqd_what, tqd->tqd_id, cb->pec_arg);
176811878SVenu.Iyer@Sun.COM }
176911878SVenu.Iyer@Sun.COM mutex_exit(&pool_event_cb_lock);
177011878SVenu.Iyer@Sun.COM kmem_free(tqd, sizeof (*tqd));
177111878SVenu.Iyer@Sun.COM }
177211878SVenu.Iyer@Sun.COM
177311878SVenu.Iyer@Sun.COM void
pool_event_dispatch(pool_event_t what,poolid_t id)177411878SVenu.Iyer@Sun.COM pool_event_dispatch(pool_event_t what, poolid_t id)
177511878SVenu.Iyer@Sun.COM {
177611878SVenu.Iyer@Sun.COM pool_tqd_t *tqd = NULL;
177711878SVenu.Iyer@Sun.COM
177811878SVenu.Iyer@Sun.COM ASSERT(pool_lock_held());
177911878SVenu.Iyer@Sun.COM
178011878SVenu.Iyer@Sun.COM if (pool_event_cb_taskq == NULL) {
178111878SVenu.Iyer@Sun.COM pool_event_cb_taskq = taskq_create("pool_event_cb_taskq", 1,
178211878SVenu.Iyer@Sun.COM -1, 1, 1, TASKQ_PREPOPULATE);
178311878SVenu.Iyer@Sun.COM }
178411878SVenu.Iyer@Sun.COM
178511878SVenu.Iyer@Sun.COM tqd = kmem_alloc(sizeof (*tqd), KM_SLEEP);
178611878SVenu.Iyer@Sun.COM tqd->tqd_what = what;
178711878SVenu.Iyer@Sun.COM tqd->tqd_id = id;
178811878SVenu.Iyer@Sun.COM
178911878SVenu.Iyer@Sun.COM (void) taskq_dispatch(pool_event_cb_taskq, pool_event_notify, tqd,
179011878SVenu.Iyer@Sun.COM KM_SLEEP);
179111878SVenu.Iyer@Sun.COM }
1792