xref: /onnv-gate/usr/src/lib/libpool/common/pool_commit.c (revision 3247:e05001c14ea2)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52324Ssdussud  * Common Development and Distribution License (the "License").
62324Ssdussud  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
222324Ssdussud  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate /*
290Sstevel@tonic-gate  * These functions implement the process of commitment for a pool
300Sstevel@tonic-gate  * configuration. This process can be described as taking instructions
310Sstevel@tonic-gate  * from a static configuration file and using the information about
320Sstevel@tonic-gate  * the target system contained in the dynamic configuration to make
330Sstevel@tonic-gate  * decisions about how best to allocate resources to meet the
340Sstevel@tonic-gate  * constraints specified in the static configuration file.
350Sstevel@tonic-gate  *
360Sstevel@tonic-gate  * Mechanically, this process relies upon ordering the individual
370Sstevel@tonic-gate  * components of the file and stepping through the lists of components
380Sstevel@tonic-gate  * and taking actions depending on their type and which file they are
390Sstevel@tonic-gate  * part of.
400Sstevel@tonic-gate  *
410Sstevel@tonic-gate  * Configuration components can be broken down into different types
420Sstevel@tonic-gate  * which are then treated according to the following table:
430Sstevel@tonic-gate  *
440Sstevel@tonic-gate  * Element Type		Action
450Sstevel@tonic-gate  * system || pool ||
460Sstevel@tonic-gate  * res_comp || res_agg	If the element is a required element, then create or
470Sstevel@tonic-gate  *			update it (don't destroy required elements in the
480Sstevel@tonic-gate  *			static configuration) otherwise manipulate the
490Sstevel@tonic-gate  *			dynamic configuration to create, destroy or update
500Sstevel@tonic-gate  *			the element on the system.
510Sstevel@tonic-gate  * comp			Create, destroy or update the static configuration
520Sstevel@tonic-gate  *			component.
530Sstevel@tonic-gate  *
540Sstevel@tonic-gate  * The treatment of the different elements reflects the fact that all
550Sstevel@tonic-gate  * elements other than comp are configurable and thus libpool can
560Sstevel@tonic-gate  * create, destroy and modify these elements at will. comp elements
570Sstevel@tonic-gate  * reflect the disposition of the system, these elements can be moved
580Sstevel@tonic-gate  * around but they can't be created or destroyed in the dynamic
590Sstevel@tonic-gate  * configuration in the commit process. comp elements can be created
600Sstevel@tonic-gate  * and destroyed in the static configuration file as a result of a
610Sstevel@tonic-gate  * commit operation, since it's possible for a comp to not appear in
620Sstevel@tonic-gate  * the dynamic configuration. For instance, if the static
630Sstevel@tonic-gate  * configuration file was created on a different machine or after a DR
640Sstevel@tonic-gate  * operation which has removed or added components.
650Sstevel@tonic-gate  *
660Sstevel@tonic-gate  */
670Sstevel@tonic-gate #include <assert.h>
680Sstevel@tonic-gate #include <stdio.h>
690Sstevel@tonic-gate #include <stdlib.h>
700Sstevel@tonic-gate #include <sys/types.h>
710Sstevel@tonic-gate #include <errno.h>
720Sstevel@tonic-gate #include <string.h>
730Sstevel@tonic-gate #include <limits.h>
740Sstevel@tonic-gate #include <unistd.h>
750Sstevel@tonic-gate 
760Sstevel@tonic-gate #include <pool.h>
770Sstevel@tonic-gate #include "pool_internal.h"
780Sstevel@tonic-gate #include "pool_impl.h"
790Sstevel@tonic-gate 
/*
 * Smaller / larger of two values.  Both are plain macros, so each
 * argument may be evaluated twice: do not pass expressions that have
 * side effects.
 */
#define	MIN(a, b) ((a) < (b) ? (a) : (b))
#define	MAX(a, b) ((a) > (b) ? (a) : (b))

#define	POA_IMPORTANCE_NUM		0
#define	POA_SURPLUS_TO_DEFAULT_NUM	1
840Sstevel@tonic-gate 
850Sstevel@tonic-gate /*
860Sstevel@tonic-gate  * This resource specific structure is used to determine allocation of resources
870Sstevel@tonic-gate  * during resource set allocation.  Each set will receive its min, plus
880Sstevel@tonic-gate  * some number of dealt resources based on the global allocation policy.
890Sstevel@tonic-gate  */
900Sstevel@tonic-gate typedef struct res_info {
910Sstevel@tonic-gate 	pool_resource_t	*ri_res;	/* Resource set */
920Sstevel@tonic-gate 	uint64_t	ri_min;		/* Resource set's low watermark */
930Sstevel@tonic-gate 	uint64_t	ri_max;		/* Resource set's high watermark */
940Sstevel@tonic-gate 	uint64_t	ri_oldsize;	/* Size of resource set at the start */
950Sstevel@tonic-gate 	uint64_t	ri_newsize;	/* New resource set size allocated */
960Sstevel@tonic-gate 	uint64_t	ri_pinned;	/* Count of pinned resources in set */
970Sstevel@tonic-gate 	uint64_t	ri_dealt;	/* Count of resources dealt to set */
980Sstevel@tonic-gate 	int64_t		ri_transfer;	/* oldsize - newsize */
990Sstevel@tonic-gate 					/* The signed quantity of resources */
1000Sstevel@tonic-gate 					/* to tranfer into or out of this */
1010Sstevel@tonic-gate 					/* resource set */
1020Sstevel@tonic-gate 					/* + transfer: tranfer resources out */
1030Sstevel@tonic-gate 					/* - transfer: tranfer resources in */
1040Sstevel@tonic-gate } res_info_t;
1050Sstevel@tonic-gate 
1060Sstevel@tonic-gate /*
1070Sstevel@tonic-gate  * diff_and_fix operations
1080Sstevel@tonic-gate  */
1090Sstevel@tonic-gate static int		commit_create(pool_conf_t *, pool_elem_t **);
1100Sstevel@tonic-gate static int		commit_delete(pool_elem_t *);
1110Sstevel@tonic-gate static int		commit_update(pool_elem_t *, pool_elem_t *, int);
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate /*
1140Sstevel@tonic-gate  * configuration commit processing
1150Sstevel@tonic-gate  */
1160Sstevel@tonic-gate static int		diff_and_fix(pool_conf_t *, pool_conf_t *);
1170Sstevel@tonic-gate static int		process_elem_lt(pool_elem_t *, pool_conf_t *);
1180Sstevel@tonic-gate static int		process_elem_gt(pool_elem_t *, pool_conf_t *,
1190Sstevel@tonic-gate     pool_conf_t *);
1200Sstevel@tonic-gate static int		process_lists(int, pool_conf_t *,
1210Sstevel@tonic-gate     pool_conf_t *, int);
1220Sstevel@tonic-gate static pool_elem_t	**get_elem_list(const pool_conf_t *, int, uint_t *);
1230Sstevel@tonic-gate static int		share_resources(pool_conf_t *);
1240Sstevel@tonic-gate static int		resource_allocate(const char *, pool_resource_t **,
1250Sstevel@tonic-gate     uint_t);
1260Sstevel@tonic-gate static int		resource_allocate_default(pool_resource_t **, uint_t);
1270Sstevel@tonic-gate static int		pset_allocate_imp(pool_resource_t **, uint_t);
1280Sstevel@tonic-gate static int		resource_compare_by_descending_importance(const void *,
1290Sstevel@tonic-gate     const void *);
1300Sstevel@tonic-gate static int		compute_size_to_transfer(const void *, const void *);
1310Sstevel@tonic-gate static int		set_importance_cb(pool_conf_t *, pool_t *, void *);
1320Sstevel@tonic-gate static int		unset_importance_cb(pool_conf_t *, pool_t *, void *);
1330Sstevel@tonic-gate static int		add_importance_props(pool_conf_t *);
1340Sstevel@tonic-gate static int		remove_importance_props(pool_conf_t *);
1350Sstevel@tonic-gate static int		clone_element(pool_conf_t *, pool_elem_t *,
1360Sstevel@tonic-gate     const char *, pool_value_t *, void *);
1370Sstevel@tonic-gate static int		clean_element(pool_conf_t *, pool_elem_t *,
1380Sstevel@tonic-gate     const char *, pool_value_t *, void *);
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate /*
1410Sstevel@tonic-gate  * commit_create() is used to create a configuration element upon the
1420Sstevel@tonic-gate  * system.  Since only pools and resource actually need to perform any
1430Sstevel@tonic-gate  * action, other elements are ignored as a no-op.
1440Sstevel@tonic-gate  */
1450Sstevel@tonic-gate static int
commit_create(pool_conf_t * conf,pool_elem_t ** e1)1460Sstevel@tonic-gate commit_create(pool_conf_t *conf, pool_elem_t **e1)
1470Sstevel@tonic-gate {
1480Sstevel@tonic-gate 	pool_resource_t *res;
1490Sstevel@tonic-gate 	pool_t *pool;
1500Sstevel@tonic-gate 	const char *res_type;
1510Sstevel@tonic-gate 	pool_elem_t *src = *e1;
1520Sstevel@tonic-gate 	uint64_t smin, smax, dmax;
1530Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
1540Sstevel@tonic-gate 	char *name;
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate 	switch (pool_elem_class(src)) {
1570Sstevel@tonic-gate 	case PEC_SYSTEM:	/* NO-OP */
1580Sstevel@tonic-gate 		break;
1590Sstevel@tonic-gate 	case PEC_POOL:
1600Sstevel@tonic-gate 		name = elem_get_name(src);
1610Sstevel@tonic-gate 		if ((pool = pool_create(conf, name)) == NULL) {
1620Sstevel@tonic-gate 			free(name);
1630Sstevel@tonic-gate 			return (PO_FAIL);
1640Sstevel@tonic-gate 		}
1650Sstevel@tonic-gate 		free(name);
1660Sstevel@tonic-gate 		/*
1670Sstevel@tonic-gate 		 * Now copy the properties from the original pool to the
1680Sstevel@tonic-gate 		 * new one
1690Sstevel@tonic-gate 		 */
1700Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(src), src, TO_ELEM(pool),
1710Sstevel@tonic-gate 		    clone_element) != PO_SUCCESS)
1720Sstevel@tonic-gate 			return (PO_FAIL);
1730Sstevel@tonic-gate 		/*
1740Sstevel@tonic-gate 		 * Add a pointer to the src element which can be
1750Sstevel@tonic-gate 		 * updated with a sys_id when the sys_id is allocated
1760Sstevel@tonic-gate 		 * to the created element.
1770Sstevel@tonic-gate 		 */
1780Sstevel@tonic-gate 		pool_set_pair(TO_ELEM(pool), src);
1790Sstevel@tonic-gate 		*e1 = TO_ELEM(pool);
1800Sstevel@tonic-gate 		break;
1810Sstevel@tonic-gate 	case PEC_RES_COMP:
1820Sstevel@tonic-gate 	case PEC_RES_AGG:
1830Sstevel@tonic-gate 		name = elem_get_name(src);
1840Sstevel@tonic-gate 		res_type = pool_elem_class_string(src);
1850Sstevel@tonic-gate 		if ((res = pool_resource_create(conf, res_type, name)) ==
1860Sstevel@tonic-gate 		    NULL) {
1870Sstevel@tonic-gate 			free(name);
1880Sstevel@tonic-gate 			return (PO_FAIL);
1890Sstevel@tonic-gate 		}
1900Sstevel@tonic-gate 		free(name);
1910Sstevel@tonic-gate 		/*
1920Sstevel@tonic-gate 		 * Need to do some ordering of property updates.
1930Sstevel@tonic-gate 		 * Compare the values of source min/max and
1940Sstevel@tonic-gate 		 * destination min/max. If smin < dmax then update the
1950Sstevel@tonic-gate 		 * smin first, else update the max first.
1960Sstevel@tonic-gate 		 */
1970Sstevel@tonic-gate 		if (resource_get_min(pool_elem_res(src), &smin) != PO_SUCCESS ||
1980Sstevel@tonic-gate 		    resource_get_max(pool_elem_res(src), &smax) != PO_SUCCESS ||
1990Sstevel@tonic-gate 		    resource_get_max(res, &dmax) != PO_SUCCESS)
2000Sstevel@tonic-gate 			return (PO_FAIL);
2010Sstevel@tonic-gate 		if (smin < dmax) {
2020Sstevel@tonic-gate 			pool_value_set_uint64(&val, smin);
2030Sstevel@tonic-gate 			if (pool_put_ns_property(TO_ELEM(res), c_min_prop,
2040Sstevel@tonic-gate 			    &val) != PO_SUCCESS)
2050Sstevel@tonic-gate 				return (PO_FAIL);
2060Sstevel@tonic-gate 		} else {
2070Sstevel@tonic-gate 			pool_value_set_uint64(&val, smax);
2080Sstevel@tonic-gate 			if (pool_put_ns_property(TO_ELEM(res), c_max_prop,
2090Sstevel@tonic-gate 			    &val) != PO_SUCCESS)
2100Sstevel@tonic-gate 				return (PO_FAIL);
2110Sstevel@tonic-gate 		}
2120Sstevel@tonic-gate 		/*
2130Sstevel@tonic-gate 		 * Now copy the properties from the original resource
2140Sstevel@tonic-gate 		 * to the new one
2150Sstevel@tonic-gate 		 */
2160Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(src), src, TO_ELEM(res),
2170Sstevel@tonic-gate 		    clone_element) != PO_SUCCESS)
2180Sstevel@tonic-gate 			return (PO_FAIL);
2190Sstevel@tonic-gate 		/*
2200Sstevel@tonic-gate 		 * Add a pointer to the src element which can be
2210Sstevel@tonic-gate 		 * updated with a sys_id when the sys_id is allocated
2220Sstevel@tonic-gate 		 * to the created element.
2230Sstevel@tonic-gate 		 */
2240Sstevel@tonic-gate 		pool_set_pair(TO_ELEM(res), src);
2250Sstevel@tonic-gate 		*e1 = TO_ELEM(res);
2260Sstevel@tonic-gate 		break;
2270Sstevel@tonic-gate 	case PEC_COMP:		/* NO-OP */
2280Sstevel@tonic-gate 		break;
2290Sstevel@tonic-gate 	default:
2300Sstevel@tonic-gate 		return (PO_FAIL);
2310Sstevel@tonic-gate 	}
2320Sstevel@tonic-gate 	return (PO_SUCCESS);
2330Sstevel@tonic-gate }
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate 
2360Sstevel@tonic-gate /*
2370Sstevel@tonic-gate  * commit_delete() is used to delete a configuration element upon the
2380Sstevel@tonic-gate  * system.  Since only pools and resources actually need to perform
2390Sstevel@tonic-gate  * any action, other elements are ignored as a no-op.
2400Sstevel@tonic-gate  */
2410Sstevel@tonic-gate static int
commit_delete(pool_elem_t * pe)2420Sstevel@tonic-gate commit_delete(pool_elem_t *pe)
2430Sstevel@tonic-gate {
2440Sstevel@tonic-gate 	pool_resource_t *res;
2450Sstevel@tonic-gate 	pool_t *pool;
2460Sstevel@tonic-gate 	int ret = 0;
2470Sstevel@tonic-gate 
248*3247Sgjelinek 	if (elem_is_tmp(pe))
249*3247Sgjelinek 		return (PO_SUCCESS);
250*3247Sgjelinek 
2510Sstevel@tonic-gate 	switch (pool_elem_class(pe)) {
2520Sstevel@tonic-gate 	case PEC_SYSTEM:	/* NO-OP */
2530Sstevel@tonic-gate 		break;
2540Sstevel@tonic-gate 	case PEC_POOL:
2550Sstevel@tonic-gate 		pool = pool_elem_pool(pe);
2560Sstevel@tonic-gate 		ret = pool_destroy(TO_CONF(pe), pool);
2570Sstevel@tonic-gate 		break;
2580Sstevel@tonic-gate 	case PEC_RES_COMP:
2590Sstevel@tonic-gate 	case PEC_RES_AGG:
2600Sstevel@tonic-gate 		res = pool_elem_res(pe);
2610Sstevel@tonic-gate 		ret = pool_resource_destroy(TO_CONF(pe), res);
2620Sstevel@tonic-gate 		break;
2630Sstevel@tonic-gate 	case PEC_COMP:		/* NO-OP */
2640Sstevel@tonic-gate 		break;
2650Sstevel@tonic-gate 	default:
2660Sstevel@tonic-gate 		return (PO_FAIL);
2670Sstevel@tonic-gate 	}
2680Sstevel@tonic-gate 	return (ret);
2690Sstevel@tonic-gate }
2700Sstevel@tonic-gate 
2710Sstevel@tonic-gate /*
2720Sstevel@tonic-gate  * commit_update() is used to update a configuration element upon the
2730Sstevel@tonic-gate  * system or in a static configuration file. The pass parameter
2740Sstevel@tonic-gate  * governs whether properties are being updated or associations.  In
2750Sstevel@tonic-gate  * pass 0, properties are updated. If the element is of class
2760Sstevel@tonic-gate  * PEC_COMP, then make sure that the element in the static
2770Sstevel@tonic-gate  * configuration file is correctly located before proceeding with the
2780Sstevel@tonic-gate  * update. Then, the element in the dynamic configuration file is
2790Sstevel@tonic-gate  * updated. In pass 1, ie. pass != 0, any pool components have their
2800Sstevel@tonic-gate  * associations updated in the dynamic configuration.
2810Sstevel@tonic-gate  */
2820Sstevel@tonic-gate static int
commit_update(pool_elem_t * e1,pool_elem_t * e2,int pass)2830Sstevel@tonic-gate commit_update(pool_elem_t *e1, pool_elem_t *e2, int pass)
2840Sstevel@tonic-gate {
2850Sstevel@tonic-gate 	if (pass == 0) {
2860Sstevel@tonic-gate 		pool_resource_t *res1;
2870Sstevel@tonic-gate 		pool_resource_t *res2;
2880Sstevel@tonic-gate 		if (pool_elem_class(e1) == PEC_COMP) {
2890Sstevel@tonic-gate 			res1 = pool_get_owning_resource(TO_CONF(e1),
2900Sstevel@tonic-gate 			    pool_elem_comp(e1));
2910Sstevel@tonic-gate 			res2 = pool_get_owning_resource(TO_CONF(e2),
2920Sstevel@tonic-gate 			    pool_elem_comp(e2));
2930Sstevel@tonic-gate 			if (pool_elem_compare_name(TO_ELEM(res1),
2940Sstevel@tonic-gate 			    TO_ELEM(res2)) != 0) {
2950Sstevel@tonic-gate 				char *name;
2960Sstevel@tonic-gate 				const pool_resource_t *newres;
2970Sstevel@tonic-gate 				pool_component_t *comps[2] = { NULL };
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate 				comps[0] = pool_elem_comp(e2);
3002324Ssdussud 				name = elem_get_name(TO_ELEM(res1));
3010Sstevel@tonic-gate 				newres = pool_get_resource(TO_CONF(e2),
3020Sstevel@tonic-gate 				    pool_elem_class_string(TO_ELEM(res1)),
3030Sstevel@tonic-gate 				    name);
3040Sstevel@tonic-gate 				free(name);
3050Sstevel@tonic-gate 				assert(newres);
3060Sstevel@tonic-gate #ifdef DEBUG
3070Sstevel@tonic-gate 				dprintf("transferring: res, comp\n");
3080Sstevel@tonic-gate 				pool_elem_dprintf(TO_ELEM(newres));
3090Sstevel@tonic-gate 				pool_elem_dprintf(e2);
3100Sstevel@tonic-gate #endif	/* DEBUG */
3110Sstevel@tonic-gate 				(void) pool_resource_xtransfer(TO_CONF(e2),
3120Sstevel@tonic-gate 				    res2, (pool_resource_t *)newres, comps);
3130Sstevel@tonic-gate 			}
3140Sstevel@tonic-gate 		}
3150Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(e2), e2, NULL,
3160Sstevel@tonic-gate 		    clean_element) != PO_SUCCESS) {
3170Sstevel@tonic-gate 			return (PO_FAIL);
3180Sstevel@tonic-gate 		}
3190Sstevel@tonic-gate 		/*
3200Sstevel@tonic-gate 		 * Need to do some ordering of property updates if the
3210Sstevel@tonic-gate 		 * element to be updated is a resource.  Compare the
3220Sstevel@tonic-gate 		 * values of source min/max and destination
3230Sstevel@tonic-gate 		 * min/max. If smin < dmax then update the smin first,
3240Sstevel@tonic-gate 		 * else update the max first.
3250Sstevel@tonic-gate 		 */
3260Sstevel@tonic-gate 		if (pool_elem_class(e1) == PEC_RES_COMP ||
3270Sstevel@tonic-gate 		    pool_elem_class(e1) == PEC_RES_AGG) {
3280Sstevel@tonic-gate 			uint64_t smin, smax, dmax;
3290Sstevel@tonic-gate 			pool_value_t val = POOL_VALUE_INITIALIZER;
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 			if (resource_get_min(pool_elem_res(e1), &smin) !=
3320Sstevel@tonic-gate 			    PO_SUCCESS ||
3330Sstevel@tonic-gate 			    resource_get_max(pool_elem_res(e1), &smax) !=
3340Sstevel@tonic-gate 			    PO_SUCCESS ||
3350Sstevel@tonic-gate 			    resource_get_max(pool_elem_res(e2), &dmax) !=
3360Sstevel@tonic-gate 			    PO_SUCCESS)
3370Sstevel@tonic-gate 				return (PO_FAIL);
3380Sstevel@tonic-gate 			if (smin < dmax) {
3390Sstevel@tonic-gate 				pool_value_set_uint64(&val, smin);
3400Sstevel@tonic-gate 				if (pool_put_ns_property(e2, c_min_prop,
3410Sstevel@tonic-gate 				    &val) != PO_SUCCESS)
3420Sstevel@tonic-gate 					return (PO_FAIL);
3430Sstevel@tonic-gate 			} else {
3440Sstevel@tonic-gate 				pool_value_set_uint64(&val, smax);
3450Sstevel@tonic-gate 				if (pool_put_ns_property(e2, c_max_prop,
3460Sstevel@tonic-gate 				    &val) != PO_SUCCESS)
3470Sstevel@tonic-gate 					return (PO_FAIL);
3480Sstevel@tonic-gate 			}
3490Sstevel@tonic-gate 		}
3500Sstevel@tonic-gate 		/*
3510Sstevel@tonic-gate 		 * This next couple of steps needs some
3520Sstevel@tonic-gate 		 * explanation. The first walk, copies all the
3530Sstevel@tonic-gate 		 * properties that are writeable from the static
3540Sstevel@tonic-gate 		 * configuration to the dynamic configuration. The
3550Sstevel@tonic-gate 		 * second walk copies all properties (writeable or
3560Sstevel@tonic-gate 		 * not) from the dynamic configuration element back to
3570Sstevel@tonic-gate 		 * the static configuration element. This ensures that
3580Sstevel@tonic-gate 		 * updates from the static configuration element are
3590Sstevel@tonic-gate 		 * correctly applied to the dynamic configuration and
3600Sstevel@tonic-gate 		 * then the static configuration element is updated
3610Sstevel@tonic-gate 		 * with the latest values of the read-only xproperties
3620Sstevel@tonic-gate 		 * from the dynamic configuration element. The
3630Sstevel@tonic-gate 		 * enforcing of permisssions is performed in
3640Sstevel@tonic-gate 		 * clone_element by its choice of property
3650Sstevel@tonic-gate 		 * manipulation function.
3660Sstevel@tonic-gate 		 */
3670Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(e1), e1, e2, clone_element) !=
3680Sstevel@tonic-gate 		    PO_SUCCESS) {
3690Sstevel@tonic-gate 			return (PO_FAIL);
3700Sstevel@tonic-gate 		}
3710Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(e2), e2, e1, clone_element) !=
3720Sstevel@tonic-gate 		    PO_SUCCESS) {
3730Sstevel@tonic-gate 			return (PO_FAIL);
3740Sstevel@tonic-gate 		}
3750Sstevel@tonic-gate 	} else {
3760Sstevel@tonic-gate 		if (pool_elem_class(e1) == PEC_POOL) {
3770Sstevel@tonic-gate 			pool_resource_t **rs;
3780Sstevel@tonic-gate 			uint_t nelem;
3790Sstevel@tonic-gate 			int i;
3800Sstevel@tonic-gate 			pool_value_t val = POOL_VALUE_INITIALIZER;
3810Sstevel@tonic-gate 			pool_value_t *pvals[] = { NULL, NULL };
3820Sstevel@tonic-gate 
3830Sstevel@tonic-gate 			pvals[0] = &val;
3840Sstevel@tonic-gate 			if (pool_value_set_string(&val, "pset") != PO_SUCCESS ||
3850Sstevel@tonic-gate 			    pool_value_set_name(&val, c_type) != PO_SUCCESS)
3860Sstevel@tonic-gate 				return (PO_FAIL);
3870Sstevel@tonic-gate 			if ((rs = pool_query_pool_resources(TO_CONF(e1),
3880Sstevel@tonic-gate 			    pool_elem_pool(e1), &nelem, pvals)) != NULL) {
3890Sstevel@tonic-gate 				for (i = 0; i < nelem; i++) {
3900Sstevel@tonic-gate 					const pool_resource_t *tgt_res;
3910Sstevel@tonic-gate 					char *res_name =
3920Sstevel@tonic-gate 					    elem_get_name(TO_ELEM(rs[i]));
3930Sstevel@tonic-gate 
3940Sstevel@tonic-gate 					if ((tgt_res = pool_get_resource(
3950Sstevel@tonic-gate 					    TO_CONF(e2), pool_elem_class_string(
3960Sstevel@tonic-gate 					    TO_ELEM(rs[i])), res_name)) ==
3970Sstevel@tonic-gate 					    NULL) {
3980Sstevel@tonic-gate 						tgt_res = get_default_resource(
3990Sstevel@tonic-gate 						    rs[i]);
4000Sstevel@tonic-gate 					}
4010Sstevel@tonic-gate 					free(res_name);
4020Sstevel@tonic-gate 					if (pool_associate(TO_CONF(e2),
4030Sstevel@tonic-gate 					    pool_elem_pool(e2), tgt_res) !=
4040Sstevel@tonic-gate 					    PO_SUCCESS) {
4050Sstevel@tonic-gate 						free(rs);
4060Sstevel@tonic-gate 						return (PO_FAIL);
4070Sstevel@tonic-gate 					}
4080Sstevel@tonic-gate 				}
4090Sstevel@tonic-gate 				free(rs);
4100Sstevel@tonic-gate 			}
4110Sstevel@tonic-gate 		}
4120Sstevel@tonic-gate 	}
4130Sstevel@tonic-gate 	return (PO_SUCCESS);
4140Sstevel@tonic-gate }
4150Sstevel@tonic-gate 
4160Sstevel@tonic-gate /*
4170Sstevel@tonic-gate  * diff_and_fix() works out the differences between two configurations
4180Sstevel@tonic-gate  * and modifies the state of the system to match the operations
4190Sstevel@tonic-gate  * required to bring the two configurations into sync.
4200Sstevel@tonic-gate  *
4210Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL.
4220Sstevel@tonic-gate  */
4230Sstevel@tonic-gate static int
diff_and_fix(pool_conf_t * stc,pool_conf_t * dyn)4240Sstevel@tonic-gate diff_and_fix(pool_conf_t *stc, pool_conf_t *dyn)
4250Sstevel@tonic-gate {
4260Sstevel@tonic-gate 	/*
4270Sstevel@tonic-gate 	 * The ordering of the operations is significant, we must
4280Sstevel@tonic-gate 	 * process the system element, then the pools elements, then
4290Sstevel@tonic-gate 	 * the resource elements, then the pools elements again and
4300Sstevel@tonic-gate 	 * finally the resource components.
4310Sstevel@tonic-gate 	 *
4320Sstevel@tonic-gate 	 * TODO
4330Sstevel@tonic-gate 	 * PEC_RES_COMP are the only type of resources
4340Sstevel@tonic-gate 	 * currently. When PEC_RES_AGG resources are added they must
4350Sstevel@tonic-gate 	 * also be processed.
4360Sstevel@tonic-gate 	 */
4370Sstevel@tonic-gate 	if (process_lists(PEC_SYSTEM, stc, dyn, 0) != PO_SUCCESS) {
4380Sstevel@tonic-gate 		return (PO_FAIL);
4390Sstevel@tonic-gate 	}
4400Sstevel@tonic-gate 	if (process_lists(PEC_POOL, stc, dyn, 0) != PO_SUCCESS) {
4410Sstevel@tonic-gate 		return (PO_FAIL);
4420Sstevel@tonic-gate 	}
4430Sstevel@tonic-gate 	if (process_lists(PEC_RES_COMP, stc, dyn, 0) != PO_SUCCESS) {
4440Sstevel@tonic-gate 		return (PO_FAIL);
4450Sstevel@tonic-gate 	}
4460Sstevel@tonic-gate 	if (process_lists(PEC_COMP, stc, dyn, 0) != PO_SUCCESS) {
4470Sstevel@tonic-gate 		return (PO_FAIL);
4480Sstevel@tonic-gate 	}
4490Sstevel@tonic-gate 	if (process_lists(PEC_POOL, stc, dyn, 1) != PO_SUCCESS) {
4500Sstevel@tonic-gate 		return (PO_FAIL);
4510Sstevel@tonic-gate 	}
4520Sstevel@tonic-gate 	/*
4530Sstevel@tonic-gate 	 * Share the resources. It has to be called for both
4540Sstevel@tonic-gate 	 * configurations to ensure that the configurations still look
4550Sstevel@tonic-gate 	 * the same.
4560Sstevel@tonic-gate 	 */
4570Sstevel@tonic-gate 	if (share_resources(dyn) != PO_SUCCESS) {
4580Sstevel@tonic-gate 		return (PO_FAIL);
4590Sstevel@tonic-gate 	}
4600Sstevel@tonic-gate 	if (share_resources(stc) != PO_SUCCESS) {
4610Sstevel@tonic-gate 		return (PO_FAIL);
4620Sstevel@tonic-gate 	}
4630Sstevel@tonic-gate 	return (PO_SUCCESS);
4640Sstevel@tonic-gate }
4650Sstevel@tonic-gate 
4660Sstevel@tonic-gate static int
process_elem_lt(pool_elem_t * pe,pool_conf_t * dyn)4670Sstevel@tonic-gate process_elem_lt(pool_elem_t *pe, pool_conf_t *dyn)
4680Sstevel@tonic-gate {
4690Sstevel@tonic-gate 	if (pool_elem_class(pe) == PEC_COMP) {
4700Sstevel@tonic-gate 		if (pool_component_destroy(pool_elem_comp(pe)) == PO_FAIL) {
4710Sstevel@tonic-gate 			return (PO_FAIL);
4720Sstevel@tonic-gate 		}
4730Sstevel@tonic-gate 	} else if (! elem_is_default(pe)) {
4740Sstevel@tonic-gate 		if (commit_create(dyn, &pe) != PO_SUCCESS) {
4750Sstevel@tonic-gate 			return (PO_FAIL);
4760Sstevel@tonic-gate 		}
4770Sstevel@tonic-gate 	}
4780Sstevel@tonic-gate 	return (PO_SUCCESS);
4790Sstevel@tonic-gate }
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate static int
process_elem_gt(pool_elem_t * pe,pool_conf_t * stc,pool_conf_t * dyn)4820Sstevel@tonic-gate process_elem_gt(pool_elem_t *pe, pool_conf_t *stc, pool_conf_t *dyn)
4830Sstevel@tonic-gate {
4840Sstevel@tonic-gate 	if (pool_elem_class(pe) == PEC_COMP) {
4850Sstevel@tonic-gate 		pool_resource_t *owner;
4860Sstevel@tonic-gate 		const pool_resource_t *parent_res;
4870Sstevel@tonic-gate 		pool_value_t val = POOL_VALUE_INITIALIZER;
4880Sstevel@tonic-gate 		const pool_component_t *newcomp;
4890Sstevel@tonic-gate 		const char *resname;
4900Sstevel@tonic-gate 		const char *restype;
4910Sstevel@tonic-gate 		/*
4920Sstevel@tonic-gate 		 * I have to find the right parent in the static
4930Sstevel@tonic-gate 		 * configuration. It may not exist, in which case it's
4940Sstevel@tonic-gate 		 * correct to put it in the default
4950Sstevel@tonic-gate 		 */
4960Sstevel@tonic-gate 		owner = pool_get_owning_resource(dyn,
4970Sstevel@tonic-gate 		    pool_elem_comp(pe));
4980Sstevel@tonic-gate 		if (pool_get_ns_property(TO_ELEM(owner), "name", &val) ==
4990Sstevel@tonic-gate 		    POC_INVAL)
5000Sstevel@tonic-gate 			return (PO_FAIL);
5010Sstevel@tonic-gate 
5020Sstevel@tonic-gate 		if (pool_value_get_string(&val, &resname) == PO_FAIL)
5030Sstevel@tonic-gate 			return (PO_FAIL);
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate 		if ((resname = strdup(resname)) == NULL)
5060Sstevel@tonic-gate 			return (PO_FAIL);
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate 		restype = pool_elem_class_string(TO_ELEM(owner));
5090Sstevel@tonic-gate 		parent_res = pool_get_resource(stc, restype, resname);
5100Sstevel@tonic-gate 		free((void *)resname);
5110Sstevel@tonic-gate 		if (parent_res == NULL)
5120Sstevel@tonic-gate 			parent_res = resource_by_sysid(stc, PS_NONE, restype);
5130Sstevel@tonic-gate 		/*
5140Sstevel@tonic-gate 		 * Now need to make a copy of the component in the
5150Sstevel@tonic-gate 		 * dynamic configuration in the static configuration.
5160Sstevel@tonic-gate 		 */
5170Sstevel@tonic-gate 		if ((newcomp = pool_component_create(stc, parent_res,
5180Sstevel@tonic-gate 		    elem_get_sysid(pe))) == NULL)
5190Sstevel@tonic-gate 			return (PO_FAIL);
5200Sstevel@tonic-gate 
5210Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(pe), pe, TO_ELEM(newcomp),
5220Sstevel@tonic-gate 		    clone_element) != PO_SUCCESS)
5230Sstevel@tonic-gate 			return (PO_FAIL);
5240Sstevel@tonic-gate 	} else if (elem_is_default(pe)) {
5250Sstevel@tonic-gate 		pool_resource_t *newres;
5260Sstevel@tonic-gate 		pool_t *newpool;
5270Sstevel@tonic-gate 		char *name;
5280Sstevel@tonic-gate 
5290Sstevel@tonic-gate 		if ((name = elem_get_name(pe)) == NULL)
5300Sstevel@tonic-gate 			return (PO_FAIL);
5310Sstevel@tonic-gate 		switch (pool_elem_class(pe)) {
5320Sstevel@tonic-gate 		case PEC_POOL:
5330Sstevel@tonic-gate 			if ((newpool = pool_create(stc, name)) == NULL) {
5340Sstevel@tonic-gate 				free(name);
5350Sstevel@tonic-gate 				return (PO_FAIL);
5360Sstevel@tonic-gate 			}
5370Sstevel@tonic-gate 			free(name);
5380Sstevel@tonic-gate 			if (pool_walk_properties(TO_CONF(pe), pe,
5390Sstevel@tonic-gate 			    TO_ELEM(newpool), clone_element) != PO_SUCCESS)
5400Sstevel@tonic-gate 				return (PO_FAIL);
5410Sstevel@tonic-gate 			break;
5420Sstevel@tonic-gate 		case PEC_RES_AGG:
5430Sstevel@tonic-gate 		case PEC_RES_COMP:
5440Sstevel@tonic-gate 			if ((newres = pool_resource_create(stc,
5450Sstevel@tonic-gate 			    pool_elem_class_string(pe), name)) ==
5460Sstevel@tonic-gate 			    NULL) {
5470Sstevel@tonic-gate 				free(name);
5480Sstevel@tonic-gate 				return (PO_FAIL);
5490Sstevel@tonic-gate 			}
5500Sstevel@tonic-gate 			free(name);
5510Sstevel@tonic-gate 			if (pool_walk_properties(TO_CONF(pe), pe,
5520Sstevel@tonic-gate 			    TO_ELEM(newres), clone_element) != PO_SUCCESS)
5530Sstevel@tonic-gate 				return (PO_FAIL);
5540Sstevel@tonic-gate 			break;
5550Sstevel@tonic-gate 		default:
5560Sstevel@tonic-gate 			free(name);
5570Sstevel@tonic-gate 			break;
5580Sstevel@tonic-gate 		}
5590Sstevel@tonic-gate 	} else {
5600Sstevel@tonic-gate 		if (commit_delete(pe) != PO_SUCCESS)
5610Sstevel@tonic-gate 			return (PO_FAIL);
5620Sstevel@tonic-gate 	}
5630Sstevel@tonic-gate 	return (PO_SUCCESS);
5640Sstevel@tonic-gate }
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate /*
5670Sstevel@tonic-gate  * This function compares the elements of the supplied type in the
5680Sstevel@tonic-gate  * static and dynamic configurations supplied. The lists of elements
5690Sstevel@tonic-gate  * are compared and used to create, delete and updated elements in
5700Sstevel@tonic-gate  * both the static and dynamic configurations. The pass parameter is
5710Sstevel@tonic-gate  * used to indicate to commit_update() whether property updates or
5720Sstevel@tonic-gate  * association updates should be performed.
5730Sstevel@tonic-gate  */
5740Sstevel@tonic-gate static int
process_lists(int type,pool_conf_t * stc,pool_conf_t * dyn,int pass)5750Sstevel@tonic-gate process_lists(int type, pool_conf_t *stc, pool_conf_t *dyn, int pass)
5760Sstevel@tonic-gate {
5770Sstevel@tonic-gate 	uint_t stc_nelem = 0, dyn_nelem = 0;
5780Sstevel@tonic-gate 	pool_elem_t **stc_elems, **dyn_elems;
5790Sstevel@tonic-gate 	int i, j;
5800Sstevel@tonic-gate 	int status = PO_SUCCESS;
5810Sstevel@tonic-gate 
5820Sstevel@tonic-gate 	if ((stc_elems = get_elem_list(stc, type, &stc_nelem)) == NULL)
5830Sstevel@tonic-gate 		return (PO_FAIL);
5840Sstevel@tonic-gate 
5850Sstevel@tonic-gate 	qsort(stc_elems, stc_nelem, sizeof (pool_elem_t *),
5860Sstevel@tonic-gate 	    qsort_elem_compare);
5870Sstevel@tonic-gate 
5880Sstevel@tonic-gate 	if ((dyn_elems = get_elem_list(dyn, type, &dyn_nelem)) == NULL) {
5890Sstevel@tonic-gate 		free(stc_elems);
5900Sstevel@tonic-gate 		return (PO_FAIL);
5910Sstevel@tonic-gate 	}
5920Sstevel@tonic-gate 
5930Sstevel@tonic-gate 	qsort(dyn_elems, dyn_nelem, sizeof (pool_elem_t *),
5940Sstevel@tonic-gate 	    qsort_elem_compare);
5950Sstevel@tonic-gate 	/*
5960Sstevel@tonic-gate 	 * Step through and do the updating, remember that we are
5970Sstevel@tonic-gate 	 * comparing using the compare function for the configuration
5980Sstevel@tonic-gate 	 * and that is fixed.
5990Sstevel@tonic-gate 	 */
6000Sstevel@tonic-gate 	i = j = 0;
6010Sstevel@tonic-gate 	while (status == PO_SUCCESS && i < stc_nelem && j < dyn_nelem) {
6020Sstevel@tonic-gate 		int compare;
6030Sstevel@tonic-gate 		/*
6040Sstevel@tonic-gate 		 * We are going to do this by stepping through the static
6050Sstevel@tonic-gate 		 * list first.
6060Sstevel@tonic-gate 		 */
6070Sstevel@tonic-gate 		if (elem_is_default(stc_elems[i]) &&
6080Sstevel@tonic-gate 		    elem_is_default(dyn_elems[j]))
6090Sstevel@tonic-gate 			compare = 0;
6100Sstevel@tonic-gate 		else
6110Sstevel@tonic-gate 			compare = pool_elem_compare_name(stc_elems[i],
6120Sstevel@tonic-gate 			    dyn_elems[j]);
6130Sstevel@tonic-gate 		if (compare < 0) {
6140Sstevel@tonic-gate 			status = process_elem_lt(stc_elems[i], dyn);
6150Sstevel@tonic-gate 			i++;
6160Sstevel@tonic-gate 		} else if (compare > 0) {
6170Sstevel@tonic-gate 			status = process_elem_gt(dyn_elems[j], stc, dyn);
6180Sstevel@tonic-gate 			j++;
6190Sstevel@tonic-gate 		} else {	/* compare == 0 */
6200Sstevel@tonic-gate 			if (commit_update(stc_elems[i], dyn_elems[j], pass)
6210Sstevel@tonic-gate 			    != PO_SUCCESS) {
6220Sstevel@tonic-gate 				status = PO_FAIL;
6230Sstevel@tonic-gate 			}
6240Sstevel@tonic-gate 			i++;
6250Sstevel@tonic-gate 			j++;
6260Sstevel@tonic-gate 		}
6270Sstevel@tonic-gate 	}
6280Sstevel@tonic-gate 	if (status == PO_FAIL) {
6290Sstevel@tonic-gate 		free(stc_elems);
6300Sstevel@tonic-gate 		free(dyn_elems);
6310Sstevel@tonic-gate 		return (PO_FAIL);
6320Sstevel@tonic-gate 	}
6330Sstevel@tonic-gate 	while (status == PO_SUCCESS && i < stc_nelem) {
6340Sstevel@tonic-gate 		status = process_elem_lt(stc_elems[i], dyn);
6350Sstevel@tonic-gate 		i++;
6360Sstevel@tonic-gate 	}
6370Sstevel@tonic-gate 	if (status == PO_FAIL) {
6380Sstevel@tonic-gate 		free(stc_elems);
6390Sstevel@tonic-gate 		free(dyn_elems);
6400Sstevel@tonic-gate 		return (PO_FAIL);
6410Sstevel@tonic-gate 	}
6420Sstevel@tonic-gate 	while (status == PO_SUCCESS && j < dyn_nelem) {
6430Sstevel@tonic-gate 		status = process_elem_gt(dyn_elems[j], stc, dyn);
6440Sstevel@tonic-gate 		j++;
6450Sstevel@tonic-gate 	}
6460Sstevel@tonic-gate 	free(stc_elems);
6470Sstevel@tonic-gate 	free(dyn_elems);
6480Sstevel@tonic-gate 	return (status);
6490Sstevel@tonic-gate }
6500Sstevel@tonic-gate 
6510Sstevel@tonic-gate /*
6520Sstevel@tonic-gate  * get_elem_list() returns a list of pool_elem_t's. The size of the
6530Sstevel@tonic-gate  * list is written into nelem. The list contains elements of all types
6540Sstevel@tonic-gate  * that pools is interested in: i.e. system, pool, resources and
6550Sstevel@tonic-gate  * resource components. It is the caller's responsibility to free the
6560Sstevel@tonic-gate  * list when it is finished with.
6570Sstevel@tonic-gate  *
6580Sstevel@tonic-gate  * The array of pointers returned by the type specific query can be
6590Sstevel@tonic-gate  * safely cast to be an array of pool_elem_t pointers. In the case of
6600Sstevel@tonic-gate  * PEC_RES_COMP some additional processing is required to qualify the
6610Sstevel@tonic-gate  * list of elements.
6620Sstevel@tonic-gate  *
6630Sstevel@tonic-gate  * Returns a pointer to a list of pool_elem_t's or NULL on failure.
6640Sstevel@tonic-gate  */
6650Sstevel@tonic-gate static pool_elem_t **
get_elem_list(const pool_conf_t * conf,int type,uint_t * nelem)6660Sstevel@tonic-gate get_elem_list(const pool_conf_t *conf, int type, uint_t *nelem)
6670Sstevel@tonic-gate {
6680Sstevel@tonic-gate 	pool_resource_t **rl;
6690Sstevel@tonic-gate 	pool_t **pl;
6700Sstevel@tonic-gate 	pool_component_t **cl;
6710Sstevel@tonic-gate 	pool_elem_t **elems = NULL;
6720Sstevel@tonic-gate 	int i;
6730Sstevel@tonic-gate 
6740Sstevel@tonic-gate 	switch (type) {
6750Sstevel@tonic-gate 	case PEC_SYSTEM:
6760Sstevel@tonic-gate 		if ((elems = malloc(sizeof (pool_elem_t *))) == NULL)
6770Sstevel@tonic-gate 			return (NULL);
6780Sstevel@tonic-gate 		*nelem = 1;
6790Sstevel@tonic-gate 		elems[0] = pool_conf_to_elem(conf);
6800Sstevel@tonic-gate 		break;
6810Sstevel@tonic-gate 	case PEC_POOL:
6820Sstevel@tonic-gate 		if ((pl = pool_query_pools(conf, nelem, NULL)) != NULL) {
6830Sstevel@tonic-gate 			elems = (pool_elem_t **)pl;
6840Sstevel@tonic-gate 		}
6850Sstevel@tonic-gate 		break;
6860Sstevel@tonic-gate 	case PEC_RES_COMP:
6870Sstevel@tonic-gate 		if ((rl = pool_query_resources(conf, nelem, NULL)) != NULL) {
6880Sstevel@tonic-gate 			int j = 0;
6890Sstevel@tonic-gate 			elems = (pool_elem_t **)rl;
6900Sstevel@tonic-gate 			for (i = 0; i < *nelem; i++) {
6910Sstevel@tonic-gate 				if (pool_elem_class(TO_ELEM(rl[i])) ==
6920Sstevel@tonic-gate 				    PEC_RES_COMP)
6930Sstevel@tonic-gate 					elems[j++] = TO_ELEM(rl[i]);
6940Sstevel@tonic-gate 			}
6950Sstevel@tonic-gate 			*nelem = j;
6960Sstevel@tonic-gate 		}
6970Sstevel@tonic-gate 		break;
6980Sstevel@tonic-gate 	case PEC_COMP:
6990Sstevel@tonic-gate 		if ((cl = pool_query_components(conf, nelem, NULL)) != NULL) {
7000Sstevel@tonic-gate 			elems = (pool_elem_t **)cl;
7010Sstevel@tonic-gate 		}
7020Sstevel@tonic-gate 		break;
7030Sstevel@tonic-gate 	default:
7040Sstevel@tonic-gate 		abort();
7050Sstevel@tonic-gate 		break;
7060Sstevel@tonic-gate 	}
7070Sstevel@tonic-gate 	return (elems);
7080Sstevel@tonic-gate }
7090Sstevel@tonic-gate 
7100Sstevel@tonic-gate /*
7110Sstevel@tonic-gate  * share_resources() sets up the allocation of resources by each
7120Sstevel@tonic-gate  * provider.  Firstly all resources are updated with the importance of
7130Sstevel@tonic-gate  * each pool, then each resource provider is invoked in turn with a
7140Sstevel@tonic-gate  * list of it's own resources.  Finally, the pool importance details
7150Sstevel@tonic-gate  * are removed from the resources.
7160Sstevel@tonic-gate  *
7170Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
7180Sstevel@tonic-gate  */
7190Sstevel@tonic-gate static int
share_resources(pool_conf_t * conf)7200Sstevel@tonic-gate share_resources(pool_conf_t *conf)
7210Sstevel@tonic-gate {
7220Sstevel@tonic-gate 	pool_resource_t **resources;
7230Sstevel@tonic-gate 	uint_t nelem;
7240Sstevel@tonic-gate 	pool_value_t *props[] = { NULL, NULL };
7250Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
7260Sstevel@tonic-gate 
7270Sstevel@tonic-gate 	props[0] = &val;
7280Sstevel@tonic-gate 
7290Sstevel@tonic-gate 	/*
7300Sstevel@tonic-gate 	 * Call an allocation function for each type of supported resource.
7310Sstevel@tonic-gate 	 * This function is responsible for "sharing" resources to resource
7320Sstevel@tonic-gate 	 * sets as determined by the system.allocate-method.
7330Sstevel@tonic-gate 	 */
7340Sstevel@tonic-gate 
7350Sstevel@tonic-gate 	if (pool_value_set_string(props[0], "pset") != PO_SUCCESS ||
7360Sstevel@tonic-gate 	    pool_value_set_name(props[0], c_type) != PO_SUCCESS)
7370Sstevel@tonic-gate 		return (PO_FAIL);
7380Sstevel@tonic-gate 
7390Sstevel@tonic-gate 	if (add_importance_props(conf) != PO_SUCCESS) {
7400Sstevel@tonic-gate 		(void) remove_importance_props(conf);
7410Sstevel@tonic-gate 		return (PO_FAIL);
7420Sstevel@tonic-gate 	}
7430Sstevel@tonic-gate 
7440Sstevel@tonic-gate 	if ((resources = pool_query_resources(conf, &nelem, props)) != NULL) {
7450Sstevel@tonic-gate 		/*
7460Sstevel@tonic-gate 		 * 'pool.importance' defines the importance of a pool;
7470Sstevel@tonic-gate 		 * resources inherit the importance of the pool that
7480Sstevel@tonic-gate 		 * is associated with them. If more than one pool is
7490Sstevel@tonic-gate 		 * associated with a resource, the importance of the
7500Sstevel@tonic-gate 		 * resource is the maximum importance of all
7510Sstevel@tonic-gate 		 * associated pools.  Use '_importance' on resources
7520Sstevel@tonic-gate 		 * to determine who gets extra.
7530Sstevel@tonic-gate 		 */
7540Sstevel@tonic-gate 		if (resource_allocate("pset", resources, nelem) != PO_SUCCESS) {
7550Sstevel@tonic-gate 			free(resources);
7560Sstevel@tonic-gate 			(void) remove_importance_props(conf);
7570Sstevel@tonic-gate 			return (PO_FAIL);
7580Sstevel@tonic-gate 		}
7590Sstevel@tonic-gate 	}
7600Sstevel@tonic-gate 	free(resources);
7610Sstevel@tonic-gate 	(void) remove_importance_props(conf);
7620Sstevel@tonic-gate 	return (PO_SUCCESS);
7630Sstevel@tonic-gate }
7640Sstevel@tonic-gate 
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate /*
7670Sstevel@tonic-gate  * Work out which allocation method to use based on the value of the
7680Sstevel@tonic-gate  * system.allocate-method property.
7690Sstevel@tonic-gate  */
7700Sstevel@tonic-gate int
resource_allocate(const char * type,pool_resource_t ** res,uint_t nelem)7710Sstevel@tonic-gate resource_allocate(const char *type, pool_resource_t **res, uint_t nelem)
7720Sstevel@tonic-gate {
7730Sstevel@tonic-gate 	pool_elem_t *pe;
7740Sstevel@tonic-gate 	const char *method_name;
7750Sstevel@tonic-gate 	uint64_t method;
7760Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
7770Sstevel@tonic-gate 	int ret;
7780Sstevel@tonic-gate 
7790Sstevel@tonic-gate 	pe = pool_conf_to_elem(TO_CONF(TO_ELEM(res[0])));
7800Sstevel@tonic-gate 
7810Sstevel@tonic-gate 	if (pool_get_ns_property(pe, "allocate-method", &val) != POC_STRING)
7820Sstevel@tonic-gate 		method_name = POA_IMPORTANCE;
7830Sstevel@tonic-gate 	else {
7840Sstevel@tonic-gate 		(void) pool_value_get_string(&val, &method_name);
7850Sstevel@tonic-gate 	}
7860Sstevel@tonic-gate 	if (strcmp(POA_IMPORTANCE, method_name) != 0) {
7870Sstevel@tonic-gate 		if (strcmp(POA_SURPLUS_TO_DEFAULT, method_name) != 0) {
7880Sstevel@tonic-gate 			pool_seterror(POE_INVALID_CONF);
7890Sstevel@tonic-gate 			return (PO_FAIL);
7900Sstevel@tonic-gate 		} else {
7910Sstevel@tonic-gate 			method = POA_SURPLUS_TO_DEFAULT_NUM;
7920Sstevel@tonic-gate 		}
7930Sstevel@tonic-gate 	} else {
7940Sstevel@tonic-gate 		method = POA_IMPORTANCE_NUM;
7950Sstevel@tonic-gate 	}
7960Sstevel@tonic-gate 	switch (method) {
7970Sstevel@tonic-gate 	case POA_IMPORTANCE_NUM:
7980Sstevel@tonic-gate 		/*
7990Sstevel@tonic-gate 		 * TODO: Add support for new resource types
8000Sstevel@tonic-gate 		 */
8010Sstevel@tonic-gate 		switch (pool_resource_elem_class_from_string(type)) {
8020Sstevel@tonic-gate 		case PREC_PSET:
8030Sstevel@tonic-gate 			ret = pset_allocate_imp(res, nelem);
8040Sstevel@tonic-gate 			break;
8050Sstevel@tonic-gate 		default:
8060Sstevel@tonic-gate 			ret = PO_FAIL;
8070Sstevel@tonic-gate 			break;
8080Sstevel@tonic-gate 		}
8090Sstevel@tonic-gate 		break;
8100Sstevel@tonic-gate 	case POA_SURPLUS_TO_DEFAULT_NUM:
8110Sstevel@tonic-gate 		ret = resource_allocate_default(res, nelem);
8120Sstevel@tonic-gate 		break;
8130Sstevel@tonic-gate 	}
8140Sstevel@tonic-gate 
8150Sstevel@tonic-gate 	return (ret);
8160Sstevel@tonic-gate }
8170Sstevel@tonic-gate 
8180Sstevel@tonic-gate /*
8190Sstevel@tonic-gate  * Each set will get its minimum, however if there is more than the
8200Sstevel@tonic-gate  * total minimum available, then leave this in the default set.
8210Sstevel@tonic-gate  */
8220Sstevel@tonic-gate int
resource_allocate_default(pool_resource_t ** res,uint_t nelem)8230Sstevel@tonic-gate resource_allocate_default(pool_resource_t **res, uint_t nelem)
8240Sstevel@tonic-gate {
8250Sstevel@tonic-gate 	res_info_t *res_info;
8260Sstevel@tonic-gate 	uint_t j;
8270Sstevel@tonic-gate 	pool_resource_t *default_res = NULL;
8280Sstevel@tonic-gate 
8290Sstevel@tonic-gate 	if (nelem == 1)
8300Sstevel@tonic-gate 		return (PO_SUCCESS);
8310Sstevel@tonic-gate 
8320Sstevel@tonic-gate 	if ((res_info = calloc(nelem, sizeof (res_info_t))) == NULL) {
8330Sstevel@tonic-gate 		return (PO_FAIL);
8340Sstevel@tonic-gate 	}
8350Sstevel@tonic-gate 
8360Sstevel@tonic-gate 	/* Load current resource values. */
8370Sstevel@tonic-gate 	for (j = 0; j < nelem; j++) {
8380Sstevel@tonic-gate 
8390Sstevel@tonic-gate 		if (default_res == NULL &&
8400Sstevel@tonic-gate 		    resource_is_default(res[j]) == PO_TRUE)
8410Sstevel@tonic-gate 			default_res = res[j];
8420Sstevel@tonic-gate 
8430Sstevel@tonic-gate 		if (resource_get_max(res[j],
8440Sstevel@tonic-gate 		    &res_info[j].ri_max) == PO_FAIL ||
8450Sstevel@tonic-gate 		    resource_get_min(res[j],
8460Sstevel@tonic-gate 			&res_info[j].ri_min) == PO_FAIL ||
8470Sstevel@tonic-gate 		    resource_get_size(res[j],
8480Sstevel@tonic-gate 			&res_info[j].ri_oldsize) == PO_FAIL ||
8490Sstevel@tonic-gate 		    resource_get_pinned(res[j],
8500Sstevel@tonic-gate 			&res_info[j].ri_pinned) == PO_FAIL) {
8510Sstevel@tonic-gate 			free(res_info);
8520Sstevel@tonic-gate 			return (PO_FAIL);
8530Sstevel@tonic-gate 		}
8540Sstevel@tonic-gate 		res_info[j].ri_res = res[j];
8550Sstevel@tonic-gate 	}
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate 	/*
8580Sstevel@tonic-gate 	 * Firstly, for all resources that have size greater than min,
8590Sstevel@tonic-gate 	 * transfer all movable size above min to the default resource.
8600Sstevel@tonic-gate 	 */
8610Sstevel@tonic-gate 	for (j = 0; j < nelem; j++) {
8620Sstevel@tonic-gate 
8630Sstevel@tonic-gate 		uint64_t real_min;
8640Sstevel@tonic-gate 
8650Sstevel@tonic-gate 		/* compute the real minimum number of resources */
8660Sstevel@tonic-gate 		real_min = MAX(res_info[j].ri_pinned, res_info[j].ri_min);
8670Sstevel@tonic-gate 		if (res_info[j].ri_res != default_res &&
8680Sstevel@tonic-gate 		    res_info[j].ri_oldsize > real_min) {
8690Sstevel@tonic-gate 
8700Sstevel@tonic-gate 			uint64_t num;
8710Sstevel@tonic-gate 
8720Sstevel@tonic-gate 			num = res_info[j].ri_oldsize - real_min;
8730Sstevel@tonic-gate 			if (pool_resource_transfer(
8740Sstevel@tonic-gate 			    TO_CONF(TO_ELEM(default_res)),
8750Sstevel@tonic-gate 				res_info[j].ri_res, default_res, num) !=
8760Sstevel@tonic-gate 			    PO_SUCCESS) {
8770Sstevel@tonic-gate 				free(res_info);
8780Sstevel@tonic-gate 				return (PO_FAIL);
8790Sstevel@tonic-gate 			}
8800Sstevel@tonic-gate 		}
8810Sstevel@tonic-gate 	}
8820Sstevel@tonic-gate 	/*
8830Sstevel@tonic-gate 	 * Now, transfer resources below min from the default.
8840Sstevel@tonic-gate 	 */
8850Sstevel@tonic-gate 	for (j = 0; j < nelem; j++) {
8860Sstevel@tonic-gate 		/*
8870Sstevel@tonic-gate 		 * We don't want to interfere with resources which are reserved
8880Sstevel@tonic-gate 		 */
8890Sstevel@tonic-gate 		if (res_info[j].ri_res != default_res &&
8900Sstevel@tonic-gate 		    res_info[j].ri_oldsize < res_info[j].ri_min) {
8910Sstevel@tonic-gate 			if (pool_resource_transfer(
8920Sstevel@tonic-gate 			    TO_CONF(TO_ELEM(default_res)),
8930Sstevel@tonic-gate 			    default_res, res_info[j].ri_res,
8940Sstevel@tonic-gate 			    res_info[j].ri_min - res_info[j].ri_oldsize) !=
8950Sstevel@tonic-gate 			    PO_SUCCESS) {
8960Sstevel@tonic-gate 				free(res_info);
8970Sstevel@tonic-gate 				return (PO_FAIL);
8980Sstevel@tonic-gate 			}
8990Sstevel@tonic-gate 		}
9000Sstevel@tonic-gate 	}
9010Sstevel@tonic-gate 	free(res_info);
9020Sstevel@tonic-gate 	return (PO_SUCCESS);
9030Sstevel@tonic-gate }
9040Sstevel@tonic-gate 
9050Sstevel@tonic-gate /*
9060Sstevel@tonic-gate  * Allocate cpus to pset resource sets, favoring sets with higher importance.
9070Sstevel@tonic-gate  *
9080Sstevel@tonic-gate  * Step 1: Sort resource sets by decreasing importance, and load each sets
9090Sstevel@tonic-gate  *	   current size (oldsize), min, max, and number of pinned cpus.
9100Sstevel@tonic-gate  *	   Compute the total number of cpus by totaling oldsize.
9110Sstevel@tonic-gate  *
9120Sstevel@tonic-gate  * Step 2: Compute the newsize for each set:
9130Sstevel@tonic-gate  *
9140Sstevel@tonic-gate  * 	Give each set its min number of cpus.  This min may be greater than
9150Sstevel@tonic-gate  *	its pset.min due to pinned cpus. If there are more cpus than the total
9160Sstevel@tonic-gate  *	of all mins, then the surplus cpus are dealt round-robin to all sets
9170Sstevel@tonic-gate  *	(up to their max) in order of decreasing importance.  A set may be
9180Sstevel@tonic-gate  *	skipped during dealing because it started with more than its min due to
9190Sstevel@tonic-gate  * 	pinned cpus.  The dealing stops when there are no more cpus or all
9200Sstevel@tonic-gate  *	sets are at their max. If all sets are at their max, any remaining cpus
9210Sstevel@tonic-gate  *	are given to the default set.
9220Sstevel@tonic-gate  *
9230Sstevel@tonic-gate  * Step 3: Transfer cpus from sets with (oldsize > newsize) to sets with
9240Sstevel@tonic-gate  *	   (oldsize < newsize).
9250Sstevel@tonic-gate  */
9260Sstevel@tonic-gate int
pset_allocate_imp(pool_resource_t ** res,uint_t nelem)9270Sstevel@tonic-gate pset_allocate_imp(pool_resource_t **res, uint_t nelem)
9280Sstevel@tonic-gate {
9290Sstevel@tonic-gate 	res_info_t *res_info;
9300Sstevel@tonic-gate 	res_info_t *default_res_info;
9310Sstevel@tonic-gate 	const pool_resource_t *default_res = NULL;
9320Sstevel@tonic-gate 	uint64_t tot_resources = 0;	/* total count of resources */
9330Sstevel@tonic-gate 	uint64_t tot_min = 0;		/* total of all resource set mins */
9340Sstevel@tonic-gate 	uint64_t num_to_deal = 0;	/* total resources above mins to deal */
9350Sstevel@tonic-gate 	uint64_t sets_maxed = 0;	/* number of resource sets dealt to  */
9360Sstevel@tonic-gate 					/* their max */
9370Sstevel@tonic-gate 	uint64_t sets_finished = 0;	/* number of resource sets that have */
9380Sstevel@tonic-gate 					/* size == newsize */
9390Sstevel@tonic-gate 	int donor, receiver;
9400Sstevel@tonic-gate 	int deal;
9410Sstevel@tonic-gate 	int j;
9420Sstevel@tonic-gate 	int ret = PO_SUCCESS;
9430Sstevel@tonic-gate 
9440Sstevel@tonic-gate 	/*
9450Sstevel@tonic-gate 	 * Build list of res_info_t's
9460Sstevel@tonic-gate 	 */
9470Sstevel@tonic-gate 	if ((res_info = calloc(nelem, sizeof (res_info_t))) == NULL) {
9480Sstevel@tonic-gate 		pool_seterror(POE_SYSTEM);
9490Sstevel@tonic-gate 		return (PO_FAIL);
9500Sstevel@tonic-gate 	}
9510Sstevel@tonic-gate 
9520Sstevel@tonic-gate 	/* Order resources by importance, most important being first */
9530Sstevel@tonic-gate 	qsort(res, nelem, sizeof (pool_resource_t *),
9540Sstevel@tonic-gate 	    resource_compare_by_descending_importance);
9550Sstevel@tonic-gate 
9560Sstevel@tonic-gate 	for (j = 0; j < nelem; j++) {
9570Sstevel@tonic-gate 
9580Sstevel@tonic-gate 		/* Track which resource is the default */
9590Sstevel@tonic-gate 		if (default_res == NULL &&
9600Sstevel@tonic-gate 		    resource_is_default(res[j]) == PO_TRUE) {
9610Sstevel@tonic-gate 			default_res = res[j];
9620Sstevel@tonic-gate 			default_res_info = &(res_info[j]);
9630Sstevel@tonic-gate 		}
9640Sstevel@tonic-gate 
9650Sstevel@tonic-gate 		/* Load sets' current values */
9660Sstevel@tonic-gate 		if (resource_get_max(res[j], &res_info[j].ri_max) == PO_FAIL ||
9670Sstevel@tonic-gate 		    resource_get_min(res[j], &res_info[j].ri_min) == PO_FAIL ||
9680Sstevel@tonic-gate 		    resource_get_size(res[j], &res_info[j].ri_oldsize) ==
9690Sstevel@tonic-gate 		    PO_FAIL ||
9700Sstevel@tonic-gate 		    resource_get_pinned(res[j],
9710Sstevel@tonic-gate 		    &res_info[j].ri_pinned) == PO_FAIL) {
9720Sstevel@tonic-gate 			free(res_info);
9730Sstevel@tonic-gate 			return (PO_FAIL);
9740Sstevel@tonic-gate 		}
9750Sstevel@tonic-gate 
9760Sstevel@tonic-gate 		/* Start each set's newsize out at their min. */
9770Sstevel@tonic-gate 		res_info[j].ri_newsize = res_info[j].ri_min;
9780Sstevel@tonic-gate 
9790Sstevel@tonic-gate 		/* pre-deal pinned resources that exceed min */
9800Sstevel@tonic-gate 		if (res_info[j].ri_pinned > res_info[j].ri_min) {
9810Sstevel@tonic-gate 			res_info[j].ri_newsize = res_info[j].ri_pinned;
9820Sstevel@tonic-gate 			res_info[j].ri_dealt =
9830Sstevel@tonic-gate 			    res_info[j].ri_newsize - res_info[j].ri_min;
9840Sstevel@tonic-gate 		}
9850Sstevel@tonic-gate 		res_info[j].ri_res = res[j];
9860Sstevel@tonic-gate 
9870Sstevel@tonic-gate 		/* Compute total number of resources to deal out */
9880Sstevel@tonic-gate 		tot_resources += res_info[j].ri_oldsize;
9890Sstevel@tonic-gate 		tot_min += res_info[j].ri_newsize;
9900Sstevel@tonic-gate 
9910Sstevel@tonic-gate #ifdef DEBUG
9920Sstevel@tonic-gate 		dprintf("res allocation details\n");
9930Sstevel@tonic-gate 		pool_elem_dprintf(TO_ELEM(res[j]));
9940Sstevel@tonic-gate 		dprintf("size=%llu\n", res_info[j].ri_oldsize);
9950Sstevel@tonic-gate #endif	/* DEBUG */
9960Sstevel@tonic-gate 	}
9970Sstevel@tonic-gate 
9980Sstevel@tonic-gate 	num_to_deal = tot_resources - tot_min;
9990Sstevel@tonic-gate 
10000Sstevel@tonic-gate 	/*
10010Sstevel@tonic-gate 	 * Deal one resource to each set, and then another, until all
10020Sstevel@tonic-gate 	 * resources are dealt or all sets are at their max.
10030Sstevel@tonic-gate 	 */
10040Sstevel@tonic-gate 	for (deal = 1; num_to_deal > 0 && sets_maxed < nelem; deal++) {
10050Sstevel@tonic-gate 		for (j = 0; j < nelem; j++) {
10060Sstevel@tonic-gate 
10070Sstevel@tonic-gate 			/*
10080Sstevel@tonic-gate 			 * Skip this resource set if it has already been
10090Sstevel@tonic-gate 			 * pre-dealt a resource due to pinned resources.
10100Sstevel@tonic-gate 			 */
10110Sstevel@tonic-gate 			if (res_info[j].ri_dealt >= deal)
10120Sstevel@tonic-gate 				continue;
10130Sstevel@tonic-gate 
10140Sstevel@tonic-gate 			if (res_info[j].ri_newsize < res_info[j].ri_max) {
10150Sstevel@tonic-gate 
10160Sstevel@tonic-gate 				res_info[j].ri_dealt++;
10170Sstevel@tonic-gate 				res_info[j].ri_newsize++;
10180Sstevel@tonic-gate 				if (res_info[j].ri_newsize ==
10190Sstevel@tonic-gate 				    res_info[j].ri_max)
10200Sstevel@tonic-gate 					sets_maxed++;
10210Sstevel@tonic-gate 
10220Sstevel@tonic-gate 				num_to_deal--;
10230Sstevel@tonic-gate 				if (num_to_deal == 0)
10240Sstevel@tonic-gate 					break;
10250Sstevel@tonic-gate 			}
10260Sstevel@tonic-gate 		}
10270Sstevel@tonic-gate 	}
10280Sstevel@tonic-gate 
10290Sstevel@tonic-gate 	/*
10300Sstevel@tonic-gate 	 * If all resource sets are at their max, deal the remaining to the
10310Sstevel@tonic-gate 	 * default resource set.
10320Sstevel@tonic-gate 	 */
10330Sstevel@tonic-gate 	if ((sets_maxed == nelem) && (num_to_deal > 0)) {
10340Sstevel@tonic-gate 		default_res_info->ri_dealt += num_to_deal;
10350Sstevel@tonic-gate 		default_res_info->ri_newsize += num_to_deal;
10360Sstevel@tonic-gate 	}
10370Sstevel@tonic-gate 
10380Sstevel@tonic-gate 	/*
10390Sstevel@tonic-gate 	 * Sort so that resource sets needing resources preced resource sets
10400Sstevel@tonic-gate 	 * that have extra resources.  The sort function will also compute
10410Sstevel@tonic-gate 	 * The quantity of resources that need to be transfered into or out
10420Sstevel@tonic-gate 	 * of each set so that it's size == newsize.
10430Sstevel@tonic-gate 	 */
10440Sstevel@tonic-gate 	qsort(res_info, nelem, sizeof (res_info_t),
10450Sstevel@tonic-gate 	    compute_size_to_transfer);
10460Sstevel@tonic-gate 
10470Sstevel@tonic-gate 	/*
10480Sstevel@tonic-gate 	 * The donor index starts at the end of the resource set list and
10490Sstevel@tonic-gate 	 * walks up.  The receiver index starts at the beginning of the
10500Sstevel@tonic-gate 	 * resource set list and walks down.  Cpu's are transfered from the
10510Sstevel@tonic-gate 	 * donors to the receivers until all sets have transfer == 0).
10520Sstevel@tonic-gate 	 */
10530Sstevel@tonic-gate 	donor = nelem - 1;
10540Sstevel@tonic-gate 	receiver = 0;
10550Sstevel@tonic-gate 
10560Sstevel@tonic-gate 	/* Number of sets with transfer == 0 */
10570Sstevel@tonic-gate 	sets_finished = 0;
10580Sstevel@tonic-gate 
10590Sstevel@tonic-gate 	/* Tranfer resources so that each set's size becomes newsize */
10600Sstevel@tonic-gate 	for (;;) {
10610Sstevel@tonic-gate 
10620Sstevel@tonic-gate 		uint64_t ntrans;
10630Sstevel@tonic-gate 		if (donor == receiver) {
10640Sstevel@tonic-gate 			if (res_info[donor].ri_transfer != 0) {
10650Sstevel@tonic-gate 				free(res_info);
10660Sstevel@tonic-gate 				return (PO_FAIL);
10670Sstevel@tonic-gate 			}
10680Sstevel@tonic-gate 			sets_finished++;
10690Sstevel@tonic-gate 			break;
10700Sstevel@tonic-gate 		}
10710Sstevel@tonic-gate 		if (res_info[donor].ri_transfer == 0) {
10720Sstevel@tonic-gate 			sets_finished++;
10730Sstevel@tonic-gate 			donor--;
10740Sstevel@tonic-gate 			continue;
10750Sstevel@tonic-gate 		}
10760Sstevel@tonic-gate 		if (res_info[receiver].ri_transfer == 0) {
10770Sstevel@tonic-gate 			sets_finished++;
10780Sstevel@tonic-gate 			receiver++;
10790Sstevel@tonic-gate 			continue;
10800Sstevel@tonic-gate 		}
10810Sstevel@tonic-gate 
10820Sstevel@tonic-gate 		/* Transfer resources from the donor set to the receiver */
10830Sstevel@tonic-gate 		ntrans = MIN(res_info[donor].ri_transfer,
10840Sstevel@tonic-gate 			    -res_info[receiver].ri_transfer);
10850Sstevel@tonic-gate 
10860Sstevel@tonic-gate 		if (pool_resource_transfer(
10870Sstevel@tonic-gate 			TO_CONF(TO_ELEM(res_info[donor].ri_res)),
10880Sstevel@tonic-gate 			    res_info[donor].ri_res, res_info[receiver].ri_res,
10890Sstevel@tonic-gate 			    ntrans) != PO_SUCCESS) {
10900Sstevel@tonic-gate 				free(res_info);
10910Sstevel@tonic-gate 				return (PO_FAIL);
10920Sstevel@tonic-gate 			}
10930Sstevel@tonic-gate 		res_info[donor].ri_transfer -= ntrans;
10940Sstevel@tonic-gate 		res_info[receiver].ri_transfer += ntrans;
10950Sstevel@tonic-gate 	}
10960Sstevel@tonic-gate 
10970Sstevel@tonic-gate 	if (sets_finished != nelem)
10980Sstevel@tonic-gate 		ret = PO_FAIL;
10990Sstevel@tonic-gate 
11000Sstevel@tonic-gate 	free(res_info);
11010Sstevel@tonic-gate 	return (ret);
11020Sstevel@tonic-gate }
11030Sstevel@tonic-gate 
11040Sstevel@tonic-gate /*
11050Sstevel@tonic-gate  * Used as a qsort parameter to help order resources in terms of their
11060Sstevel@tonic-gate  * importance, higher importance being first.
11070Sstevel@tonic-gate  */
11080Sstevel@tonic-gate int
resource_compare_by_descending_importance(const void * arg1,const void * arg2)11090Sstevel@tonic-gate resource_compare_by_descending_importance(const void *arg1, const void *arg2)
11100Sstevel@tonic-gate {
11110Sstevel@tonic-gate 	pool_elem_t *elem1;
11120Sstevel@tonic-gate 	pool_elem_t *elem2;
11130Sstevel@tonic-gate 	pool_resource_t **res1 = (pool_resource_t **)arg1;
11140Sstevel@tonic-gate 	pool_resource_t **res2 = (pool_resource_t **)arg2;
11150Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
11160Sstevel@tonic-gate 	int64_t i1 = 0, i2 = 0;
11170Sstevel@tonic-gate 
11180Sstevel@tonic-gate 	elem1 = TO_ELEM(*res1);
11190Sstevel@tonic-gate 	elem2 = TO_ELEM(*res2);
11200Sstevel@tonic-gate 
11210Sstevel@tonic-gate 	if (pool_get_property(TO_CONF(elem1), elem1, "_importance", &val) ==
11220Sstevel@tonic-gate 	    POC_INT)
11230Sstevel@tonic-gate 		(void) pool_value_get_int64(&val, &i1);
11240Sstevel@tonic-gate 
11250Sstevel@tonic-gate 	if (pool_get_property(TO_CONF(elem2), elem2, "_importance", &val) ==
11260Sstevel@tonic-gate 	    POC_INT)
11270Sstevel@tonic-gate 		(void) pool_value_get_int64(&val, &i2);
11280Sstevel@tonic-gate 	return (i1 > i2 ? -1 : (i1 < i2 ? 1 : 0));
11290Sstevel@tonic-gate }
11300Sstevel@tonic-gate 
11310Sstevel@tonic-gate /*
11320Sstevel@tonic-gate  * Sort in increasing order so that resource sets with extra resources are at
11330Sstevel@tonic-gate  * the end and resource sets needing resources are at the beginning.
11340Sstevel@tonic-gate  */
11350Sstevel@tonic-gate int
compute_size_to_transfer(const void * arg1,const void * arg2)11360Sstevel@tonic-gate compute_size_to_transfer(const void *arg1, const void *arg2)
11370Sstevel@tonic-gate {
11380Sstevel@tonic-gate 	res_info_t *r1 = (res_info_t *)arg1, *r2 = (res_info_t *)arg2;
11390Sstevel@tonic-gate 	r1->ri_transfer = (int64_t)r1->ri_oldsize - (int64_t)r1->ri_newsize;
11400Sstevel@tonic-gate 	r2->ri_transfer = (int64_t)r2->ri_oldsize - (int64_t)r2->ri_newsize;
11410Sstevel@tonic-gate 	return (r1->ri_transfer > r2->ri_transfer ? 1 :
11420Sstevel@tonic-gate 	    (r1->ri_transfer < r2->ri_transfer ? -1 : 0));
11430Sstevel@tonic-gate }
11440Sstevel@tonic-gate 
11450Sstevel@tonic-gate /*
11460Sstevel@tonic-gate  * set_importance_cb() is used to create "_importance" props on each
11470Sstevel@tonic-gate  * resource associated with a pool.
11480Sstevel@tonic-gate  *
11490Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
11500Sstevel@tonic-gate  */
11510Sstevel@tonic-gate /*ARGSUSED*/
11520Sstevel@tonic-gate static int
set_importance_cb(pool_conf_t * conf,pool_t * pool,void * unused)11530Sstevel@tonic-gate set_importance_cb(pool_conf_t *conf, pool_t *pool, void *unused)
11540Sstevel@tonic-gate {
11550Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
11560Sstevel@tonic-gate 	int64_t importance;
11570Sstevel@tonic-gate 	pool_resource_t **res;
11580Sstevel@tonic-gate 	uint_t nelem, i;
11590Sstevel@tonic-gate 
11600Sstevel@tonic-gate 	if (pool_get_property(conf, TO_ELEM(pool), "pool.importance", &val) !=
11610Sstevel@tonic-gate 	    POC_INT) {
11620Sstevel@tonic-gate 		pool_seterror(POE_INVALID_CONF);
11630Sstevel@tonic-gate 		return (PO_FAIL);
11640Sstevel@tonic-gate 	}
11650Sstevel@tonic-gate 	(void) pool_value_get_int64(&val, &importance);
11660Sstevel@tonic-gate 	if ((res = pool_query_pool_resources(conf, pool, &nelem, NULL)) ==
11670Sstevel@tonic-gate 	    NULL) {
11680Sstevel@tonic-gate 		return (PO_FAIL);
11690Sstevel@tonic-gate 	}
11700Sstevel@tonic-gate 	for (i = 0; res[i] != NULL; i++) {
11710Sstevel@tonic-gate 		int64_t old_importance = INT64_MIN;
11720Sstevel@tonic-gate 		pool_elem_t *elem = TO_ELEM(res[i]);
11730Sstevel@tonic-gate 
11740Sstevel@tonic-gate 		if (pool_get_property(conf, elem, "_importance", &val) ==
11750Sstevel@tonic-gate 		    POC_INT)
11760Sstevel@tonic-gate 			(void) pool_value_get_int64(&val, &old_importance);
11770Sstevel@tonic-gate 		if (old_importance <= importance) {
11780Sstevel@tonic-gate 			(void) pool_value_set_int64(&val, importance);
11790Sstevel@tonic-gate 			(void) pool_put_property(conf, elem, "_importance",
11800Sstevel@tonic-gate 			    &val);
11810Sstevel@tonic-gate 		}
11820Sstevel@tonic-gate 	}
11830Sstevel@tonic-gate 	free(res);
11840Sstevel@tonic-gate 	return (PO_SUCCESS);
11850Sstevel@tonic-gate }
11860Sstevel@tonic-gate 
11870Sstevel@tonic-gate /*
11880Sstevel@tonic-gate  * unset_importance_cb() is used to remove "_importance" props from
11890Sstevel@tonic-gate  * each resource associated with a pool.
11900Sstevel@tonic-gate  *
11910Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
11920Sstevel@tonic-gate  */
11930Sstevel@tonic-gate /*ARGSUSED*/
11940Sstevel@tonic-gate static int
unset_importance_cb(pool_conf_t * conf,pool_t * pool,void * unused)11950Sstevel@tonic-gate unset_importance_cb(pool_conf_t *conf, pool_t *pool, void *unused)
11960Sstevel@tonic-gate {
11970Sstevel@tonic-gate 	pool_resource_t **res;
11980Sstevel@tonic-gate 	uint_t nelem, i;
11990Sstevel@tonic-gate 
12000Sstevel@tonic-gate 	if ((res = pool_query_pool_resources(conf, pool, &nelem, NULL)) ==
12010Sstevel@tonic-gate 	    NULL) {
12020Sstevel@tonic-gate 		return (PO_FAIL);
12030Sstevel@tonic-gate 	}
12040Sstevel@tonic-gate 	for (i = 0; res[i] != NULL; i++) {
12050Sstevel@tonic-gate 		if (pool_rm_property(conf, TO_ELEM(res[i]), "_importance") ==
12060Sstevel@tonic-gate 		    PO_FAIL) {
12070Sstevel@tonic-gate 			free(res);
12080Sstevel@tonic-gate 			return (PO_FAIL);
12090Sstevel@tonic-gate 		}
12100Sstevel@tonic-gate 	}
12110Sstevel@tonic-gate 	free(res);
12120Sstevel@tonic-gate 	return (PO_SUCCESS);
12130Sstevel@tonic-gate }
12140Sstevel@tonic-gate 
12150Sstevel@tonic-gate /*
12160Sstevel@tonic-gate  * add_importance_props() is used to create "_importance" props on
12170Sstevel@tonic-gate  * each resource associated with a pool.
12180Sstevel@tonic-gate  *
12190Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
12200Sstevel@tonic-gate  */
12210Sstevel@tonic-gate static int
add_importance_props(pool_conf_t * conf)12220Sstevel@tonic-gate add_importance_props(pool_conf_t *conf)
12230Sstevel@tonic-gate {
12240Sstevel@tonic-gate 	return (pool_walk_pools(conf, NULL, set_importance_cb));
12250Sstevel@tonic-gate }
12260Sstevel@tonic-gate 
12270Sstevel@tonic-gate /*
12280Sstevel@tonic-gate  * remove_importance_props() is used to remove "_importance" props on
12290Sstevel@tonic-gate  * each resource associated with a pool.
12300Sstevel@tonic-gate  *
12310Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
12320Sstevel@tonic-gate  */
12330Sstevel@tonic-gate static int
remove_importance_props(pool_conf_t * conf)12340Sstevel@tonic-gate remove_importance_props(pool_conf_t *conf)
12350Sstevel@tonic-gate {
12360Sstevel@tonic-gate 	return (pool_walk_pools(conf, NULL, unset_importance_cb));
12370Sstevel@tonic-gate }
12380Sstevel@tonic-gate 
12390Sstevel@tonic-gate /*
12400Sstevel@tonic-gate  * pool_conf_commit_sys() takes a configuration and modifies both the
12410Sstevel@tonic-gate  * supplied configuration and the dynamic configuration. The goal of
12420Sstevel@tonic-gate  * this modification is to generate a dynamic configuration which best
12430Sstevel@tonic-gate  * represents the constraints laid down in the static configuration
12440Sstevel@tonic-gate  * and to update the static configuration with the results of this
12450Sstevel@tonic-gate  * process.
12460Sstevel@tonic-gate  *
12470Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
12480Sstevel@tonic-gate  */
12490Sstevel@tonic-gate int
pool_conf_commit_sys(pool_conf_t * conf,int validate)12500Sstevel@tonic-gate pool_conf_commit_sys(pool_conf_t *conf, int validate)
12510Sstevel@tonic-gate {
12520Sstevel@tonic-gate 	pool_conf_t *dyn;
12530Sstevel@tonic-gate 
12540Sstevel@tonic-gate 	if ((dyn = pool_conf_alloc()) == NULL)
12550Sstevel@tonic-gate 		return (PO_FAIL);
12560Sstevel@tonic-gate 	if (pool_conf_open(dyn, pool_dynamic_location(), PO_RDWR) !=
12570Sstevel@tonic-gate 	    PO_SUCCESS) {
12580Sstevel@tonic-gate 		pool_conf_free(dyn);
12590Sstevel@tonic-gate 		return (PO_FAIL);
12600Sstevel@tonic-gate 	}
12610Sstevel@tonic-gate 	if (validate == PO_TRUE) {
12620Sstevel@tonic-gate 		if (pool_conf_validate(conf, POV_RUNTIME) != PO_SUCCESS) {
12630Sstevel@tonic-gate 			(void) pool_conf_close(dyn);
12640Sstevel@tonic-gate 			pool_conf_free(dyn);
12650Sstevel@tonic-gate 			return (PO_FAIL);
12660Sstevel@tonic-gate 		}
12670Sstevel@tonic-gate 	}
12680Sstevel@tonic-gate 	/*
12690Sstevel@tonic-gate 	 * Now try to make the two things "the same".
12700Sstevel@tonic-gate 	 */
12710Sstevel@tonic-gate 	if (diff_and_fix(conf, dyn) != PO_SUCCESS) {
12720Sstevel@tonic-gate 		(void) pool_conf_close(dyn);
12730Sstevel@tonic-gate 		pool_conf_free(dyn);
12740Sstevel@tonic-gate 		pool_seterror(POE_INVALID_CONF);
12750Sstevel@tonic-gate 		return (PO_FAIL);
12760Sstevel@tonic-gate 	}
12770Sstevel@tonic-gate 	if (dyn->pc_prov->pc_commit(dyn) != PO_SUCCESS) {
12780Sstevel@tonic-gate 		(void) pool_conf_close(dyn);
12790Sstevel@tonic-gate 		pool_conf_free(dyn);
12800Sstevel@tonic-gate 		return (PO_FAIL);
12810Sstevel@tonic-gate 	}
12820Sstevel@tonic-gate 	(void) pool_conf_close(dyn);
12830Sstevel@tonic-gate 	pool_conf_free(dyn);
12840Sstevel@tonic-gate 	return (PO_SUCCESS);
12850Sstevel@tonic-gate }
12860Sstevel@tonic-gate 
12870Sstevel@tonic-gate /*
12880Sstevel@tonic-gate  * Copies all properties from one element to another. If the property
12890Sstevel@tonic-gate  * is a readonly property, then don't copy it.
12900Sstevel@tonic-gate  */
12910Sstevel@tonic-gate /* ARGSUSED */
12920Sstevel@tonic-gate static int
clone_element(pool_conf_t * conf,pool_elem_t * pe,const char * name,pool_value_t * pv,void * user)12930Sstevel@tonic-gate clone_element(pool_conf_t *conf, pool_elem_t *pe, const char *name,
12940Sstevel@tonic-gate     pool_value_t *pv, void *user)
12950Sstevel@tonic-gate {
12960Sstevel@tonic-gate 	pool_elem_t *tgt = (pool_elem_t *)user;
12970Sstevel@tonic-gate 	const pool_prop_t *prop;
12980Sstevel@tonic-gate #ifdef DEBUG
12990Sstevel@tonic-gate 	dprintf("Cloning %s from %s\n",
13000Sstevel@tonic-gate 	    pool_conf_location(TO_CONF(TO_ELEM(tgt))),
13010Sstevel@tonic-gate 	    pool_conf_location(TO_CONF(pe)));
13020Sstevel@tonic-gate 	assert(TO_CONF(TO_ELEM(tgt)) != TO_CONF(pe));
13030Sstevel@tonic-gate 	dprintf("clone_element: Processing %s\n", name);
13040Sstevel@tonic-gate 	pool_value_dprintf(pv);
13050Sstevel@tonic-gate #endif	/* DEBUG */
13060Sstevel@tonic-gate 	/*
13070Sstevel@tonic-gate 	 * Some properties should be ignored
13080Sstevel@tonic-gate 	 */
13090Sstevel@tonic-gate 	if ((prop = provider_get_prop(pe, name)) != NULL &&
13100Sstevel@tonic-gate 	    prop_is_readonly(prop) == PO_TRUE)
13110Sstevel@tonic-gate 		return (PO_SUCCESS);
1312*3247Sgjelinek 
1313*3247Sgjelinek 	/* The temporary property needs special handling */
1314*3247Sgjelinek 	if (strstr(name, ".temporary") != NULL)
1315*3247Sgjelinek 		return (pool_set_temporary(TO_CONF(tgt), tgt) ==
1316*3247Sgjelinek 		    PO_FAIL ?  PO_FAIL : PO_SUCCESS);
1317*3247Sgjelinek 	else
1318*3247Sgjelinek 		return (pool_put_property(TO_CONF(tgt), tgt, name, pv) ==
1319*3247Sgjelinek 		    PO_FAIL ? PO_FAIL : PO_SUCCESS);
13200Sstevel@tonic-gate }
13210Sstevel@tonic-gate 
13220Sstevel@tonic-gate /*
13230Sstevel@tonic-gate  * Removes all properties from one element. Properties which are
13240Sstevel@tonic-gate  * managed by the configuration are ignored.
13250Sstevel@tonic-gate  */
13260Sstevel@tonic-gate /* ARGSUSED3 */
13270Sstevel@tonic-gate static int
clean_element(pool_conf_t * conf,pool_elem_t * pe,const char * name,pool_value_t * pv,void * user)13280Sstevel@tonic-gate clean_element(pool_conf_t *conf, pool_elem_t *pe, const char *name,
13290Sstevel@tonic-gate     pool_value_t *pv, void *user)
13300Sstevel@tonic-gate {
13310Sstevel@tonic-gate 	const pool_prop_t *prop;
13320Sstevel@tonic-gate 	/*
13330Sstevel@tonic-gate 	 * Some properties should be ignored
13340Sstevel@tonic-gate 	 */
1335*3247Sgjelinek 	if (strstr(name, ".temporary") != NULL ||
1336*3247Sgjelinek 	    ((prop = provider_get_prop(pe, name)) != NULL &&
1337*3247Sgjelinek 	    prop_is_optional(prop) == PO_FALSE))
13380Sstevel@tonic-gate 		return (PO_SUCCESS);
13390Sstevel@tonic-gate 	return (pool_rm_property(conf, (pool_elem_t *)pe, name) == PO_FAIL);
13400Sstevel@tonic-gate }
1341