xref: /onnv-gate/usr/src/lib/libpool/common/pool_commit.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate /*
30*0Sstevel@tonic-gate  * These functions implement the process of commitment for a pool
31*0Sstevel@tonic-gate  * configuration. This process can be described as taking instructions
32*0Sstevel@tonic-gate  * from a static configuration file and using the information about
33*0Sstevel@tonic-gate  * the target system contained in the dynamic configuration to make
34*0Sstevel@tonic-gate  * decisions about how best to allocate resources to meet the
35*0Sstevel@tonic-gate  * constraints specified in the static configuration file.
36*0Sstevel@tonic-gate  *
37*0Sstevel@tonic-gate  * Mechanically, this process relies upon ordering the individual
38*0Sstevel@tonic-gate  * components of the file and stepping through the lists of components
39*0Sstevel@tonic-gate  * and taking actions depending on their type and which file they are
40*0Sstevel@tonic-gate  * part of.
41*0Sstevel@tonic-gate  *
42*0Sstevel@tonic-gate  * Configuration components can be broken down into different types
43*0Sstevel@tonic-gate  * which are then treated according to the following table:
44*0Sstevel@tonic-gate  *
45*0Sstevel@tonic-gate  * Element Type		Action
46*0Sstevel@tonic-gate  * system || pool ||
47*0Sstevel@tonic-gate  * res_comp || res_agg	If the element is a required element, then create or
48*0Sstevel@tonic-gate  *			update it (don't destroy required elements in the
49*0Sstevel@tonic-gate  *			static configuration) otherwise manipulate the
50*0Sstevel@tonic-gate  *			dynamic configuration to create, destroy or update
51*0Sstevel@tonic-gate  *			the element on the system.
52*0Sstevel@tonic-gate  * comp			Create, destroy or update the static configuration
53*0Sstevel@tonic-gate  *			component.
54*0Sstevel@tonic-gate  *
55*0Sstevel@tonic-gate  * The treatment of the different elements reflects the fact that all
56*0Sstevel@tonic-gate  * elements other than comp are configurable and thus libpool can
57*0Sstevel@tonic-gate  * create, destroy and modify these elements at will. comp elements
58*0Sstevel@tonic-gate  * reflect the disposition of the system, these elements can be moved
59*0Sstevel@tonic-gate  * around but they can't be created or destroyed in the dynamic
60*0Sstevel@tonic-gate  * configuration in the commit process. comp elements can be created
61*0Sstevel@tonic-gate  * and destroyed in the static configuration file as a result of a
62*0Sstevel@tonic-gate  * commit operation, since it's possible for a comp to not appear in
63*0Sstevel@tonic-gate  * the dynamic configuration. For instance, if the static
64*0Sstevel@tonic-gate  * configuration file was created on a different machine or after a DR
65*0Sstevel@tonic-gate  * operation which has removed or added components.
66*0Sstevel@tonic-gate  *
67*0Sstevel@tonic-gate  */
68*0Sstevel@tonic-gate #include <assert.h>
69*0Sstevel@tonic-gate #include <stdio.h>
70*0Sstevel@tonic-gate #include <stdlib.h>
71*0Sstevel@tonic-gate #include <sys/types.h>
72*0Sstevel@tonic-gate #include <errno.h>
73*0Sstevel@tonic-gate #include <string.h>
74*0Sstevel@tonic-gate #include <limits.h>
75*0Sstevel@tonic-gate #include <unistd.h>
76*0Sstevel@tonic-gate 
77*0Sstevel@tonic-gate #include <pool.h>
78*0Sstevel@tonic-gate #include "pool_internal.h"
79*0Sstevel@tonic-gate #include "pool_impl.h"
80*0Sstevel@tonic-gate 
/*
 * Smaller / larger of two values.  These are plain macros: whichever
 * argument is selected is evaluated a second time, so do not pass
 * expressions with side effects.
 */
#define	MIN(x, y)	(((x) < (y)) ? (x) : (y))
#define	MAX(x, y)	(((x) > (y)) ? (x) : (y))

/*
 * NOTE(review): the POA_* values look like numeric identifiers for the
 * allocation policies named by the macros -- confirm at the use sites.
 */
#define	POA_IMPORTANCE_NUM		0
#define	POA_SURPLUS_TO_DEFAULT_NUM	1
85*0Sstevel@tonic-gate 
86*0Sstevel@tonic-gate /*
87*0Sstevel@tonic-gate  * This resource specific structure is used to determine allocation of resources
88*0Sstevel@tonic-gate  * during resource set allocation.  Each set will receive its min, plus
89*0Sstevel@tonic-gate  * some number of dealt resources based on the global allocation policy.
90*0Sstevel@tonic-gate  */
91*0Sstevel@tonic-gate typedef struct res_info {
92*0Sstevel@tonic-gate 	pool_resource_t	*ri_res;	/* Resource set */
93*0Sstevel@tonic-gate 	uint64_t	ri_min;		/* Resource set's low watermark */
94*0Sstevel@tonic-gate 	uint64_t	ri_max;		/* Resource set's high watermark */
95*0Sstevel@tonic-gate 	uint64_t	ri_oldsize;	/* Size of resource set at the start */
96*0Sstevel@tonic-gate 	uint64_t	ri_newsize;	/* New resource set size allocated */
97*0Sstevel@tonic-gate 	uint64_t	ri_pinned;	/* Count of pinned resources in set */
98*0Sstevel@tonic-gate 	uint64_t	ri_dealt;	/* Count of resources dealt to set */
99*0Sstevel@tonic-gate 	int64_t		ri_transfer;	/* oldsize - newsize */
100*0Sstevel@tonic-gate 					/* The signed quantity of resources */
101*0Sstevel@tonic-gate 					/* to tranfer into or out of this */
102*0Sstevel@tonic-gate 					/* resource set */
103*0Sstevel@tonic-gate 					/* + transfer: tranfer resources out */
104*0Sstevel@tonic-gate 					/* - transfer: tranfer resources in */
105*0Sstevel@tonic-gate } res_info_t;
106*0Sstevel@tonic-gate 
107*0Sstevel@tonic-gate /*
108*0Sstevel@tonic-gate  * diff_and_fix operations
109*0Sstevel@tonic-gate  */
110*0Sstevel@tonic-gate static int		commit_create(pool_conf_t *, pool_elem_t **);
111*0Sstevel@tonic-gate static int		commit_delete(pool_elem_t *);
112*0Sstevel@tonic-gate static int		commit_update(pool_elem_t *, pool_elem_t *, int);
113*0Sstevel@tonic-gate 
114*0Sstevel@tonic-gate /*
115*0Sstevel@tonic-gate  * configuration commit processing
116*0Sstevel@tonic-gate  */
117*0Sstevel@tonic-gate static int		diff_and_fix(pool_conf_t *, pool_conf_t *);
118*0Sstevel@tonic-gate static int		process_elem_lt(pool_elem_t *, pool_conf_t *);
119*0Sstevel@tonic-gate static int		process_elem_gt(pool_elem_t *, pool_conf_t *,
120*0Sstevel@tonic-gate     pool_conf_t *);
121*0Sstevel@tonic-gate static int		process_lists(int, pool_conf_t *,
122*0Sstevel@tonic-gate     pool_conf_t *, int);
123*0Sstevel@tonic-gate static pool_elem_t	**get_elem_list(const pool_conf_t *, int, uint_t *);
124*0Sstevel@tonic-gate static int		share_resources(pool_conf_t *);
125*0Sstevel@tonic-gate static int		resource_allocate(const char *, pool_resource_t **,
126*0Sstevel@tonic-gate     uint_t);
127*0Sstevel@tonic-gate static int		resource_allocate_default(pool_resource_t **, uint_t);
128*0Sstevel@tonic-gate static int		pset_allocate_imp(pool_resource_t **, uint_t);
129*0Sstevel@tonic-gate static int		resource_compare_by_descending_importance(const void *,
130*0Sstevel@tonic-gate     const void *);
131*0Sstevel@tonic-gate static int		compute_size_to_transfer(const void *, const void *);
132*0Sstevel@tonic-gate static int		set_importance_cb(pool_conf_t *, pool_t *, void *);
133*0Sstevel@tonic-gate static int		unset_importance_cb(pool_conf_t *, pool_t *, void *);
134*0Sstevel@tonic-gate static int		add_importance_props(pool_conf_t *);
135*0Sstevel@tonic-gate static int		remove_importance_props(pool_conf_t *);
136*0Sstevel@tonic-gate static int		clone_element(pool_conf_t *, pool_elem_t *,
137*0Sstevel@tonic-gate     const char *, pool_value_t *, void *);
138*0Sstevel@tonic-gate static int		clean_element(pool_conf_t *, pool_elem_t *,
139*0Sstevel@tonic-gate     const char *, pool_value_t *, void *);
140*0Sstevel@tonic-gate 
141*0Sstevel@tonic-gate /*
142*0Sstevel@tonic-gate  * commit_create() is used to create a configuration element upon the
143*0Sstevel@tonic-gate  * system.  Since only pools and resource actually need to perform any
144*0Sstevel@tonic-gate  * action, other elements are ignored as a no-op.
145*0Sstevel@tonic-gate  */
146*0Sstevel@tonic-gate static int
147*0Sstevel@tonic-gate commit_create(pool_conf_t *conf, pool_elem_t **e1)
148*0Sstevel@tonic-gate {
149*0Sstevel@tonic-gate 	pool_resource_t *res;
150*0Sstevel@tonic-gate 	pool_t *pool;
151*0Sstevel@tonic-gate 	const char *res_type;
152*0Sstevel@tonic-gate 	pool_elem_t *src = *e1;
153*0Sstevel@tonic-gate 	uint64_t smin, smax, dmax;
154*0Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
155*0Sstevel@tonic-gate 	char *name;
156*0Sstevel@tonic-gate 
157*0Sstevel@tonic-gate 	switch (pool_elem_class(src)) {
158*0Sstevel@tonic-gate 	case PEC_SYSTEM:	/* NO-OP */
159*0Sstevel@tonic-gate 		break;
160*0Sstevel@tonic-gate 	case PEC_POOL:
161*0Sstevel@tonic-gate 		name = elem_get_name(src);
162*0Sstevel@tonic-gate 		if ((pool = pool_create(conf, name)) == NULL) {
163*0Sstevel@tonic-gate 			free(name);
164*0Sstevel@tonic-gate 			return (PO_FAIL);
165*0Sstevel@tonic-gate 		}
166*0Sstevel@tonic-gate 		free(name);
167*0Sstevel@tonic-gate 		/*
168*0Sstevel@tonic-gate 		 * Now copy the properties from the original pool to the
169*0Sstevel@tonic-gate 		 * new one
170*0Sstevel@tonic-gate 		 */
171*0Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(src), src, TO_ELEM(pool),
172*0Sstevel@tonic-gate 		    clone_element) != PO_SUCCESS)
173*0Sstevel@tonic-gate 			return (PO_FAIL);
174*0Sstevel@tonic-gate 		/*
175*0Sstevel@tonic-gate 		 * Add a pointer to the src element which can be
176*0Sstevel@tonic-gate 		 * updated with a sys_id when the sys_id is allocated
177*0Sstevel@tonic-gate 		 * to the created element.
178*0Sstevel@tonic-gate 		 */
179*0Sstevel@tonic-gate 		pool_set_pair(TO_ELEM(pool), src);
180*0Sstevel@tonic-gate 		*e1 = TO_ELEM(pool);
181*0Sstevel@tonic-gate 		break;
182*0Sstevel@tonic-gate 	case PEC_RES_COMP:
183*0Sstevel@tonic-gate 	case PEC_RES_AGG:
184*0Sstevel@tonic-gate 		name = elem_get_name(src);
185*0Sstevel@tonic-gate 		res_type = pool_elem_class_string(src);
186*0Sstevel@tonic-gate 		if ((res = pool_resource_create(conf, res_type, name)) ==
187*0Sstevel@tonic-gate 		    NULL) {
188*0Sstevel@tonic-gate 			free(name);
189*0Sstevel@tonic-gate 			return (PO_FAIL);
190*0Sstevel@tonic-gate 		}
191*0Sstevel@tonic-gate 		free(name);
192*0Sstevel@tonic-gate 		/*
193*0Sstevel@tonic-gate 		 * Need to do some ordering of property updates.
194*0Sstevel@tonic-gate 		 * Compare the values of source min/max and
195*0Sstevel@tonic-gate 		 * destination min/max. If smin < dmax then update the
196*0Sstevel@tonic-gate 		 * smin first, else update the max first.
197*0Sstevel@tonic-gate 		 */
198*0Sstevel@tonic-gate 		if (resource_get_min(pool_elem_res(src), &smin) != PO_SUCCESS ||
199*0Sstevel@tonic-gate 		    resource_get_max(pool_elem_res(src), &smax) != PO_SUCCESS ||
200*0Sstevel@tonic-gate 		    resource_get_max(res, &dmax) != PO_SUCCESS)
201*0Sstevel@tonic-gate 			return (PO_FAIL);
202*0Sstevel@tonic-gate 		if (smin < dmax) {
203*0Sstevel@tonic-gate 			pool_value_set_uint64(&val, smin);
204*0Sstevel@tonic-gate 			if (pool_put_ns_property(TO_ELEM(res), c_min_prop,
205*0Sstevel@tonic-gate 			    &val) != PO_SUCCESS)
206*0Sstevel@tonic-gate 				return (PO_FAIL);
207*0Sstevel@tonic-gate 		} else {
208*0Sstevel@tonic-gate 			pool_value_set_uint64(&val, smax);
209*0Sstevel@tonic-gate 			if (pool_put_ns_property(TO_ELEM(res), c_max_prop,
210*0Sstevel@tonic-gate 			    &val) != PO_SUCCESS)
211*0Sstevel@tonic-gate 				return (PO_FAIL);
212*0Sstevel@tonic-gate 		}
213*0Sstevel@tonic-gate 		/*
214*0Sstevel@tonic-gate 		 * Now copy the properties from the original resource
215*0Sstevel@tonic-gate 		 * to the new one
216*0Sstevel@tonic-gate 		 */
217*0Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(src), src, TO_ELEM(res),
218*0Sstevel@tonic-gate 		    clone_element) != PO_SUCCESS)
219*0Sstevel@tonic-gate 			return (PO_FAIL);
220*0Sstevel@tonic-gate 		/*
221*0Sstevel@tonic-gate 		 * Add a pointer to the src element which can be
222*0Sstevel@tonic-gate 		 * updated with a sys_id when the sys_id is allocated
223*0Sstevel@tonic-gate 		 * to the created element.
224*0Sstevel@tonic-gate 		 */
225*0Sstevel@tonic-gate 		pool_set_pair(TO_ELEM(res), src);
226*0Sstevel@tonic-gate 		*e1 = TO_ELEM(res);
227*0Sstevel@tonic-gate 		break;
228*0Sstevel@tonic-gate 	case PEC_COMP:		/* NO-OP */
229*0Sstevel@tonic-gate 		break;
230*0Sstevel@tonic-gate 	default:
231*0Sstevel@tonic-gate 		return (PO_FAIL);
232*0Sstevel@tonic-gate 	}
233*0Sstevel@tonic-gate 	return (PO_SUCCESS);
234*0Sstevel@tonic-gate }
235*0Sstevel@tonic-gate 
236*0Sstevel@tonic-gate 
237*0Sstevel@tonic-gate /*
238*0Sstevel@tonic-gate  * commit_delete() is used to delete a configuration element upon the
239*0Sstevel@tonic-gate  * system.  Since only pools and resources actually need to perform
240*0Sstevel@tonic-gate  * any action, other elements are ignored as a no-op.
241*0Sstevel@tonic-gate  */
242*0Sstevel@tonic-gate static int
243*0Sstevel@tonic-gate commit_delete(pool_elem_t *pe)
244*0Sstevel@tonic-gate {
245*0Sstevel@tonic-gate 	pool_resource_t *res;
246*0Sstevel@tonic-gate 	pool_t *pool;
247*0Sstevel@tonic-gate 	int ret = 0;
248*0Sstevel@tonic-gate 
249*0Sstevel@tonic-gate 	switch (pool_elem_class(pe)) {
250*0Sstevel@tonic-gate 	case PEC_SYSTEM:	/* NO-OP */
251*0Sstevel@tonic-gate 		break;
252*0Sstevel@tonic-gate 	case PEC_POOL:
253*0Sstevel@tonic-gate 		pool = pool_elem_pool(pe);
254*0Sstevel@tonic-gate 		ret = pool_destroy(TO_CONF(pe), pool);
255*0Sstevel@tonic-gate 		break;
256*0Sstevel@tonic-gate 	case PEC_RES_COMP:
257*0Sstevel@tonic-gate 	case PEC_RES_AGG:
258*0Sstevel@tonic-gate 		res = pool_elem_res(pe);
259*0Sstevel@tonic-gate 		ret = pool_resource_destroy(TO_CONF(pe), res);
260*0Sstevel@tonic-gate 		break;
261*0Sstevel@tonic-gate 	case PEC_COMP:		/* NO-OP */
262*0Sstevel@tonic-gate 		break;
263*0Sstevel@tonic-gate 	default:
264*0Sstevel@tonic-gate 		return (PO_FAIL);
265*0Sstevel@tonic-gate 	}
266*0Sstevel@tonic-gate 	return (ret);
267*0Sstevel@tonic-gate }
268*0Sstevel@tonic-gate 
269*0Sstevel@tonic-gate /*
270*0Sstevel@tonic-gate  * commit_update() is used to update a configuration element upon the
271*0Sstevel@tonic-gate  * system or in a static configuration file. The pass parameter
272*0Sstevel@tonic-gate  * governs whether properties are being updated or associations.  In
273*0Sstevel@tonic-gate  * pass 0, properties are updated. If the element is of class
274*0Sstevel@tonic-gate  * PEC_COMP, then make sure that the element in the static
275*0Sstevel@tonic-gate  * configuration file is correctly located before proceeding with the
276*0Sstevel@tonic-gate  * update. Then, the element in the dynamic configuration file is
277*0Sstevel@tonic-gate  * updated. In pass 1, ie. pass != 0, any pool components have their
278*0Sstevel@tonic-gate  * associations updated in the dynamic configuration.
279*0Sstevel@tonic-gate  */
280*0Sstevel@tonic-gate static int
281*0Sstevel@tonic-gate commit_update(pool_elem_t *e1, pool_elem_t *e2, int pass)
282*0Sstevel@tonic-gate {
283*0Sstevel@tonic-gate 	if (pass == 0) {
284*0Sstevel@tonic-gate 		pool_resource_t *res1;
285*0Sstevel@tonic-gate 		pool_resource_t *res2;
286*0Sstevel@tonic-gate 		if (pool_elem_class(e1) == PEC_COMP) {
287*0Sstevel@tonic-gate 			res1 = pool_get_owning_resource(TO_CONF(e1),
288*0Sstevel@tonic-gate 			    pool_elem_comp(e1));
289*0Sstevel@tonic-gate 			res2 = pool_get_owning_resource(TO_CONF(e2),
290*0Sstevel@tonic-gate 			    pool_elem_comp(e2));
291*0Sstevel@tonic-gate 			if (pool_elem_compare_name(TO_ELEM(res1),
292*0Sstevel@tonic-gate 			    TO_ELEM(res2)) != 0) {
293*0Sstevel@tonic-gate 				char *name;
294*0Sstevel@tonic-gate 				const pool_resource_t *newres;
295*0Sstevel@tonic-gate 				pool_component_t *comps[2] = { NULL };
296*0Sstevel@tonic-gate 
297*0Sstevel@tonic-gate 				comps[0] = pool_elem_comp(e2);
298*0Sstevel@tonic-gate 				name = elem_get_name(TO_ELEM(res2));
299*0Sstevel@tonic-gate 				newres = pool_get_resource(TO_CONF(e2),
300*0Sstevel@tonic-gate 				    pool_elem_class_string(TO_ELEM(res1)),
301*0Sstevel@tonic-gate 				    name);
302*0Sstevel@tonic-gate 				free(name);
303*0Sstevel@tonic-gate 				assert(newres);
304*0Sstevel@tonic-gate #ifdef DEBUG
305*0Sstevel@tonic-gate 				dprintf("transferring: res, comp\n");
306*0Sstevel@tonic-gate 				pool_elem_dprintf(TO_ELEM(newres));
307*0Sstevel@tonic-gate 				pool_elem_dprintf(e2);
308*0Sstevel@tonic-gate #endif	/* DEBUG */
309*0Sstevel@tonic-gate 				(void) pool_resource_xtransfer(TO_CONF(e2),
310*0Sstevel@tonic-gate 				    res2, (pool_resource_t *)newres, comps);
311*0Sstevel@tonic-gate 			}
312*0Sstevel@tonic-gate 		}
313*0Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(e2), e2, NULL,
314*0Sstevel@tonic-gate 		    clean_element) != PO_SUCCESS) {
315*0Sstevel@tonic-gate 			return (PO_FAIL);
316*0Sstevel@tonic-gate 		}
317*0Sstevel@tonic-gate 		/*
318*0Sstevel@tonic-gate 		 * Need to do some ordering of property updates if the
319*0Sstevel@tonic-gate 		 * element to be updated is a resource.  Compare the
320*0Sstevel@tonic-gate 		 * values of source min/max and destination
321*0Sstevel@tonic-gate 		 * min/max. If smin < dmax then update the smin first,
322*0Sstevel@tonic-gate 		 * else update the max first.
323*0Sstevel@tonic-gate 		 */
324*0Sstevel@tonic-gate 		if (pool_elem_class(e1) == PEC_RES_COMP ||
325*0Sstevel@tonic-gate 		    pool_elem_class(e1) == PEC_RES_AGG) {
326*0Sstevel@tonic-gate 			uint64_t smin, smax, dmax;
327*0Sstevel@tonic-gate 			pool_value_t val = POOL_VALUE_INITIALIZER;
328*0Sstevel@tonic-gate 
329*0Sstevel@tonic-gate 			if (resource_get_min(pool_elem_res(e1), &smin) !=
330*0Sstevel@tonic-gate 			    PO_SUCCESS ||
331*0Sstevel@tonic-gate 			    resource_get_max(pool_elem_res(e1), &smax) !=
332*0Sstevel@tonic-gate 			    PO_SUCCESS ||
333*0Sstevel@tonic-gate 			    resource_get_max(pool_elem_res(e2), &dmax) !=
334*0Sstevel@tonic-gate 			    PO_SUCCESS)
335*0Sstevel@tonic-gate 				return (PO_FAIL);
336*0Sstevel@tonic-gate 			if (smin < dmax) {
337*0Sstevel@tonic-gate 				pool_value_set_uint64(&val, smin);
338*0Sstevel@tonic-gate 				if (pool_put_ns_property(e2, c_min_prop,
339*0Sstevel@tonic-gate 				    &val) != PO_SUCCESS)
340*0Sstevel@tonic-gate 					return (PO_FAIL);
341*0Sstevel@tonic-gate 			} else {
342*0Sstevel@tonic-gate 				pool_value_set_uint64(&val, smax);
343*0Sstevel@tonic-gate 				if (pool_put_ns_property(e2, c_max_prop,
344*0Sstevel@tonic-gate 				    &val) != PO_SUCCESS)
345*0Sstevel@tonic-gate 					return (PO_FAIL);
346*0Sstevel@tonic-gate 			}
347*0Sstevel@tonic-gate 		}
348*0Sstevel@tonic-gate 		/*
349*0Sstevel@tonic-gate 		 * This next couple of steps needs some
350*0Sstevel@tonic-gate 		 * explanation. The first walk, copies all the
351*0Sstevel@tonic-gate 		 * properties that are writeable from the static
352*0Sstevel@tonic-gate 		 * configuration to the dynamic configuration. The
353*0Sstevel@tonic-gate 		 * second walk copies all properties (writeable or
354*0Sstevel@tonic-gate 		 * not) from the dynamic configuration element back to
355*0Sstevel@tonic-gate 		 * the static configuration element. This ensures that
356*0Sstevel@tonic-gate 		 * updates from the static configuration element are
357*0Sstevel@tonic-gate 		 * correctly applied to the dynamic configuration and
358*0Sstevel@tonic-gate 		 * then the static configuration element is updated
359*0Sstevel@tonic-gate 		 * with the latest values of the read-only xproperties
360*0Sstevel@tonic-gate 		 * from the dynamic configuration element. The
361*0Sstevel@tonic-gate 		 * enforcing of permisssions is performed in
362*0Sstevel@tonic-gate 		 * clone_element by its choice of property
363*0Sstevel@tonic-gate 		 * manipulation function.
364*0Sstevel@tonic-gate 		 */
365*0Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(e1), e1, e2, clone_element) !=
366*0Sstevel@tonic-gate 		    PO_SUCCESS) {
367*0Sstevel@tonic-gate 			return (PO_FAIL);
368*0Sstevel@tonic-gate 		}
369*0Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(e2), e2, e1, clone_element) !=
370*0Sstevel@tonic-gate 		    PO_SUCCESS) {
371*0Sstevel@tonic-gate 			return (PO_FAIL);
372*0Sstevel@tonic-gate 		}
373*0Sstevel@tonic-gate 	} else {
374*0Sstevel@tonic-gate 		if (pool_elem_class(e1) == PEC_POOL) {
375*0Sstevel@tonic-gate 			pool_resource_t **rs;
376*0Sstevel@tonic-gate 			uint_t nelem;
377*0Sstevel@tonic-gate 			int i;
378*0Sstevel@tonic-gate 			pool_value_t val = POOL_VALUE_INITIALIZER;
379*0Sstevel@tonic-gate 			pool_value_t *pvals[] = { NULL, NULL };
380*0Sstevel@tonic-gate 
381*0Sstevel@tonic-gate 			pvals[0] = &val;
382*0Sstevel@tonic-gate 			if (pool_value_set_string(&val, "pset") != PO_SUCCESS ||
383*0Sstevel@tonic-gate 			    pool_value_set_name(&val, c_type) != PO_SUCCESS)
384*0Sstevel@tonic-gate 				return (PO_FAIL);
385*0Sstevel@tonic-gate 			if ((rs = pool_query_pool_resources(TO_CONF(e1),
386*0Sstevel@tonic-gate 			    pool_elem_pool(e1), &nelem, pvals)) != NULL) {
387*0Sstevel@tonic-gate 				for (i = 0; i < nelem; i++) {
388*0Sstevel@tonic-gate 					const pool_resource_t *tgt_res;
389*0Sstevel@tonic-gate 					char *res_name =
390*0Sstevel@tonic-gate 					    elem_get_name(TO_ELEM(rs[i]));
391*0Sstevel@tonic-gate 
392*0Sstevel@tonic-gate 					if ((tgt_res = pool_get_resource(
393*0Sstevel@tonic-gate 					    TO_CONF(e2), pool_elem_class_string(
394*0Sstevel@tonic-gate 					    TO_ELEM(rs[i])), res_name)) ==
395*0Sstevel@tonic-gate 					    NULL) {
396*0Sstevel@tonic-gate 						tgt_res = get_default_resource(
397*0Sstevel@tonic-gate 						    rs[i]);
398*0Sstevel@tonic-gate 					}
399*0Sstevel@tonic-gate 					free(res_name);
400*0Sstevel@tonic-gate 					if (pool_associate(TO_CONF(e2),
401*0Sstevel@tonic-gate 					    pool_elem_pool(e2), tgt_res) !=
402*0Sstevel@tonic-gate 					    PO_SUCCESS) {
403*0Sstevel@tonic-gate 						free(rs);
404*0Sstevel@tonic-gate 						return (PO_FAIL);
405*0Sstevel@tonic-gate 					}
406*0Sstevel@tonic-gate 				}
407*0Sstevel@tonic-gate 				free(rs);
408*0Sstevel@tonic-gate 			}
409*0Sstevel@tonic-gate 		}
410*0Sstevel@tonic-gate 	}
411*0Sstevel@tonic-gate 	return (PO_SUCCESS);
412*0Sstevel@tonic-gate }
413*0Sstevel@tonic-gate 
414*0Sstevel@tonic-gate /*
415*0Sstevel@tonic-gate  * diff_and_fix() works out the differences between two configurations
416*0Sstevel@tonic-gate  * and modifies the state of the system to match the operations
417*0Sstevel@tonic-gate  * required to bring the two configurations into sync.
418*0Sstevel@tonic-gate  *
419*0Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL.
420*0Sstevel@tonic-gate  */
421*0Sstevel@tonic-gate static int
422*0Sstevel@tonic-gate diff_and_fix(pool_conf_t *stc, pool_conf_t *dyn)
423*0Sstevel@tonic-gate {
424*0Sstevel@tonic-gate 	/*
425*0Sstevel@tonic-gate 	 * The ordering of the operations is significant, we must
426*0Sstevel@tonic-gate 	 * process the system element, then the pools elements, then
427*0Sstevel@tonic-gate 	 * the resource elements, then the pools elements again and
428*0Sstevel@tonic-gate 	 * finally the resource components.
429*0Sstevel@tonic-gate 	 *
430*0Sstevel@tonic-gate 	 * TODO
431*0Sstevel@tonic-gate 	 * PEC_RES_COMP are the only type of resources
432*0Sstevel@tonic-gate 	 * currently. When PEC_RES_AGG resources are added they must
433*0Sstevel@tonic-gate 	 * also be processed.
434*0Sstevel@tonic-gate 	 */
435*0Sstevel@tonic-gate 	if (process_lists(PEC_SYSTEM, stc, dyn, 0) != PO_SUCCESS) {
436*0Sstevel@tonic-gate 		return (PO_FAIL);
437*0Sstevel@tonic-gate 	}
438*0Sstevel@tonic-gate 	if (process_lists(PEC_POOL, stc, dyn, 0) != PO_SUCCESS) {
439*0Sstevel@tonic-gate 		return (PO_FAIL);
440*0Sstevel@tonic-gate 	}
441*0Sstevel@tonic-gate 	if (process_lists(PEC_RES_COMP, stc, dyn, 0) != PO_SUCCESS) {
442*0Sstevel@tonic-gate 		return (PO_FAIL);
443*0Sstevel@tonic-gate 	}
444*0Sstevel@tonic-gate 	if (process_lists(PEC_COMP, stc, dyn, 0) != PO_SUCCESS) {
445*0Sstevel@tonic-gate 		return (PO_FAIL);
446*0Sstevel@tonic-gate 	}
447*0Sstevel@tonic-gate 	if (process_lists(PEC_POOL, stc, dyn, 1) != PO_SUCCESS) {
448*0Sstevel@tonic-gate 		return (PO_FAIL);
449*0Sstevel@tonic-gate 	}
450*0Sstevel@tonic-gate 	/*
451*0Sstevel@tonic-gate 	 * Share the resources. It has to be called for both
452*0Sstevel@tonic-gate 	 * configurations to ensure that the configurations still look
453*0Sstevel@tonic-gate 	 * the same.
454*0Sstevel@tonic-gate 	 */
455*0Sstevel@tonic-gate 	if (share_resources(dyn) != PO_SUCCESS) {
456*0Sstevel@tonic-gate 		return (PO_FAIL);
457*0Sstevel@tonic-gate 	}
458*0Sstevel@tonic-gate 	if (share_resources(stc) != PO_SUCCESS) {
459*0Sstevel@tonic-gate 		return (PO_FAIL);
460*0Sstevel@tonic-gate 	}
461*0Sstevel@tonic-gate 	return (PO_SUCCESS);
462*0Sstevel@tonic-gate }
463*0Sstevel@tonic-gate 
464*0Sstevel@tonic-gate static int
465*0Sstevel@tonic-gate process_elem_lt(pool_elem_t *pe, pool_conf_t *dyn)
466*0Sstevel@tonic-gate {
467*0Sstevel@tonic-gate 	if (pool_elem_class(pe) == PEC_COMP) {
468*0Sstevel@tonic-gate 		if (pool_component_destroy(pool_elem_comp(pe)) == PO_FAIL) {
469*0Sstevel@tonic-gate 			return (PO_FAIL);
470*0Sstevel@tonic-gate 		}
471*0Sstevel@tonic-gate 	} else if (! elem_is_default(pe)) {
472*0Sstevel@tonic-gate 		if (commit_create(dyn, &pe) != PO_SUCCESS) {
473*0Sstevel@tonic-gate 			return (PO_FAIL);
474*0Sstevel@tonic-gate 		}
475*0Sstevel@tonic-gate 	}
476*0Sstevel@tonic-gate 	return (PO_SUCCESS);
477*0Sstevel@tonic-gate }
478*0Sstevel@tonic-gate 
479*0Sstevel@tonic-gate static int
480*0Sstevel@tonic-gate process_elem_gt(pool_elem_t *pe, pool_conf_t *stc, pool_conf_t *dyn)
481*0Sstevel@tonic-gate {
482*0Sstevel@tonic-gate 	if (pool_elem_class(pe) == PEC_COMP) {
483*0Sstevel@tonic-gate 		pool_resource_t *owner;
484*0Sstevel@tonic-gate 		const pool_resource_t *parent_res;
485*0Sstevel@tonic-gate 		pool_value_t val = POOL_VALUE_INITIALIZER;
486*0Sstevel@tonic-gate 		const pool_component_t *newcomp;
487*0Sstevel@tonic-gate 		const char *resname;
488*0Sstevel@tonic-gate 		const char *restype;
489*0Sstevel@tonic-gate 		/*
490*0Sstevel@tonic-gate 		 * I have to find the right parent in the static
491*0Sstevel@tonic-gate 		 * configuration. It may not exist, in which case it's
492*0Sstevel@tonic-gate 		 * correct to put it in the default
493*0Sstevel@tonic-gate 		 */
494*0Sstevel@tonic-gate 		owner = pool_get_owning_resource(dyn,
495*0Sstevel@tonic-gate 		    pool_elem_comp(pe));
496*0Sstevel@tonic-gate 		if (pool_get_ns_property(TO_ELEM(owner), "name", &val) ==
497*0Sstevel@tonic-gate 		    POC_INVAL)
498*0Sstevel@tonic-gate 			return (PO_FAIL);
499*0Sstevel@tonic-gate 
500*0Sstevel@tonic-gate 		if (pool_value_get_string(&val, &resname) == PO_FAIL)
501*0Sstevel@tonic-gate 			return (PO_FAIL);
502*0Sstevel@tonic-gate 
503*0Sstevel@tonic-gate 		if ((resname = strdup(resname)) == NULL)
504*0Sstevel@tonic-gate 			return (PO_FAIL);
505*0Sstevel@tonic-gate 
506*0Sstevel@tonic-gate 		restype = pool_elem_class_string(TO_ELEM(owner));
507*0Sstevel@tonic-gate 		parent_res = pool_get_resource(stc, restype, resname);
508*0Sstevel@tonic-gate 		free((void *)resname);
509*0Sstevel@tonic-gate 		if (parent_res == NULL)
510*0Sstevel@tonic-gate 			parent_res = resource_by_sysid(stc, PS_NONE, restype);
511*0Sstevel@tonic-gate 		/*
512*0Sstevel@tonic-gate 		 * Now need to make a copy of the component in the
513*0Sstevel@tonic-gate 		 * dynamic configuration in the static configuration.
514*0Sstevel@tonic-gate 		 */
515*0Sstevel@tonic-gate 		if ((newcomp = pool_component_create(stc, parent_res,
516*0Sstevel@tonic-gate 		    elem_get_sysid(pe))) == NULL)
517*0Sstevel@tonic-gate 			return (PO_FAIL);
518*0Sstevel@tonic-gate 
519*0Sstevel@tonic-gate 		if (pool_walk_properties(TO_CONF(pe), pe, TO_ELEM(newcomp),
520*0Sstevel@tonic-gate 		    clone_element) != PO_SUCCESS)
521*0Sstevel@tonic-gate 			return (PO_FAIL);
522*0Sstevel@tonic-gate 	} else if (elem_is_default(pe)) {
523*0Sstevel@tonic-gate 		pool_resource_t *newres;
524*0Sstevel@tonic-gate 		pool_t *newpool;
525*0Sstevel@tonic-gate 		char *name;
526*0Sstevel@tonic-gate 
527*0Sstevel@tonic-gate 		if ((name = elem_get_name(pe)) == NULL)
528*0Sstevel@tonic-gate 			return (PO_FAIL);
529*0Sstevel@tonic-gate 		switch (pool_elem_class(pe)) {
530*0Sstevel@tonic-gate 		case PEC_POOL:
531*0Sstevel@tonic-gate 			if ((newpool = pool_create(stc, name)) == NULL) {
532*0Sstevel@tonic-gate 				free(name);
533*0Sstevel@tonic-gate 				return (PO_FAIL);
534*0Sstevel@tonic-gate 			}
535*0Sstevel@tonic-gate 			free(name);
536*0Sstevel@tonic-gate 			if (pool_walk_properties(TO_CONF(pe), pe,
537*0Sstevel@tonic-gate 			    TO_ELEM(newpool), clone_element) != PO_SUCCESS)
538*0Sstevel@tonic-gate 				return (PO_FAIL);
539*0Sstevel@tonic-gate 			break;
540*0Sstevel@tonic-gate 		case PEC_RES_AGG:
541*0Sstevel@tonic-gate 		case PEC_RES_COMP:
542*0Sstevel@tonic-gate 			if ((newres = pool_resource_create(stc,
543*0Sstevel@tonic-gate 			    pool_elem_class_string(pe), name)) ==
544*0Sstevel@tonic-gate 			    NULL) {
545*0Sstevel@tonic-gate 				free(name);
546*0Sstevel@tonic-gate 				return (PO_FAIL);
547*0Sstevel@tonic-gate 			}
548*0Sstevel@tonic-gate 			free(name);
549*0Sstevel@tonic-gate 			if (pool_walk_properties(TO_CONF(pe), pe,
550*0Sstevel@tonic-gate 			    TO_ELEM(newres), clone_element) != PO_SUCCESS)
551*0Sstevel@tonic-gate 				return (PO_FAIL);
552*0Sstevel@tonic-gate 			break;
553*0Sstevel@tonic-gate 		default:
554*0Sstevel@tonic-gate 			free(name);
555*0Sstevel@tonic-gate 			break;
556*0Sstevel@tonic-gate 		}
557*0Sstevel@tonic-gate 	} else {
558*0Sstevel@tonic-gate 		if (commit_delete(pe) != PO_SUCCESS)
559*0Sstevel@tonic-gate 			return (PO_FAIL);
560*0Sstevel@tonic-gate 	}
561*0Sstevel@tonic-gate 	return (PO_SUCCESS);
562*0Sstevel@tonic-gate }
563*0Sstevel@tonic-gate 
564*0Sstevel@tonic-gate /*
565*0Sstevel@tonic-gate  * This function compares the elements of the supplied type in the
566*0Sstevel@tonic-gate  * static and dynamic configurations supplied. The lists of elements
567*0Sstevel@tonic-gate  * are compared and used to create, delete and update elements in
568*0Sstevel@tonic-gate  * both the static and dynamic configurations. The pass parameter is
569*0Sstevel@tonic-gate  * used to indicate to commit_update() whether property updates or
570*0Sstevel@tonic-gate  * association updates should be performed.
571*0Sstevel@tonic-gate  */
572*0Sstevel@tonic-gate static int
573*0Sstevel@tonic-gate process_lists(int type, pool_conf_t *stc, pool_conf_t *dyn, int pass)
574*0Sstevel@tonic-gate {
575*0Sstevel@tonic-gate 	uint_t stc_nelem = 0, dyn_nelem = 0;
576*0Sstevel@tonic-gate 	pool_elem_t **stc_elems, **dyn_elems;
577*0Sstevel@tonic-gate 	int i, j;
578*0Sstevel@tonic-gate 	int status = PO_SUCCESS;
579*0Sstevel@tonic-gate 
580*0Sstevel@tonic-gate 	if ((stc_elems = get_elem_list(stc, type, &stc_nelem)) == NULL)
581*0Sstevel@tonic-gate 		return (PO_FAIL);
582*0Sstevel@tonic-gate 
583*0Sstevel@tonic-gate 	qsort(stc_elems, stc_nelem, sizeof (pool_elem_t *),
584*0Sstevel@tonic-gate 	    qsort_elem_compare);
585*0Sstevel@tonic-gate 
586*0Sstevel@tonic-gate 	if ((dyn_elems = get_elem_list(dyn, type, &dyn_nelem)) == NULL) {
587*0Sstevel@tonic-gate 		free(stc_elems);
588*0Sstevel@tonic-gate 		return (PO_FAIL);
589*0Sstevel@tonic-gate 	}
590*0Sstevel@tonic-gate 
591*0Sstevel@tonic-gate 	qsort(dyn_elems, dyn_nelem, sizeof (pool_elem_t *),
592*0Sstevel@tonic-gate 	    qsort_elem_compare);
593*0Sstevel@tonic-gate 	/*
594*0Sstevel@tonic-gate 	 * Step through and do the updating, remember that we are
595*0Sstevel@tonic-gate 	 * comparing using the compare function for the configuration
596*0Sstevel@tonic-gate 	 * and that is fixed.
597*0Sstevel@tonic-gate 	 */
598*0Sstevel@tonic-gate 	i = j = 0;
599*0Sstevel@tonic-gate 	while (status == PO_SUCCESS && i < stc_nelem && j < dyn_nelem) {
600*0Sstevel@tonic-gate 		int compare;
601*0Sstevel@tonic-gate 		/*
602*0Sstevel@tonic-gate 		 * We are going to do this by stepping through the static
603*0Sstevel@tonic-gate 		 * list first.
604*0Sstevel@tonic-gate 		 */
605*0Sstevel@tonic-gate 		if (elem_is_default(stc_elems[i]) &&
606*0Sstevel@tonic-gate 		    elem_is_default(dyn_elems[j]))
607*0Sstevel@tonic-gate 			compare = 0;
608*0Sstevel@tonic-gate 		else
609*0Sstevel@tonic-gate 			compare = pool_elem_compare_name(stc_elems[i],
610*0Sstevel@tonic-gate 			    dyn_elems[j]);
611*0Sstevel@tonic-gate 		if (compare < 0) {
612*0Sstevel@tonic-gate 			status = process_elem_lt(stc_elems[i], dyn);
613*0Sstevel@tonic-gate 			i++;
614*0Sstevel@tonic-gate 		} else if (compare > 0) {
615*0Sstevel@tonic-gate 			status = process_elem_gt(dyn_elems[j], stc, dyn);
616*0Sstevel@tonic-gate 			j++;
617*0Sstevel@tonic-gate 		} else {	/* compare == 0 */
618*0Sstevel@tonic-gate 			if (commit_update(stc_elems[i], dyn_elems[j], pass)
619*0Sstevel@tonic-gate 			    != PO_SUCCESS) {
620*0Sstevel@tonic-gate 				status = PO_FAIL;
621*0Sstevel@tonic-gate 			}
622*0Sstevel@tonic-gate 			i++;
623*0Sstevel@tonic-gate 			j++;
624*0Sstevel@tonic-gate 		}
625*0Sstevel@tonic-gate 	}
626*0Sstevel@tonic-gate 	if (status == PO_FAIL) {
627*0Sstevel@tonic-gate 		free(stc_elems);
628*0Sstevel@tonic-gate 		free(dyn_elems);
629*0Sstevel@tonic-gate 		return (PO_FAIL);
630*0Sstevel@tonic-gate 	}
631*0Sstevel@tonic-gate 	while (status == PO_SUCCESS && i < stc_nelem) {
632*0Sstevel@tonic-gate 		status = process_elem_lt(stc_elems[i], dyn);
633*0Sstevel@tonic-gate 		i++;
634*0Sstevel@tonic-gate 	}
635*0Sstevel@tonic-gate 	if (status == PO_FAIL) {
636*0Sstevel@tonic-gate 		free(stc_elems);
637*0Sstevel@tonic-gate 		free(dyn_elems);
638*0Sstevel@tonic-gate 		return (PO_FAIL);
639*0Sstevel@tonic-gate 	}
640*0Sstevel@tonic-gate 	while (status == PO_SUCCESS && j < dyn_nelem) {
641*0Sstevel@tonic-gate 		status = process_elem_gt(dyn_elems[j], stc, dyn);
642*0Sstevel@tonic-gate 		j++;
643*0Sstevel@tonic-gate 	}
644*0Sstevel@tonic-gate 	free(stc_elems);
645*0Sstevel@tonic-gate 	free(dyn_elems);
646*0Sstevel@tonic-gate 	return (status);
647*0Sstevel@tonic-gate }
648*0Sstevel@tonic-gate 
649*0Sstevel@tonic-gate /*
650*0Sstevel@tonic-gate  * get_elem_list() returns a list of pool_elem_t's. The size of the
651*0Sstevel@tonic-gate  * list is written into nelem. The list contains elements of all types
652*0Sstevel@tonic-gate  * that pools is interested in: i.e. system, pool, resources and
653*0Sstevel@tonic-gate  * resource components. It is the caller's responsibility to free the
654*0Sstevel@tonic-gate  * list when it is finished with.
655*0Sstevel@tonic-gate  *
656*0Sstevel@tonic-gate  * The array of pointers returned by the type specific query can be
657*0Sstevel@tonic-gate  * safely cast to be an array of pool_elem_t pointers. In the case of
658*0Sstevel@tonic-gate  * PEC_RES_COMP some additional processing is required to qualify the
659*0Sstevel@tonic-gate  * list of elements.
660*0Sstevel@tonic-gate  *
661*0Sstevel@tonic-gate  * Returns a pointer to a list of pool_elem_t's or NULL on failure.
662*0Sstevel@tonic-gate  */
663*0Sstevel@tonic-gate static pool_elem_t **
664*0Sstevel@tonic-gate get_elem_list(const pool_conf_t *conf, int type, uint_t *nelem)
665*0Sstevel@tonic-gate {
666*0Sstevel@tonic-gate 	pool_resource_t **rl;
667*0Sstevel@tonic-gate 	pool_t **pl;
668*0Sstevel@tonic-gate 	pool_component_t **cl;
669*0Sstevel@tonic-gate 	pool_elem_t **elems = NULL;
670*0Sstevel@tonic-gate 	int i;
671*0Sstevel@tonic-gate 
672*0Sstevel@tonic-gate 	switch (type) {
673*0Sstevel@tonic-gate 	case PEC_SYSTEM:
674*0Sstevel@tonic-gate 		if ((elems = malloc(sizeof (pool_elem_t *))) == NULL)
675*0Sstevel@tonic-gate 			return (NULL);
676*0Sstevel@tonic-gate 		*nelem = 1;
677*0Sstevel@tonic-gate 		elems[0] = pool_conf_to_elem(conf);
678*0Sstevel@tonic-gate 		break;
679*0Sstevel@tonic-gate 	case PEC_POOL:
680*0Sstevel@tonic-gate 		if ((pl = pool_query_pools(conf, nelem, NULL)) != NULL) {
681*0Sstevel@tonic-gate 			elems = (pool_elem_t **)pl;
682*0Sstevel@tonic-gate 		}
683*0Sstevel@tonic-gate 		break;
684*0Sstevel@tonic-gate 	case PEC_RES_COMP:
685*0Sstevel@tonic-gate 		if ((rl = pool_query_resources(conf, nelem, NULL)) != NULL) {
686*0Sstevel@tonic-gate 			int j = 0;
687*0Sstevel@tonic-gate 			elems = (pool_elem_t **)rl;
688*0Sstevel@tonic-gate 			for (i = 0; i < *nelem; i++) {
689*0Sstevel@tonic-gate 				if (pool_elem_class(TO_ELEM(rl[i])) ==
690*0Sstevel@tonic-gate 				    PEC_RES_COMP)
691*0Sstevel@tonic-gate 					elems[j++] = TO_ELEM(rl[i]);
692*0Sstevel@tonic-gate 			}
693*0Sstevel@tonic-gate 			*nelem = j;
694*0Sstevel@tonic-gate 		}
695*0Sstevel@tonic-gate 		break;
696*0Sstevel@tonic-gate 	case PEC_COMP:
697*0Sstevel@tonic-gate 		if ((cl = pool_query_components(conf, nelem, NULL)) != NULL) {
698*0Sstevel@tonic-gate 			elems = (pool_elem_t **)cl;
699*0Sstevel@tonic-gate 		}
700*0Sstevel@tonic-gate 		break;
701*0Sstevel@tonic-gate 	default:
702*0Sstevel@tonic-gate 		abort();
703*0Sstevel@tonic-gate 		break;
704*0Sstevel@tonic-gate 	}
705*0Sstevel@tonic-gate 	return (elems);
706*0Sstevel@tonic-gate }
707*0Sstevel@tonic-gate 
708*0Sstevel@tonic-gate /*
709*0Sstevel@tonic-gate  * share_resources() sets up the allocation of resources by each
710*0Sstevel@tonic-gate  * provider.  Firstly all resources are updated with the importance of
711*0Sstevel@tonic-gate  * each pool, then each resource provider is invoked in turn with a
712*0Sstevel@tonic-gate  * list of it's own resources.  Finally, the pool importance details
713*0Sstevel@tonic-gate  * are removed from the resources.
714*0Sstevel@tonic-gate  *
715*0Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
716*0Sstevel@tonic-gate  */
717*0Sstevel@tonic-gate static int
718*0Sstevel@tonic-gate share_resources(pool_conf_t *conf)
719*0Sstevel@tonic-gate {
720*0Sstevel@tonic-gate 	pool_resource_t **resources;
721*0Sstevel@tonic-gate 	uint_t nelem;
722*0Sstevel@tonic-gate 	pool_value_t *props[] = { NULL, NULL };
723*0Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
724*0Sstevel@tonic-gate 
725*0Sstevel@tonic-gate 	props[0] = &val;
726*0Sstevel@tonic-gate 
727*0Sstevel@tonic-gate 	/*
728*0Sstevel@tonic-gate 	 * Call an allocation function for each type of supported resource.
729*0Sstevel@tonic-gate 	 * This function is responsible for "sharing" resources to resource
730*0Sstevel@tonic-gate 	 * sets as determined by the system.allocate-method.
731*0Sstevel@tonic-gate 	 */
732*0Sstevel@tonic-gate 
733*0Sstevel@tonic-gate 	if (pool_value_set_string(props[0], "pset") != PO_SUCCESS ||
734*0Sstevel@tonic-gate 	    pool_value_set_name(props[0], c_type) != PO_SUCCESS)
735*0Sstevel@tonic-gate 		return (PO_FAIL);
736*0Sstevel@tonic-gate 
737*0Sstevel@tonic-gate 	if (add_importance_props(conf) != PO_SUCCESS) {
738*0Sstevel@tonic-gate 		(void) remove_importance_props(conf);
739*0Sstevel@tonic-gate 		return (PO_FAIL);
740*0Sstevel@tonic-gate 	}
741*0Sstevel@tonic-gate 
742*0Sstevel@tonic-gate 	if ((resources = pool_query_resources(conf, &nelem, props)) != NULL) {
743*0Sstevel@tonic-gate 		/*
744*0Sstevel@tonic-gate 		 * 'pool.importance' defines the importance of a pool;
745*0Sstevel@tonic-gate 		 * resources inherit the importance of the pool that
746*0Sstevel@tonic-gate 		 * is associated with them. If more than one pool is
747*0Sstevel@tonic-gate 		 * associated with a resource, the importance of the
748*0Sstevel@tonic-gate 		 * resource is the maximum importance of all
749*0Sstevel@tonic-gate 		 * associated pools.  Use '_importance' on resources
750*0Sstevel@tonic-gate 		 * to determine who gets extra.
751*0Sstevel@tonic-gate 		 */
752*0Sstevel@tonic-gate 		if (resource_allocate("pset", resources, nelem) != PO_SUCCESS) {
753*0Sstevel@tonic-gate 			free(resources);
754*0Sstevel@tonic-gate 			(void) remove_importance_props(conf);
755*0Sstevel@tonic-gate 			return (PO_FAIL);
756*0Sstevel@tonic-gate 		}
757*0Sstevel@tonic-gate 	}
758*0Sstevel@tonic-gate 	free(resources);
759*0Sstevel@tonic-gate 	(void) remove_importance_props(conf);
760*0Sstevel@tonic-gate 	return (PO_SUCCESS);
761*0Sstevel@tonic-gate }
762*0Sstevel@tonic-gate 
763*0Sstevel@tonic-gate 
764*0Sstevel@tonic-gate /*
765*0Sstevel@tonic-gate  * Work out which allocation method to use based on the value of the
766*0Sstevel@tonic-gate  * system.allocate-method property.
767*0Sstevel@tonic-gate  */
768*0Sstevel@tonic-gate int
769*0Sstevel@tonic-gate resource_allocate(const char *type, pool_resource_t **res, uint_t nelem)
770*0Sstevel@tonic-gate {
771*0Sstevel@tonic-gate 	pool_elem_t *pe;
772*0Sstevel@tonic-gate 	const char *method_name;
773*0Sstevel@tonic-gate 	uint64_t method;
774*0Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
775*0Sstevel@tonic-gate 	int ret;
776*0Sstevel@tonic-gate 
777*0Sstevel@tonic-gate 	pe = pool_conf_to_elem(TO_CONF(TO_ELEM(res[0])));
778*0Sstevel@tonic-gate 
779*0Sstevel@tonic-gate 	if (pool_get_ns_property(pe, "allocate-method", &val) != POC_STRING)
780*0Sstevel@tonic-gate 		method_name = POA_IMPORTANCE;
781*0Sstevel@tonic-gate 	else {
782*0Sstevel@tonic-gate 		(void) pool_value_get_string(&val, &method_name);
783*0Sstevel@tonic-gate 	}
784*0Sstevel@tonic-gate 	if (strcmp(POA_IMPORTANCE, method_name) != 0) {
785*0Sstevel@tonic-gate 		if (strcmp(POA_SURPLUS_TO_DEFAULT, method_name) != 0) {
786*0Sstevel@tonic-gate 			pool_seterror(POE_INVALID_CONF);
787*0Sstevel@tonic-gate 			return (PO_FAIL);
788*0Sstevel@tonic-gate 		} else {
789*0Sstevel@tonic-gate 			method = POA_SURPLUS_TO_DEFAULT_NUM;
790*0Sstevel@tonic-gate 		}
791*0Sstevel@tonic-gate 	} else {
792*0Sstevel@tonic-gate 		method = POA_IMPORTANCE_NUM;
793*0Sstevel@tonic-gate 	}
794*0Sstevel@tonic-gate 	switch (method) {
795*0Sstevel@tonic-gate 	case POA_IMPORTANCE_NUM:
796*0Sstevel@tonic-gate 		/*
797*0Sstevel@tonic-gate 		 * TODO: Add support for new resource types
798*0Sstevel@tonic-gate 		 */
799*0Sstevel@tonic-gate 		switch (pool_resource_elem_class_from_string(type)) {
800*0Sstevel@tonic-gate 		case PREC_PSET:
801*0Sstevel@tonic-gate 			ret = pset_allocate_imp(res, nelem);
802*0Sstevel@tonic-gate 			break;
803*0Sstevel@tonic-gate 		default:
804*0Sstevel@tonic-gate 			ret = PO_FAIL;
805*0Sstevel@tonic-gate 			break;
806*0Sstevel@tonic-gate 		}
807*0Sstevel@tonic-gate 		break;
808*0Sstevel@tonic-gate 	case POA_SURPLUS_TO_DEFAULT_NUM:
809*0Sstevel@tonic-gate 		ret = resource_allocate_default(res, nelem);
810*0Sstevel@tonic-gate 		break;
811*0Sstevel@tonic-gate 	}
812*0Sstevel@tonic-gate 
813*0Sstevel@tonic-gate 	return (ret);
814*0Sstevel@tonic-gate }
815*0Sstevel@tonic-gate 
816*0Sstevel@tonic-gate /*
817*0Sstevel@tonic-gate  * Each set will get its minimum, however if there is more than the
818*0Sstevel@tonic-gate  * total minimum available, then leave this in the default set.
819*0Sstevel@tonic-gate  */
820*0Sstevel@tonic-gate int
821*0Sstevel@tonic-gate resource_allocate_default(pool_resource_t **res, uint_t nelem)
822*0Sstevel@tonic-gate {
823*0Sstevel@tonic-gate 	res_info_t *res_info;
824*0Sstevel@tonic-gate 	uint_t j;
825*0Sstevel@tonic-gate 	pool_resource_t *default_res = NULL;
826*0Sstevel@tonic-gate 
827*0Sstevel@tonic-gate 	if (nelem == 1)
828*0Sstevel@tonic-gate 		return (PO_SUCCESS);
829*0Sstevel@tonic-gate 
830*0Sstevel@tonic-gate 	if ((res_info = calloc(nelem, sizeof (res_info_t))) == NULL) {
831*0Sstevel@tonic-gate 		return (PO_FAIL);
832*0Sstevel@tonic-gate 	}
833*0Sstevel@tonic-gate 
834*0Sstevel@tonic-gate 	/* Load current resource values. */
835*0Sstevel@tonic-gate 	for (j = 0; j < nelem; j++) {
836*0Sstevel@tonic-gate 
837*0Sstevel@tonic-gate 		if (default_res == NULL &&
838*0Sstevel@tonic-gate 		    resource_is_default(res[j]) == PO_TRUE)
839*0Sstevel@tonic-gate 			default_res = res[j];
840*0Sstevel@tonic-gate 
841*0Sstevel@tonic-gate 		if (resource_get_max(res[j],
842*0Sstevel@tonic-gate 		    &res_info[j].ri_max) == PO_FAIL ||
843*0Sstevel@tonic-gate 		    resource_get_min(res[j],
844*0Sstevel@tonic-gate 			&res_info[j].ri_min) == PO_FAIL ||
845*0Sstevel@tonic-gate 		    resource_get_size(res[j],
846*0Sstevel@tonic-gate 			&res_info[j].ri_oldsize) == PO_FAIL ||
847*0Sstevel@tonic-gate 		    resource_get_pinned(res[j],
848*0Sstevel@tonic-gate 			&res_info[j].ri_pinned) == PO_FAIL) {
849*0Sstevel@tonic-gate 			free(res_info);
850*0Sstevel@tonic-gate 			return (PO_FAIL);
851*0Sstevel@tonic-gate 		}
852*0Sstevel@tonic-gate 		res_info[j].ri_res = res[j];
853*0Sstevel@tonic-gate 	}
854*0Sstevel@tonic-gate 
855*0Sstevel@tonic-gate 	/*
856*0Sstevel@tonic-gate 	 * Firstly, for all resources that have size greater than min,
857*0Sstevel@tonic-gate 	 * transfer all movable size above min to the default resource.
858*0Sstevel@tonic-gate 	 */
859*0Sstevel@tonic-gate 	for (j = 0; j < nelem; j++) {
860*0Sstevel@tonic-gate 
861*0Sstevel@tonic-gate 		uint64_t real_min;
862*0Sstevel@tonic-gate 
863*0Sstevel@tonic-gate 		/* compute the real minimum number of resources */
864*0Sstevel@tonic-gate 		real_min = MAX(res_info[j].ri_pinned, res_info[j].ri_min);
865*0Sstevel@tonic-gate 		if (res_info[j].ri_res != default_res &&
866*0Sstevel@tonic-gate 		    res_info[j].ri_oldsize > real_min) {
867*0Sstevel@tonic-gate 
868*0Sstevel@tonic-gate 			uint64_t num;
869*0Sstevel@tonic-gate 
870*0Sstevel@tonic-gate 			num = res_info[j].ri_oldsize - real_min;
871*0Sstevel@tonic-gate 			if (pool_resource_transfer(
872*0Sstevel@tonic-gate 			    TO_CONF(TO_ELEM(default_res)),
873*0Sstevel@tonic-gate 				res_info[j].ri_res, default_res, num) !=
874*0Sstevel@tonic-gate 			    PO_SUCCESS) {
875*0Sstevel@tonic-gate 				free(res_info);
876*0Sstevel@tonic-gate 				return (PO_FAIL);
877*0Sstevel@tonic-gate 			}
878*0Sstevel@tonic-gate 		}
879*0Sstevel@tonic-gate 	}
880*0Sstevel@tonic-gate 	/*
881*0Sstevel@tonic-gate 	 * Now, transfer resources below min from the default.
882*0Sstevel@tonic-gate 	 */
883*0Sstevel@tonic-gate 	for (j = 0; j < nelem; j++) {
884*0Sstevel@tonic-gate 		/*
885*0Sstevel@tonic-gate 		 * We don't want to interfere with resources which are reserved
886*0Sstevel@tonic-gate 		 */
887*0Sstevel@tonic-gate 		if (res_info[j].ri_res != default_res &&
888*0Sstevel@tonic-gate 		    res_info[j].ri_oldsize < res_info[j].ri_min) {
889*0Sstevel@tonic-gate 			if (pool_resource_transfer(
890*0Sstevel@tonic-gate 			    TO_CONF(TO_ELEM(default_res)),
891*0Sstevel@tonic-gate 			    default_res, res_info[j].ri_res,
892*0Sstevel@tonic-gate 			    res_info[j].ri_min - res_info[j].ri_oldsize) !=
893*0Sstevel@tonic-gate 			    PO_SUCCESS) {
894*0Sstevel@tonic-gate 				free(res_info);
895*0Sstevel@tonic-gate 				return (PO_FAIL);
896*0Sstevel@tonic-gate 			}
897*0Sstevel@tonic-gate 		}
898*0Sstevel@tonic-gate 	}
899*0Sstevel@tonic-gate 	free(res_info);
900*0Sstevel@tonic-gate 	return (PO_SUCCESS);
901*0Sstevel@tonic-gate }
902*0Sstevel@tonic-gate 
903*0Sstevel@tonic-gate /*
904*0Sstevel@tonic-gate  * Allocate cpus to pset resource sets, favoring sets with higher importance.
905*0Sstevel@tonic-gate  *
906*0Sstevel@tonic-gate  * Step 1: Sort resource sets by decreasing importance, and load each sets
907*0Sstevel@tonic-gate  *	   current size (oldsize), min, max, and number of pinned cpus.
908*0Sstevel@tonic-gate  *	   Compute the total number of cpus by totaling oldsize.
909*0Sstevel@tonic-gate  *
910*0Sstevel@tonic-gate  * Step 2: Compute the newsize for each set:
911*0Sstevel@tonic-gate  *
912*0Sstevel@tonic-gate  * 	Give each set its min number of cpus.  This min may be greater than
913*0Sstevel@tonic-gate  *	its pset.min due to pinned cpus. If there are more cpus than the total
914*0Sstevel@tonic-gate  *	of all mins, then the surplus cpus are dealt round-robin to all sets
915*0Sstevel@tonic-gate  *	(up to their max) in order of decreasing importance.  A set may be
916*0Sstevel@tonic-gate  *	skipped during dealing because it started with more than its min due to
917*0Sstevel@tonic-gate  * 	pinned cpus.  The dealing stops when there are no more cpus or all
918*0Sstevel@tonic-gate  *	sets are at their max. If all sets are at their max, any remaining cpus
919*0Sstevel@tonic-gate  *	are given to the default set.
920*0Sstevel@tonic-gate  *
921*0Sstevel@tonic-gate  * Step 3: Transfer cpus from sets with (oldsize > newsize) to sets with
922*0Sstevel@tonic-gate  *	   (oldsize < newsize).
923*0Sstevel@tonic-gate  */
924*0Sstevel@tonic-gate int
925*0Sstevel@tonic-gate pset_allocate_imp(pool_resource_t **res, uint_t nelem)
926*0Sstevel@tonic-gate {
927*0Sstevel@tonic-gate 	res_info_t *res_info;
928*0Sstevel@tonic-gate 	res_info_t *default_res_info;
929*0Sstevel@tonic-gate 	const pool_resource_t *default_res = NULL;
930*0Sstevel@tonic-gate 	uint64_t tot_resources = 0;	/* total count of resources */
931*0Sstevel@tonic-gate 	uint64_t tot_min = 0;		/* total of all resource set mins */
932*0Sstevel@tonic-gate 	uint64_t num_to_deal = 0;	/* total resources above mins to deal */
933*0Sstevel@tonic-gate 	uint64_t sets_maxed = 0;	/* number of resource sets dealt to  */
934*0Sstevel@tonic-gate 					/* their max */
935*0Sstevel@tonic-gate 	uint64_t sets_finished = 0;	/* number of resource sets that have */
936*0Sstevel@tonic-gate 					/* size == newsize */
937*0Sstevel@tonic-gate 	int donor, receiver;
938*0Sstevel@tonic-gate 	int deal;
939*0Sstevel@tonic-gate 	int j;
940*0Sstevel@tonic-gate 	int ret = PO_SUCCESS;
941*0Sstevel@tonic-gate 
942*0Sstevel@tonic-gate 	/*
943*0Sstevel@tonic-gate 	 * Build list of res_info_t's
944*0Sstevel@tonic-gate 	 */
945*0Sstevel@tonic-gate 	if ((res_info = calloc(nelem, sizeof (res_info_t))) == NULL) {
946*0Sstevel@tonic-gate 		pool_seterror(POE_SYSTEM);
947*0Sstevel@tonic-gate 		return (PO_FAIL);
948*0Sstevel@tonic-gate 	}
949*0Sstevel@tonic-gate 
950*0Sstevel@tonic-gate 	/* Order resources by importance, most important being first */
951*0Sstevel@tonic-gate 	qsort(res, nelem, sizeof (pool_resource_t *),
952*0Sstevel@tonic-gate 	    resource_compare_by_descending_importance);
953*0Sstevel@tonic-gate 
954*0Sstevel@tonic-gate 	for (j = 0; j < nelem; j++) {
955*0Sstevel@tonic-gate 
956*0Sstevel@tonic-gate 		/* Track which resource is the default */
957*0Sstevel@tonic-gate 		if (default_res == NULL &&
958*0Sstevel@tonic-gate 		    resource_is_default(res[j]) == PO_TRUE) {
959*0Sstevel@tonic-gate 			default_res = res[j];
960*0Sstevel@tonic-gate 			default_res_info = &(res_info[j]);
961*0Sstevel@tonic-gate 		}
962*0Sstevel@tonic-gate 
963*0Sstevel@tonic-gate 		/* Load sets' current values */
964*0Sstevel@tonic-gate 		if (resource_get_max(res[j], &res_info[j].ri_max) == PO_FAIL ||
965*0Sstevel@tonic-gate 		    resource_get_min(res[j], &res_info[j].ri_min) == PO_FAIL ||
966*0Sstevel@tonic-gate 		    resource_get_size(res[j], &res_info[j].ri_oldsize) ==
967*0Sstevel@tonic-gate 		    PO_FAIL ||
968*0Sstevel@tonic-gate 		    resource_get_pinned(res[j],
969*0Sstevel@tonic-gate 		    &res_info[j].ri_pinned) == PO_FAIL) {
970*0Sstevel@tonic-gate 			free(res_info);
971*0Sstevel@tonic-gate 			return (PO_FAIL);
972*0Sstevel@tonic-gate 		}
973*0Sstevel@tonic-gate 
974*0Sstevel@tonic-gate 		/* Start each set's newsize out at their min. */
975*0Sstevel@tonic-gate 		res_info[j].ri_newsize = res_info[j].ri_min;
976*0Sstevel@tonic-gate 
977*0Sstevel@tonic-gate 		/* pre-deal pinned resources that exceed min */
978*0Sstevel@tonic-gate 		if (res_info[j].ri_pinned > res_info[j].ri_min) {
979*0Sstevel@tonic-gate 			res_info[j].ri_newsize = res_info[j].ri_pinned;
980*0Sstevel@tonic-gate 			res_info[j].ri_dealt =
981*0Sstevel@tonic-gate 			    res_info[j].ri_newsize - res_info[j].ri_min;
982*0Sstevel@tonic-gate 		}
983*0Sstevel@tonic-gate 		res_info[j].ri_res = res[j];
984*0Sstevel@tonic-gate 
985*0Sstevel@tonic-gate 		/* Compute total number of resources to deal out */
986*0Sstevel@tonic-gate 		tot_resources += res_info[j].ri_oldsize;
987*0Sstevel@tonic-gate 		tot_min += res_info[j].ri_newsize;
988*0Sstevel@tonic-gate 
989*0Sstevel@tonic-gate #ifdef DEBUG
990*0Sstevel@tonic-gate 		dprintf("res allocation details\n");
991*0Sstevel@tonic-gate 		pool_elem_dprintf(TO_ELEM(res[j]));
992*0Sstevel@tonic-gate 		dprintf("size=%llu\n", res_info[j].ri_oldsize);
993*0Sstevel@tonic-gate #endif	/* DEBUG */
994*0Sstevel@tonic-gate 	}
995*0Sstevel@tonic-gate 
996*0Sstevel@tonic-gate 	num_to_deal = tot_resources - tot_min;
997*0Sstevel@tonic-gate 
998*0Sstevel@tonic-gate 	/*
999*0Sstevel@tonic-gate 	 * Deal one resource to each set, and then another, until all
1000*0Sstevel@tonic-gate 	 * resources are dealt or all sets are at their max.
1001*0Sstevel@tonic-gate 	 */
1002*0Sstevel@tonic-gate 	for (deal = 1; num_to_deal > 0 && sets_maxed < nelem; deal++) {
1003*0Sstevel@tonic-gate 		for (j = 0; j < nelem; j++) {
1004*0Sstevel@tonic-gate 
1005*0Sstevel@tonic-gate 			/*
1006*0Sstevel@tonic-gate 			 * Skip this resource set if it has already been
1007*0Sstevel@tonic-gate 			 * pre-dealt a resource due to pinned resources.
1008*0Sstevel@tonic-gate 			 */
1009*0Sstevel@tonic-gate 			if (res_info[j].ri_dealt >= deal)
1010*0Sstevel@tonic-gate 				continue;
1011*0Sstevel@tonic-gate 
1012*0Sstevel@tonic-gate 			if (res_info[j].ri_newsize < res_info[j].ri_max) {
1013*0Sstevel@tonic-gate 
1014*0Sstevel@tonic-gate 				res_info[j].ri_dealt++;
1015*0Sstevel@tonic-gate 				res_info[j].ri_newsize++;
1016*0Sstevel@tonic-gate 				if (res_info[j].ri_newsize ==
1017*0Sstevel@tonic-gate 				    res_info[j].ri_max)
1018*0Sstevel@tonic-gate 					sets_maxed++;
1019*0Sstevel@tonic-gate 
1020*0Sstevel@tonic-gate 				num_to_deal--;
1021*0Sstevel@tonic-gate 				if (num_to_deal == 0)
1022*0Sstevel@tonic-gate 					break;
1023*0Sstevel@tonic-gate 			}
1024*0Sstevel@tonic-gate 		}
1025*0Sstevel@tonic-gate 	}
1026*0Sstevel@tonic-gate 
1027*0Sstevel@tonic-gate 	/*
1028*0Sstevel@tonic-gate 	 * If all resource sets are at their max, deal the remaining to the
1029*0Sstevel@tonic-gate 	 * default resource set.
1030*0Sstevel@tonic-gate 	 */
1031*0Sstevel@tonic-gate 	if ((sets_maxed == nelem) && (num_to_deal > 0)) {
1032*0Sstevel@tonic-gate 		default_res_info->ri_dealt += num_to_deal;
1033*0Sstevel@tonic-gate 		default_res_info->ri_newsize += num_to_deal;
1034*0Sstevel@tonic-gate 	}
1035*0Sstevel@tonic-gate 
1036*0Sstevel@tonic-gate 	/*
1037*0Sstevel@tonic-gate 	 * Sort so that resource sets needing resources preced resource sets
1038*0Sstevel@tonic-gate 	 * that have extra resources.  The sort function will also compute
1039*0Sstevel@tonic-gate 	 * The quantity of resources that need to be transfered into or out
1040*0Sstevel@tonic-gate 	 * of each set so that it's size == newsize.
1041*0Sstevel@tonic-gate 	 */
1042*0Sstevel@tonic-gate 	qsort(res_info, nelem, sizeof (res_info_t),
1043*0Sstevel@tonic-gate 	    compute_size_to_transfer);
1044*0Sstevel@tonic-gate 
1045*0Sstevel@tonic-gate 	/*
1046*0Sstevel@tonic-gate 	 * The donor index starts at the end of the resource set list and
1047*0Sstevel@tonic-gate 	 * walks up.  The receiver index starts at the beginning of the
1048*0Sstevel@tonic-gate 	 * resource set list and walks down.  Cpu's are transfered from the
1049*0Sstevel@tonic-gate 	 * donors to the receivers until all sets have transfer == 0).
1050*0Sstevel@tonic-gate 	 */
1051*0Sstevel@tonic-gate 	donor = nelem - 1;
1052*0Sstevel@tonic-gate 	receiver = 0;
1053*0Sstevel@tonic-gate 
1054*0Sstevel@tonic-gate 	/* Number of sets with transfer == 0 */
1055*0Sstevel@tonic-gate 	sets_finished = 0;
1056*0Sstevel@tonic-gate 
1057*0Sstevel@tonic-gate 	/* Tranfer resources so that each set's size becomes newsize */
1058*0Sstevel@tonic-gate 	for (;;) {
1059*0Sstevel@tonic-gate 
1060*0Sstevel@tonic-gate 		uint64_t ntrans;
1061*0Sstevel@tonic-gate 		if (donor == receiver) {
1062*0Sstevel@tonic-gate 			if (res_info[donor].ri_transfer != 0) {
1063*0Sstevel@tonic-gate 				free(res_info);
1064*0Sstevel@tonic-gate 				return (PO_FAIL);
1065*0Sstevel@tonic-gate 			}
1066*0Sstevel@tonic-gate 			sets_finished++;
1067*0Sstevel@tonic-gate 			break;
1068*0Sstevel@tonic-gate 		}
1069*0Sstevel@tonic-gate 		if (res_info[donor].ri_transfer == 0) {
1070*0Sstevel@tonic-gate 			sets_finished++;
1071*0Sstevel@tonic-gate 			donor--;
1072*0Sstevel@tonic-gate 			continue;
1073*0Sstevel@tonic-gate 		}
1074*0Sstevel@tonic-gate 		if (res_info[receiver].ri_transfer == 0) {
1075*0Sstevel@tonic-gate 			sets_finished++;
1076*0Sstevel@tonic-gate 			receiver++;
1077*0Sstevel@tonic-gate 			continue;
1078*0Sstevel@tonic-gate 		}
1079*0Sstevel@tonic-gate 
1080*0Sstevel@tonic-gate 		/* Transfer resources from the donor set to the receiver */
1081*0Sstevel@tonic-gate 		ntrans = MIN(res_info[donor].ri_transfer,
1082*0Sstevel@tonic-gate 			    -res_info[receiver].ri_transfer);
1083*0Sstevel@tonic-gate 
1084*0Sstevel@tonic-gate 		if (pool_resource_transfer(
1085*0Sstevel@tonic-gate 			TO_CONF(TO_ELEM(res_info[donor].ri_res)),
1086*0Sstevel@tonic-gate 			    res_info[donor].ri_res, res_info[receiver].ri_res,
1087*0Sstevel@tonic-gate 			    ntrans) != PO_SUCCESS) {
1088*0Sstevel@tonic-gate 				free(res_info);
1089*0Sstevel@tonic-gate 				return (PO_FAIL);
1090*0Sstevel@tonic-gate 			}
1091*0Sstevel@tonic-gate 		res_info[donor].ri_transfer -= ntrans;
1092*0Sstevel@tonic-gate 		res_info[receiver].ri_transfer += ntrans;
1093*0Sstevel@tonic-gate 	}
1094*0Sstevel@tonic-gate 
1095*0Sstevel@tonic-gate 	if (sets_finished != nelem)
1096*0Sstevel@tonic-gate 		ret = PO_FAIL;
1097*0Sstevel@tonic-gate 
1098*0Sstevel@tonic-gate 	free(res_info);
1099*0Sstevel@tonic-gate 	return (ret);
1100*0Sstevel@tonic-gate }
1101*0Sstevel@tonic-gate 
1102*0Sstevel@tonic-gate /*
1103*0Sstevel@tonic-gate  * Used as a qsort parameter to help order resources in terms of their
1104*0Sstevel@tonic-gate  * importance, higher importance being first.
1105*0Sstevel@tonic-gate  */
1106*0Sstevel@tonic-gate int
1107*0Sstevel@tonic-gate resource_compare_by_descending_importance(const void *arg1, const void *arg2)
1108*0Sstevel@tonic-gate {
1109*0Sstevel@tonic-gate 	pool_elem_t *elem1;
1110*0Sstevel@tonic-gate 	pool_elem_t *elem2;
1111*0Sstevel@tonic-gate 	pool_resource_t **res1 = (pool_resource_t **)arg1;
1112*0Sstevel@tonic-gate 	pool_resource_t **res2 = (pool_resource_t **)arg2;
1113*0Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
1114*0Sstevel@tonic-gate 	int64_t i1 = 0, i2 = 0;
1115*0Sstevel@tonic-gate 
1116*0Sstevel@tonic-gate 	elem1 = TO_ELEM(*res1);
1117*0Sstevel@tonic-gate 	elem2 = TO_ELEM(*res2);
1118*0Sstevel@tonic-gate 
1119*0Sstevel@tonic-gate 	if (pool_get_property(TO_CONF(elem1), elem1, "_importance", &val) ==
1120*0Sstevel@tonic-gate 	    POC_INT)
1121*0Sstevel@tonic-gate 		(void) pool_value_get_int64(&val, &i1);
1122*0Sstevel@tonic-gate 
1123*0Sstevel@tonic-gate 	if (pool_get_property(TO_CONF(elem2), elem2, "_importance", &val) ==
1124*0Sstevel@tonic-gate 	    POC_INT)
1125*0Sstevel@tonic-gate 		(void) pool_value_get_int64(&val, &i2);
1126*0Sstevel@tonic-gate 	return (i1 > i2 ? -1 : (i1 < i2 ? 1 : 0));
1127*0Sstevel@tonic-gate }
1128*0Sstevel@tonic-gate 
1129*0Sstevel@tonic-gate /*
1130*0Sstevel@tonic-gate  * Sort in increasing order so that resource sets with extra resources are at
1131*0Sstevel@tonic-gate  * the end and resource sets needing resources are at the beginning.
1132*0Sstevel@tonic-gate  */
1133*0Sstevel@tonic-gate int
1134*0Sstevel@tonic-gate compute_size_to_transfer(const void *arg1, const void *arg2)
1135*0Sstevel@tonic-gate {
1136*0Sstevel@tonic-gate 	res_info_t *r1 = (res_info_t *)arg1, *r2 = (res_info_t *)arg2;
1137*0Sstevel@tonic-gate 	r1->ri_transfer = (int64_t)r1->ri_oldsize - (int64_t)r1->ri_newsize;
1138*0Sstevel@tonic-gate 	r2->ri_transfer = (int64_t)r2->ri_oldsize - (int64_t)r2->ri_newsize;
1139*0Sstevel@tonic-gate 	return (r1->ri_transfer > r2->ri_transfer ? 1 :
1140*0Sstevel@tonic-gate 	    (r1->ri_transfer < r2->ri_transfer ? -1 : 0));
1141*0Sstevel@tonic-gate }
1142*0Sstevel@tonic-gate 
1143*0Sstevel@tonic-gate /*
1144*0Sstevel@tonic-gate  * set_importance_cb() is used to create "_importance" props on each
1145*0Sstevel@tonic-gate  * resource associated with a pool.
1146*0Sstevel@tonic-gate  *
1147*0Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
1148*0Sstevel@tonic-gate  */
1149*0Sstevel@tonic-gate /*ARGSUSED*/
1150*0Sstevel@tonic-gate static int
1151*0Sstevel@tonic-gate set_importance_cb(pool_conf_t *conf, pool_t *pool, void *unused)
1152*0Sstevel@tonic-gate {
1153*0Sstevel@tonic-gate 	pool_value_t val = POOL_VALUE_INITIALIZER;
1154*0Sstevel@tonic-gate 	int64_t importance;
1155*0Sstevel@tonic-gate 	pool_resource_t **res;
1156*0Sstevel@tonic-gate 	uint_t nelem, i;
1157*0Sstevel@tonic-gate 
1158*0Sstevel@tonic-gate 	if (pool_get_property(conf, TO_ELEM(pool), "pool.importance", &val) !=
1159*0Sstevel@tonic-gate 	    POC_INT) {
1160*0Sstevel@tonic-gate 		pool_seterror(POE_INVALID_CONF);
1161*0Sstevel@tonic-gate 		return (PO_FAIL);
1162*0Sstevel@tonic-gate 	}
1163*0Sstevel@tonic-gate 	(void) pool_value_get_int64(&val, &importance);
1164*0Sstevel@tonic-gate 	if ((res = pool_query_pool_resources(conf, pool, &nelem, NULL)) ==
1165*0Sstevel@tonic-gate 	    NULL) {
1166*0Sstevel@tonic-gate 		return (PO_FAIL);
1167*0Sstevel@tonic-gate 	}
1168*0Sstevel@tonic-gate 	for (i = 0; res[i] != NULL; i++) {
1169*0Sstevel@tonic-gate 		int64_t old_importance = INT64_MIN;
1170*0Sstevel@tonic-gate 		pool_elem_t *elem = TO_ELEM(res[i]);
1171*0Sstevel@tonic-gate 
1172*0Sstevel@tonic-gate 		if (pool_get_property(conf, elem, "_importance", &val) ==
1173*0Sstevel@tonic-gate 		    POC_INT)
1174*0Sstevel@tonic-gate 			(void) pool_value_get_int64(&val, &old_importance);
1175*0Sstevel@tonic-gate 		if (old_importance <= importance) {
1176*0Sstevel@tonic-gate 			(void) pool_value_set_int64(&val, importance);
1177*0Sstevel@tonic-gate 			(void) pool_put_property(conf, elem, "_importance",
1178*0Sstevel@tonic-gate 			    &val);
1179*0Sstevel@tonic-gate 		}
1180*0Sstevel@tonic-gate 	}
1181*0Sstevel@tonic-gate 	free(res);
1182*0Sstevel@tonic-gate 	return (PO_SUCCESS);
1183*0Sstevel@tonic-gate }
1184*0Sstevel@tonic-gate 
1185*0Sstevel@tonic-gate /*
1186*0Sstevel@tonic-gate  * unset_importance_cb() is used to remove "_importance" props from
1187*0Sstevel@tonic-gate  * each resource associated with a pool.
1188*0Sstevel@tonic-gate  *
1189*0Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
1190*0Sstevel@tonic-gate  */
1191*0Sstevel@tonic-gate /*ARGSUSED*/
1192*0Sstevel@tonic-gate static int
1193*0Sstevel@tonic-gate unset_importance_cb(pool_conf_t *conf, pool_t *pool, void *unused)
1194*0Sstevel@tonic-gate {
1195*0Sstevel@tonic-gate 	pool_resource_t **res;
1196*0Sstevel@tonic-gate 	uint_t nelem, i;
1197*0Sstevel@tonic-gate 
1198*0Sstevel@tonic-gate 	if ((res = pool_query_pool_resources(conf, pool, &nelem, NULL)) ==
1199*0Sstevel@tonic-gate 	    NULL) {
1200*0Sstevel@tonic-gate 		return (PO_FAIL);
1201*0Sstevel@tonic-gate 	}
1202*0Sstevel@tonic-gate 	for (i = 0; res[i] != NULL; i++) {
1203*0Sstevel@tonic-gate 		if (pool_rm_property(conf, TO_ELEM(res[i]), "_importance") ==
1204*0Sstevel@tonic-gate 		    PO_FAIL) {
1205*0Sstevel@tonic-gate 			free(res);
1206*0Sstevel@tonic-gate 			return (PO_FAIL);
1207*0Sstevel@tonic-gate 		}
1208*0Sstevel@tonic-gate 	}
1209*0Sstevel@tonic-gate 	free(res);
1210*0Sstevel@tonic-gate 	return (PO_SUCCESS);
1211*0Sstevel@tonic-gate }
1212*0Sstevel@tonic-gate 
1213*0Sstevel@tonic-gate /*
1214*0Sstevel@tonic-gate  * add_importance_props() is used to create "_importance" props on
1215*0Sstevel@tonic-gate  * each resource associated with a pool.
1216*0Sstevel@tonic-gate  *
1217*0Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
1218*0Sstevel@tonic-gate  */
1219*0Sstevel@tonic-gate static int
1220*0Sstevel@tonic-gate add_importance_props(pool_conf_t *conf)
1221*0Sstevel@tonic-gate {
1222*0Sstevel@tonic-gate 	return (pool_walk_pools(conf, NULL, set_importance_cb));
1223*0Sstevel@tonic-gate }
1224*0Sstevel@tonic-gate 
1225*0Sstevel@tonic-gate /*
1226*0Sstevel@tonic-gate  * remove_importance_props() is used to remove "_importance" props on
1227*0Sstevel@tonic-gate  * each resource associated with a pool.
1228*0Sstevel@tonic-gate  *
1229*0Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
1230*0Sstevel@tonic-gate  */
1231*0Sstevel@tonic-gate static int
1232*0Sstevel@tonic-gate remove_importance_props(pool_conf_t *conf)
1233*0Sstevel@tonic-gate {
1234*0Sstevel@tonic-gate 	return (pool_walk_pools(conf, NULL, unset_importance_cb));
1235*0Sstevel@tonic-gate }
1236*0Sstevel@tonic-gate 
1237*0Sstevel@tonic-gate /*
1238*0Sstevel@tonic-gate  * pool_conf_commit_sys() takes a configuration and modifies both the
1239*0Sstevel@tonic-gate  * supplied configuration and the dynamic configuration. The goal of
1240*0Sstevel@tonic-gate  * this modification is to generate a dynamic configuration which best
1241*0Sstevel@tonic-gate  * represents the constraints laid down in the static configuration
1242*0Sstevel@tonic-gate  * and to update the static configuration with the results of this
1243*0Sstevel@tonic-gate  * process.
1244*0Sstevel@tonic-gate  *
1245*0Sstevel@tonic-gate  * Returns PO_SUCCESS/PO_FAIL
1246*0Sstevel@tonic-gate  */
1247*0Sstevel@tonic-gate int
1248*0Sstevel@tonic-gate pool_conf_commit_sys(pool_conf_t *conf, int validate)
1249*0Sstevel@tonic-gate {
1250*0Sstevel@tonic-gate 	pool_conf_t *dyn;
1251*0Sstevel@tonic-gate 
1252*0Sstevel@tonic-gate 	if ((dyn = pool_conf_alloc()) == NULL)
1253*0Sstevel@tonic-gate 		return (PO_FAIL);
1254*0Sstevel@tonic-gate 	if (pool_conf_open(dyn, pool_dynamic_location(), PO_RDWR) !=
1255*0Sstevel@tonic-gate 	    PO_SUCCESS) {
1256*0Sstevel@tonic-gate 		pool_conf_free(dyn);
1257*0Sstevel@tonic-gate 		return (PO_FAIL);
1258*0Sstevel@tonic-gate 	}
1259*0Sstevel@tonic-gate 	if (validate == PO_TRUE) {
1260*0Sstevel@tonic-gate 		if (pool_conf_validate(conf, POV_RUNTIME) != PO_SUCCESS) {
1261*0Sstevel@tonic-gate 			(void) pool_conf_close(dyn);
1262*0Sstevel@tonic-gate 			pool_conf_free(dyn);
1263*0Sstevel@tonic-gate 			return (PO_FAIL);
1264*0Sstevel@tonic-gate 		}
1265*0Sstevel@tonic-gate 	}
1266*0Sstevel@tonic-gate 	/*
1267*0Sstevel@tonic-gate 	 * Now try to make the two things "the same".
1268*0Sstevel@tonic-gate 	 */
1269*0Sstevel@tonic-gate 	if (diff_and_fix(conf, dyn) != PO_SUCCESS) {
1270*0Sstevel@tonic-gate 		(void) pool_conf_close(dyn);
1271*0Sstevel@tonic-gate 		pool_conf_free(dyn);
1272*0Sstevel@tonic-gate 		pool_seterror(POE_INVALID_CONF);
1273*0Sstevel@tonic-gate 		return (PO_FAIL);
1274*0Sstevel@tonic-gate 	}
1275*0Sstevel@tonic-gate 	if (dyn->pc_prov->pc_commit(dyn) != PO_SUCCESS) {
1276*0Sstevel@tonic-gate 		(void) pool_conf_close(dyn);
1277*0Sstevel@tonic-gate 		pool_conf_free(dyn);
1278*0Sstevel@tonic-gate 		return (PO_FAIL);
1279*0Sstevel@tonic-gate 	}
1280*0Sstevel@tonic-gate 	(void) pool_conf_close(dyn);
1281*0Sstevel@tonic-gate 	pool_conf_free(dyn);
1282*0Sstevel@tonic-gate 	return (PO_SUCCESS);
1283*0Sstevel@tonic-gate }
1284*0Sstevel@tonic-gate 
1285*0Sstevel@tonic-gate /*
1286*0Sstevel@tonic-gate  * Copies all properties from one element to another. If the property
1287*0Sstevel@tonic-gate  * is a readonly property, then don't copy it.
1288*0Sstevel@tonic-gate  */
1289*0Sstevel@tonic-gate /* ARGSUSED */
1290*0Sstevel@tonic-gate static int
1291*0Sstevel@tonic-gate clone_element(pool_conf_t *conf, pool_elem_t *pe, const char *name,
1292*0Sstevel@tonic-gate     pool_value_t *pv, void *user)
1293*0Sstevel@tonic-gate {
1294*0Sstevel@tonic-gate 	pool_elem_t *tgt = (pool_elem_t *)user;
1295*0Sstevel@tonic-gate 	const pool_prop_t *prop;
1296*0Sstevel@tonic-gate #ifdef DEBUG
1297*0Sstevel@tonic-gate 	dprintf("Cloning %s from %s\n",
1298*0Sstevel@tonic-gate 	    pool_conf_location(TO_CONF(TO_ELEM(tgt))),
1299*0Sstevel@tonic-gate 	    pool_conf_location(TO_CONF(pe)));
1300*0Sstevel@tonic-gate 	assert(TO_CONF(TO_ELEM(tgt)) != TO_CONF(pe));
1301*0Sstevel@tonic-gate 	dprintf("clone_element: Processing %s\n", name);
1302*0Sstevel@tonic-gate 	pool_value_dprintf(pv);
1303*0Sstevel@tonic-gate #endif	/* DEBUG */
1304*0Sstevel@tonic-gate 	/*
1305*0Sstevel@tonic-gate 	 * Some properties should be ignored
1306*0Sstevel@tonic-gate 	 */
1307*0Sstevel@tonic-gate 	if ((prop = provider_get_prop(pe, name)) != NULL &&
1308*0Sstevel@tonic-gate 	    prop_is_readonly(prop) == PO_TRUE)
1309*0Sstevel@tonic-gate 		return (PO_SUCCESS);
1310*0Sstevel@tonic-gate 	return (pool_put_property(TO_CONF(tgt), tgt, name, pv) == PO_FAIL);
1311*0Sstevel@tonic-gate }
1312*0Sstevel@tonic-gate 
1313*0Sstevel@tonic-gate /*
1314*0Sstevel@tonic-gate  * Removes all properties from one element. Properties which are
1315*0Sstevel@tonic-gate  * managed by the configuration are ignored.
1316*0Sstevel@tonic-gate  */
1317*0Sstevel@tonic-gate /* ARGSUSED3 */
1318*0Sstevel@tonic-gate static int
1319*0Sstevel@tonic-gate clean_element(pool_conf_t *conf, pool_elem_t *pe, const char *name,
1320*0Sstevel@tonic-gate     pool_value_t *pv, void *user)
1321*0Sstevel@tonic-gate {
1322*0Sstevel@tonic-gate 	const pool_prop_t *prop;
1323*0Sstevel@tonic-gate 	/*
1324*0Sstevel@tonic-gate 	 * Some properties should be ignored
1325*0Sstevel@tonic-gate 	 */
1326*0Sstevel@tonic-gate 	if ((prop = provider_get_prop(pe, name)) != NULL &&
1327*0Sstevel@tonic-gate 	    prop_is_optional(prop) == PO_FALSE)
1328*0Sstevel@tonic-gate 		return (PO_SUCCESS);
1329*0Sstevel@tonic-gate 	return (pool_rm_property(conf, (pool_elem_t *)pe, name) == PO_FAIL);
1330*0Sstevel@tonic-gate }
1331