Home | History | Annotate | Download | only in common
      1     0    stevel /*
      2     0    stevel  * CDDL HEADER START
      3     0    stevel  *
      4     0    stevel  * The contents of this file are subject to the terms of the
      5  2324   sdussud  * Common Development and Distribution License (the "License").
      6  2324   sdussud  * You may not use this file except in compliance with the License.
      7     0    stevel  *
      8     0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9     0    stevel  * or http://www.opensolaris.org/os/licensing.
     10     0    stevel  * See the License for the specific language governing permissions
     11     0    stevel  * and limitations under the License.
     12     0    stevel  *
     13     0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14     0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15     0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16     0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17     0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18     0    stevel  *
     19     0    stevel  * CDDL HEADER END
     20     0    stevel  */
     21     0    stevel /*
     22  2324   sdussud  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     23     0    stevel  * Use is subject to license terms.
     24     0    stevel  */
     25     0    stevel 
     26     0    stevel #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27     0    stevel 
     28     0    stevel /*
     29     0    stevel  * These functions implement the process of commitment for a pool
     30     0    stevel  * configuration. This process can be described as taking instructions
     31     0    stevel  * from a static configuration file and using the information about
     32     0    stevel  * the target system contained in the dynamic configuration to make
     33     0    stevel  * decisions about how best to allocate resources to meet the
     34     0    stevel  * constraints specified in the static configuration file.
     35     0    stevel  *
     36     0    stevel  * Mechanically, this process relies upon ordering the individual
     37     0    stevel  * components of the file and stepping through the lists of components
     38     0    stevel  * and taking actions depending on their type and which file they are
     39     0    stevel  * part of.
     40     0    stevel  *
     41     0    stevel  * Configuration components can be broken down into different types
     42     0    stevel  * which are then treated according to the following table:
     43     0    stevel  *
     44     0    stevel  * Element Type		Action
     45     0    stevel  * system || pool ||
     46     0    stevel  * res_comp || res_agg	If the element is a required element, then create or
     47     0    stevel  *			update it (don't destroy required elements in the
     48     0    stevel  *			static configuration) otherwise manipulate the
     49     0    stevel  *			dynamic configuration to create, destroy or update
     50     0    stevel  *			the element on the system.
     51     0    stevel  * comp			Create, destroy or update the static configuration
     52     0    stevel  *			component.
     53     0    stevel  *
     54     0    stevel  * The treatment of the different elements reflects the fact that all
     55     0    stevel  * elements other than comp are configurable and thus libpool can
     56     0    stevel  * create, destroy and modify these elements at will. comp elements
     57     0    stevel  * reflect the disposition of the system, these elements can be moved
     58     0    stevel  * around but they can't be created or destroyed in the dynamic
     59     0    stevel  * configuration in the commit process. comp elements can be created
     60     0    stevel  * and destroyed in the static configuration file as a result of a
     61     0    stevel  * commit operation, since it's possible for a comp to not appear in
     62     0    stevel  * the dynamic configuration. For instance, if the static
     63     0    stevel  * configuration file was created on a different machine or after a DR
     64     0    stevel  * operation which has removed or added components.
     65     0    stevel  *
     66     0    stevel  */
     67     0    stevel #include <assert.h>
     68     0    stevel #include <stdio.h>
     69     0    stevel #include <stdlib.h>
     70     0    stevel #include <sys/types.h>
     71     0    stevel #include <errno.h>
     72     0    stevel #include <string.h>
     73     0    stevel #include <limits.h>
     74     0    stevel #include <unistd.h>
     75     0    stevel 
     76     0    stevel #include <pool.h>
     77     0    stevel #include "pool_internal.h"
     78     0    stevel #include "pool_impl.h"
     79     0    stevel 
     80     0    stevel #define	MIN(x, y) ((x) < (y) ? (x) : (y))
     81     0    stevel #define	MAX(x, y) ((x) > (y) ? (x) : (y))
     82     0    stevel #define	POA_IMPORTANCE_NUM	0
     83     0    stevel #define	POA_SURPLUS_TO_DEFAULT_NUM	1
     84     0    stevel 
     85     0    stevel /*
     86     0    stevel  * This resource specific structure is used to determine allocation of resources
     87     0    stevel  * during resource set allocation.  Each set will receive its min, plus
     88     0    stevel  * some number of dealt resources based on the global allocation policy.
     89     0    stevel  */
     90     0    stevel typedef struct res_info {
     91     0    stevel 	pool_resource_t	*ri_res;	/* Resource set */
     92     0    stevel 	uint64_t	ri_min;		/* Resource set's low watermark */
     93     0    stevel 	uint64_t	ri_max;		/* Resource set's high watermark */
     94     0    stevel 	uint64_t	ri_oldsize;	/* Size of resource set at the start */
     95     0    stevel 	uint64_t	ri_newsize;	/* New resource set size allocated */
     96     0    stevel 	uint64_t	ri_pinned;	/* Count of pinned resources in set */
     97     0    stevel 	uint64_t	ri_dealt;	/* Count of resources dealt to set */
     98     0    stevel 	int64_t		ri_transfer;	/* oldsize - newsize */
     99     0    stevel 					/* The signed quantity of resources */
    100     0    stevel 					/* to tranfer into or out of this */
    101     0    stevel 					/* resource set */
    102     0    stevel 					/* + transfer: tranfer resources out */
    103     0    stevel 					/* - transfer: tranfer resources in */
    104     0    stevel } res_info_t;
    105     0    stevel 
    106     0    stevel /*
    107     0    stevel  * diff_and_fix operations
    108     0    stevel  */
    109     0    stevel static int		commit_create(pool_conf_t *, pool_elem_t **);
    110     0    stevel static int		commit_delete(pool_elem_t *);
    111     0    stevel static int		commit_update(pool_elem_t *, pool_elem_t *, int);
    112     0    stevel 
    113     0    stevel /*
    114     0    stevel  * configuration commit processing
    115     0    stevel  */
    116     0    stevel static int		diff_and_fix(pool_conf_t *, pool_conf_t *);
    117     0    stevel static int		process_elem_lt(pool_elem_t *, pool_conf_t *);
    118     0    stevel static int		process_elem_gt(pool_elem_t *, pool_conf_t *,
    119     0    stevel     pool_conf_t *);
    120     0    stevel static int		process_lists(int, pool_conf_t *,
    121     0    stevel     pool_conf_t *, int);
    122     0    stevel static pool_elem_t	**get_elem_list(const pool_conf_t *, int, uint_t *);
    123     0    stevel static int		share_resources(pool_conf_t *);
    124     0    stevel static int		resource_allocate(const char *, pool_resource_t **,
    125     0    stevel     uint_t);
    126     0    stevel static int		resource_allocate_default(pool_resource_t **, uint_t);
    127     0    stevel static int		pset_allocate_imp(pool_resource_t **, uint_t);
    128     0    stevel static int		resource_compare_by_descending_importance(const void *,
    129     0    stevel     const void *);
    130     0    stevel static int		compute_size_to_transfer(const void *, const void *);
    131     0    stevel static int		set_importance_cb(pool_conf_t *, pool_t *, void *);
    132     0    stevel static int		unset_importance_cb(pool_conf_t *, pool_t *, void *);
    133     0    stevel static int		add_importance_props(pool_conf_t *);
    134     0    stevel static int		remove_importance_props(pool_conf_t *);
    135     0    stevel static int		clone_element(pool_conf_t *, pool_elem_t *,
    136     0    stevel     const char *, pool_value_t *, void *);
    137     0    stevel static int		clean_element(pool_conf_t *, pool_elem_t *,
    138     0    stevel     const char *, pool_value_t *, void *);
    139     0    stevel 
    140     0    stevel /*
    141     0    stevel  * commit_create() is used to create a configuration element upon the
    142     0    stevel  * system.  Since only pools and resource actually need to perform any
    143     0    stevel  * action, other elements are ignored as a no-op.
    144     0    stevel  */
    145     0    stevel static int
    146     0    stevel commit_create(pool_conf_t *conf, pool_elem_t **e1)
    147     0    stevel {
    148     0    stevel 	pool_resource_t *res;
    149     0    stevel 	pool_t *pool;
    150     0    stevel 	const char *res_type;
    151     0    stevel 	pool_elem_t *src = *e1;
    152     0    stevel 	uint64_t smin, smax, dmax;
    153     0    stevel 	pool_value_t val = POOL_VALUE_INITIALIZER;
    154     0    stevel 	char *name;
    155     0    stevel 
    156     0    stevel 	switch (pool_elem_class(src)) {
    157     0    stevel 	case PEC_SYSTEM:	/* NO-OP */
    158     0    stevel 		break;
    159     0    stevel 	case PEC_POOL:
    160     0    stevel 		name = elem_get_name(src);
    161     0    stevel 		if ((pool = pool_create(conf, name)) == NULL) {
    162     0    stevel 			free(name);
    163     0    stevel 			return (PO_FAIL);
    164     0    stevel 		}
    165     0    stevel 		free(name);
    166     0    stevel 		/*
    167     0    stevel 		 * Now copy the properties from the original pool to the
    168     0    stevel 		 * new one
    169     0    stevel 		 */
    170     0    stevel 		if (pool_walk_properties(TO_CONF(src), src, TO_ELEM(pool),
    171     0    stevel 		    clone_element) != PO_SUCCESS)
    172     0    stevel 			return (PO_FAIL);
    173     0    stevel 		/*
    174     0    stevel 		 * Add a pointer to the src element which can be
    175     0    stevel 		 * updated with a sys_id when the sys_id is allocated
    176     0    stevel 		 * to the created element.
    177     0    stevel 		 */
    178     0    stevel 		pool_set_pair(TO_ELEM(pool), src);
    179     0    stevel 		*e1 = TO_ELEM(pool);
    180     0    stevel 		break;
    181     0    stevel 	case PEC_RES_COMP:
    182     0    stevel 	case PEC_RES_AGG:
    183     0    stevel 		name = elem_get_name(src);
    184     0    stevel 		res_type = pool_elem_class_string(src);
    185     0    stevel 		if ((res = pool_resource_create(conf, res_type, name)) ==
    186     0    stevel 		    NULL) {
    187     0    stevel 			free(name);
    188     0    stevel 			return (PO_FAIL);
    189     0    stevel 		}
    190     0    stevel 		free(name);
    191     0    stevel 		/*
    192     0    stevel 		 * Need to do some ordering of property updates.
    193     0    stevel 		 * Compare the values of source min/max and
    194     0    stevel 		 * destination min/max. If smin < dmax then update the
    195     0    stevel 		 * smin first, else update the max first.
    196     0    stevel 		 */
    197     0    stevel 		if (resource_get_min(pool_elem_res(src), &smin) != PO_SUCCESS ||
    198     0    stevel 		    resource_get_max(pool_elem_res(src), &smax) != PO_SUCCESS ||
    199     0    stevel 		    resource_get_max(res, &dmax) != PO_SUCCESS)
    200     0    stevel 			return (PO_FAIL);
    201     0    stevel 		if (smin < dmax) {
    202     0    stevel 			pool_value_set_uint64(&val, smin);
    203     0    stevel 			if (pool_put_ns_property(TO_ELEM(res), c_min_prop,
    204     0    stevel 			    &val) != PO_SUCCESS)
    205     0    stevel 				return (PO_FAIL);
    206     0    stevel 		} else {
    207     0    stevel 			pool_value_set_uint64(&val, smax);
    208     0    stevel 			if (pool_put_ns_property(TO_ELEM(res), c_max_prop,
    209     0    stevel 			    &val) != PO_SUCCESS)
    210     0    stevel 				return (PO_FAIL);
    211     0    stevel 		}
    212     0    stevel 		/*
    213     0    stevel 		 * Now copy the properties from the original resource
    214     0    stevel 		 * to the new one
    215     0    stevel 		 */
    216     0    stevel 		if (pool_walk_properties(TO_CONF(src), src, TO_ELEM(res),
    217     0    stevel 		    clone_element) != PO_SUCCESS)
    218     0    stevel 			return (PO_FAIL);
    219     0    stevel 		/*
    220     0    stevel 		 * Add a pointer to the src element which can be
    221     0    stevel 		 * updated with a sys_id when the sys_id is allocated
    222     0    stevel 		 * to the created element.
    223     0    stevel 		 */
    224     0    stevel 		pool_set_pair(TO_ELEM(res), src);
    225     0    stevel 		*e1 = TO_ELEM(res);
    226     0    stevel 		break;
    227     0    stevel 	case PEC_COMP:		/* NO-OP */
    228     0    stevel 		break;
    229     0    stevel 	default:
    230     0    stevel 		return (PO_FAIL);
    231     0    stevel 	}
    232     0    stevel 	return (PO_SUCCESS);
    233     0    stevel }
    234     0    stevel 
    235     0    stevel 
    236     0    stevel /*
    237     0    stevel  * commit_delete() is used to delete a configuration element upon the
    238     0    stevel  * system.  Since only pools and resources actually need to perform
    239     0    stevel  * any action, other elements are ignored as a no-op.
    240     0    stevel  */
    241     0    stevel static int
    242     0    stevel commit_delete(pool_elem_t *pe)
    243     0    stevel {
    244     0    stevel 	pool_resource_t *res;
    245     0    stevel 	pool_t *pool;
    246     0    stevel 	int ret = 0;
    247  3247  gjelinek 
    248  3247  gjelinek 	if (elem_is_tmp(pe))
    249  3247  gjelinek 		return (PO_SUCCESS);
    250     0    stevel 
    251     0    stevel 	switch (pool_elem_class(pe)) {
    252     0    stevel 	case PEC_SYSTEM:	/* NO-OP */
    253     0    stevel 		break;
    254     0    stevel 	case PEC_POOL:
    255     0    stevel 		pool = pool_elem_pool(pe);
    256     0    stevel 		ret = pool_destroy(TO_CONF(pe), pool);
    257     0    stevel 		break;
    258     0    stevel 	case PEC_RES_COMP:
    259     0    stevel 	case PEC_RES_AGG:
    260     0    stevel 		res = pool_elem_res(pe);
    261     0    stevel 		ret = pool_resource_destroy(TO_CONF(pe), res);
    262     0    stevel 		break;
    263     0    stevel 	case PEC_COMP:		/* NO-OP */
    264     0    stevel 		break;
    265     0    stevel 	default:
    266     0    stevel 		return (PO_FAIL);
    267     0    stevel 	}
    268     0    stevel 	return (ret);
    269     0    stevel }
    270     0    stevel 
    271     0    stevel /*
    272     0    stevel  * commit_update() is used to update a configuration element upon the
    273     0    stevel  * system or in a static configuration file. The pass parameter
    274     0    stevel  * governs whether properties are being updated or associations.  In
    275     0    stevel  * pass 0, properties are updated. If the element is of class
    276     0    stevel  * PEC_COMP, then make sure that the element in the static
    277     0    stevel  * configuration file is correctly located before proceeding with the
    278     0    stevel  * update. Then, the element in the dynamic configuration file is
    279     0    stevel  * updated. In pass 1, ie. pass != 0, any pool components have their
    280     0    stevel  * associations updated in the dynamic configuration.
    281     0    stevel  */
    282     0    stevel static int
    283     0    stevel commit_update(pool_elem_t *e1, pool_elem_t *e2, int pass)
    284     0    stevel {
    285     0    stevel 	if (pass == 0) {
    286     0    stevel 		pool_resource_t *res1;
    287     0    stevel 		pool_resource_t *res2;
    288     0    stevel 		if (pool_elem_class(e1) == PEC_COMP) {
    289     0    stevel 			res1 = pool_get_owning_resource(TO_CONF(e1),
    290     0    stevel 			    pool_elem_comp(e1));
    291     0    stevel 			res2 = pool_get_owning_resource(TO_CONF(e2),
    292     0    stevel 			    pool_elem_comp(e2));
    293     0    stevel 			if (pool_elem_compare_name(TO_ELEM(res1),
    294     0    stevel 			    TO_ELEM(res2)) != 0) {
    295     0    stevel 				char *name;
    296     0    stevel 				const pool_resource_t *newres;
    297     0    stevel 				pool_component_t *comps[2] = { NULL };
    298     0    stevel 
    299     0    stevel 				comps[0] = pool_elem_comp(e2);
    300  2324   sdussud 				name = elem_get_name(TO_ELEM(res1));
    301     0    stevel 				newres = pool_get_resource(TO_CONF(e2),
    302     0    stevel 				    pool_elem_class_string(TO_ELEM(res1)),
    303     0    stevel 				    name);
    304     0    stevel 				free(name);
    305     0    stevel 				assert(newres);
    306     0    stevel #ifdef DEBUG
    307     0    stevel 				dprintf("transferring: res, comp\n");
    308     0    stevel 				pool_elem_dprintf(TO_ELEM(newres));
    309     0    stevel 				pool_elem_dprintf(e2);
    310     0    stevel #endif	/* DEBUG */
    311     0    stevel 				(void) pool_resource_xtransfer(TO_CONF(e2),
    312     0    stevel 				    res2, (pool_resource_t *)newres, comps);
    313     0    stevel 			}
    314     0    stevel 		}
    315     0    stevel 		if (pool_walk_properties(TO_CONF(e2), e2, NULL,
    316     0    stevel 		    clean_element) != PO_SUCCESS) {
    317     0    stevel 			return (PO_FAIL);
    318     0    stevel 		}
    319     0    stevel 		/*
    320     0    stevel 		 * Need to do some ordering of property updates if the
    321     0    stevel 		 * element to be updated is a resource.  Compare the
    322     0    stevel 		 * values of source min/max and destination
    323     0    stevel 		 * min/max. If smin < dmax then update the smin first,
    324     0    stevel 		 * else update the max first.
    325     0    stevel 		 */
    326     0    stevel 		if (pool_elem_class(e1) == PEC_RES_COMP ||
    327     0    stevel 		    pool_elem_class(e1) == PEC_RES_AGG) {
    328     0    stevel 			uint64_t smin, smax, dmax;
    329     0    stevel 			pool_value_t val = POOL_VALUE_INITIALIZER;
    330     0    stevel 
    331     0    stevel 			if (resource_get_min(pool_elem_res(e1), &smin) !=
    332     0    stevel 			    PO_SUCCESS ||
    333     0    stevel 			    resource_get_max(pool_elem_res(e1), &smax) !=
    334     0    stevel 			    PO_SUCCESS ||
    335     0    stevel 			    resource_get_max(pool_elem_res(e2), &dmax) !=
    336     0    stevel 			    PO_SUCCESS)
    337     0    stevel 				return (PO_FAIL);
    338     0    stevel 			if (smin < dmax) {
    339     0    stevel 				pool_value_set_uint64(&val, smin);
    340     0    stevel 				if (pool_put_ns_property(e2, c_min_prop,
    341     0    stevel 				    &val) != PO_SUCCESS)
    342     0    stevel 					return (PO_FAIL);
    343     0    stevel 			} else {
    344     0    stevel 				pool_value_set_uint64(&val, smax);
    345     0    stevel 				if (pool_put_ns_property(e2, c_max_prop,
    346     0    stevel 				    &val) != PO_SUCCESS)
    347     0    stevel 					return (PO_FAIL);
    348     0    stevel 			}
    349     0    stevel 		}
    350     0    stevel 		/*
    351     0    stevel 		 * This next couple of steps needs some
    352     0    stevel 		 * explanation. The first walk, copies all the
    353     0    stevel 		 * properties that are writeable from the static
    354     0    stevel 		 * configuration to the dynamic configuration. The
    355     0    stevel 		 * second walk copies all properties (writeable or
    356     0    stevel 		 * not) from the dynamic configuration element back to
    357     0    stevel 		 * the static configuration element. This ensures that
    358     0    stevel 		 * updates from the static configuration element are
    359     0    stevel 		 * correctly applied to the dynamic configuration and
    360     0    stevel 		 * then the static configuration element is updated
    361     0    stevel 		 * with the latest values of the read-only xproperties
    362     0    stevel 		 * from the dynamic configuration element. The
    363     0    stevel 		 * enforcing of permisssions is performed in
    364     0    stevel 		 * clone_element by its choice of property
    365     0    stevel 		 * manipulation function.
    366     0    stevel 		 */
    367     0    stevel 		if (pool_walk_properties(TO_CONF(e1), e1, e2, clone_element) !=
    368     0    stevel 		    PO_SUCCESS) {
    369     0    stevel 			return (PO_FAIL);
    370     0    stevel 		}
    371     0    stevel 		if (pool_walk_properties(TO_CONF(e2), e2, e1, clone_element) !=
    372     0    stevel 		    PO_SUCCESS) {
    373     0    stevel 			return (PO_FAIL);
    374     0    stevel 		}
    375     0    stevel 	} else {
    376     0    stevel 		if (pool_elem_class(e1) == PEC_POOL) {
    377     0    stevel 			pool_resource_t **rs;
    378     0    stevel 			uint_t nelem;
    379     0    stevel 			int i;
    380     0    stevel 			pool_value_t val = POOL_VALUE_INITIALIZER;
    381     0    stevel 			pool_value_t *pvals[] = { NULL, NULL };
    382     0    stevel 
    383     0    stevel 			pvals[0] = &val;
    384     0    stevel 			if (pool_value_set_string(&val, "pset") != PO_SUCCESS ||
    385     0    stevel 			    pool_value_set_name(&val, c_type) != PO_SUCCESS)
    386     0    stevel 				return (PO_FAIL);
    387     0    stevel 			if ((rs = pool_query_pool_resources(TO_CONF(e1),
    388     0    stevel 			    pool_elem_pool(e1), &nelem, pvals)) != NULL) {
    389     0    stevel 				for (i = 0; i < nelem; i++) {
    390     0    stevel 					const pool_resource_t *tgt_res;
    391     0    stevel 					char *res_name =
    392     0    stevel 					    elem_get_name(TO_ELEM(rs[i]));
    393     0    stevel 
    394     0    stevel 					if ((tgt_res = pool_get_resource(
    395     0    stevel 					    TO_CONF(e2), pool_elem_class_string(
    396     0    stevel 					    TO_ELEM(rs[i])), res_name)) ==
    397     0    stevel 					    NULL) {
    398     0    stevel 						tgt_res = get_default_resource(
    399     0    stevel 						    rs[i]);
    400     0    stevel 					}
    401     0    stevel 					free(res_name);
    402     0    stevel 					if (pool_associate(TO_CONF(e2),
    403     0    stevel 					    pool_elem_pool(e2), tgt_res) !=
    404     0    stevel 					    PO_SUCCESS) {
    405     0    stevel 						free(rs);
    406     0    stevel 						return (PO_FAIL);
    407     0    stevel 					}
    408     0    stevel 				}
    409     0    stevel 				free(rs);
    410     0    stevel 			}
    411     0    stevel 		}
    412     0    stevel 	}
    413     0    stevel 	return (PO_SUCCESS);
    414     0    stevel }
    415     0    stevel 
    416     0    stevel /*
    417     0    stevel  * diff_and_fix() works out the differences between two configurations
    418     0    stevel  * and modifies the state of the system to match the operations
    419     0    stevel  * required to bring the two configurations into sync.
    420     0    stevel  *
    421     0    stevel  * Returns PO_SUCCESS/PO_FAIL.
    422     0    stevel  */
    423     0    stevel static int
    424     0    stevel diff_and_fix(pool_conf_t *stc, pool_conf_t *dyn)
    425     0    stevel {
    426     0    stevel 	/*
    427     0    stevel 	 * The ordering of the operations is significant, we must
    428     0    stevel 	 * process the system element, then the pools elements, then
    429     0    stevel 	 * the resource elements, then the pools elements again and
    430     0    stevel 	 * finally the resource components.
    431     0    stevel 	 *
    432     0    stevel 	 * TODO
    433     0    stevel 	 * PEC_RES_COMP are the only type of resources
    434     0    stevel 	 * currently. When PEC_RES_AGG resources are added they must
    435     0    stevel 	 * also be processed.
    436     0    stevel 	 */
    437     0    stevel 	if (process_lists(PEC_SYSTEM, stc, dyn, 0) != PO_SUCCESS) {
    438     0    stevel 		return (PO_FAIL);
    439     0    stevel 	}
    440     0    stevel 	if (process_lists(PEC_POOL, stc, dyn, 0) != PO_SUCCESS) {
    441     0    stevel 		return (PO_FAIL);
    442     0    stevel 	}
    443     0    stevel 	if (process_lists(PEC_RES_COMP, stc, dyn, 0) != PO_SUCCESS) {
    444     0    stevel 		return (PO_FAIL);
    445     0    stevel 	}
    446     0    stevel 	if (process_lists(PEC_COMP, stc, dyn, 0) != PO_SUCCESS) {
    447     0    stevel 		return (PO_FAIL);
    448     0    stevel 	}
    449     0    stevel 	if (process_lists(PEC_POOL, stc, dyn, 1) != PO_SUCCESS) {
    450     0    stevel 		return (PO_FAIL);
    451     0    stevel 	}
    452     0    stevel 	/*
    453     0    stevel 	 * Share the resources. It has to be called for both
    454     0    stevel 	 * configurations to ensure that the configurations still look
    455     0    stevel 	 * the same.
    456     0    stevel 	 */
    457     0    stevel 	if (share_resources(dyn) != PO_SUCCESS) {
    458     0    stevel 		return (PO_FAIL);
    459     0    stevel 	}
    460     0    stevel 	if (share_resources(stc) != PO_SUCCESS) {
    461     0    stevel 		return (PO_FAIL);
    462     0    stevel 	}
    463     0    stevel 	return (PO_SUCCESS);
    464     0    stevel }
    465     0    stevel 
    466     0    stevel static int
    467     0    stevel process_elem_lt(pool_elem_t *pe, pool_conf_t *dyn)
    468     0    stevel {
    469     0    stevel 	if (pool_elem_class(pe) == PEC_COMP) {
    470     0    stevel 		if (pool_component_destroy(pool_elem_comp(pe)) == PO_FAIL) {
    471     0    stevel 			return (PO_FAIL);
    472     0    stevel 		}
    473     0    stevel 	} else if (! elem_is_default(pe)) {
    474     0    stevel 		if (commit_create(dyn, &pe) != PO_SUCCESS) {
    475     0    stevel 			return (PO_FAIL);
    476     0    stevel 		}
    477     0    stevel 	}
    478     0    stevel 	return (PO_SUCCESS);
    479     0    stevel }
    480     0    stevel 
    481     0    stevel static int
    482     0    stevel process_elem_gt(pool_elem_t *pe, pool_conf_t *stc, pool_conf_t *dyn)
    483     0    stevel {
    484     0    stevel 	if (pool_elem_class(pe) == PEC_COMP) {
    485     0    stevel 		pool_resource_t *owner;
    486     0    stevel 		const pool_resource_t *parent_res;
    487     0    stevel 		pool_value_t val = POOL_VALUE_INITIALIZER;
    488     0    stevel 		const pool_component_t *newcomp;
    489     0    stevel 		const char *resname;
    490     0    stevel 		const char *restype;
    491     0    stevel 		/*
    492     0    stevel 		 * I have to find the right parent in the static
    493     0    stevel 		 * configuration. It may not exist, in which case it's
    494     0    stevel 		 * correct to put it in the default
    495     0    stevel 		 */
    496     0    stevel 		owner = pool_get_owning_resource(dyn,
    497     0    stevel 		    pool_elem_comp(pe));
    498     0    stevel 		if (pool_get_ns_property(TO_ELEM(owner), "name", &val) ==
    499     0    stevel 		    POC_INVAL)
    500     0    stevel 			return (PO_FAIL);
    501     0    stevel 
    502     0    stevel 		if (pool_value_get_string(&val, &resname) == PO_FAIL)
    503     0    stevel 			return (PO_FAIL);
    504     0    stevel 
    505     0    stevel 		if ((resname = strdup(resname)) == NULL)
    506     0    stevel 			return (PO_FAIL);
    507     0    stevel 
    508     0    stevel 		restype = pool_elem_class_string(TO_ELEM(owner));
    509     0    stevel 		parent_res = pool_get_resource(stc, restype, resname);
    510     0    stevel 		free((void *)resname);
    511     0    stevel 		if (parent_res == NULL)
    512     0    stevel 			parent_res = resource_by_sysid(stc, PS_NONE, restype);
    513     0    stevel 		/*
    514     0    stevel 		 * Now need to make a copy of the component in the
    515     0    stevel 		 * dynamic configuration in the static configuration.
    516     0    stevel 		 */
    517     0    stevel 		if ((newcomp = pool_component_create(stc, parent_res,
    518     0    stevel 		    elem_get_sysid(pe))) == NULL)
    519     0    stevel 			return (PO_FAIL);
    520     0    stevel 
    521     0    stevel 		if (pool_walk_properties(TO_CONF(pe), pe, TO_ELEM(newcomp),
    522     0    stevel 		    clone_element) != PO_SUCCESS)
    523     0    stevel 			return (PO_FAIL);
    524     0    stevel 	} else if (elem_is_default(pe)) {
    525     0    stevel 		pool_resource_t *newres;
    526     0    stevel 		pool_t *newpool;
    527     0    stevel 		char *name;
    528     0    stevel 
    529     0    stevel 		if ((name = elem_get_name(pe)) == NULL)
    530     0    stevel 			return (PO_FAIL);
    531     0    stevel 		switch (pool_elem_class(pe)) {
    532     0    stevel 		case PEC_POOL:
    533     0    stevel 			if ((newpool = pool_create(stc, name)) == NULL) {
    534     0    stevel 				free(name);
    535     0    stevel 				return (PO_FAIL);
    536     0    stevel 			}
    537     0    stevel 			free(name);
    538     0    stevel 			if (pool_walk_properties(TO_CONF(pe), pe,
    539     0    stevel 			    TO_ELEM(newpool), clone_element) != PO_SUCCESS)
    540     0    stevel 				return (PO_FAIL);
    541     0    stevel 			break;
    542     0    stevel 		case PEC_RES_AGG:
    543     0    stevel 		case PEC_RES_COMP:
    544     0    stevel 			if ((newres = pool_resource_create(stc,
    545     0    stevel 			    pool_elem_class_string(pe), name)) ==
    546     0    stevel 			    NULL) {
    547     0    stevel 				free(name);
    548     0    stevel 				return (PO_FAIL);
    549     0    stevel 			}
    550     0    stevel 			free(name);
    551     0    stevel 			if (pool_walk_properties(TO_CONF(pe), pe,
    552     0    stevel 			    TO_ELEM(newres), clone_element) != PO_SUCCESS)
    553     0    stevel 				return (PO_FAIL);
    554     0    stevel 			break;
    555     0    stevel 		default:
    556     0    stevel 			free(name);
    557     0    stevel 			break;
    558     0    stevel 		}
    559     0    stevel 	} else {
    560     0    stevel 		if (commit_delete(pe) != PO_SUCCESS)
    561     0    stevel 			return (PO_FAIL);
    562     0    stevel 	}
    563     0    stevel 	return (PO_SUCCESS);
    564     0    stevel }
    565     0    stevel 
    566     0    stevel /*
    567     0    stevel  * This function compares the elements of the supplied type in the
    568     0    stevel  * static and dynamic configurations supplied. The lists of elements
     569     0    stevel  * are compared and used to create, delete and update elements in
    570     0    stevel  * both the static and dynamic configurations. The pass parameter is
    571     0    stevel  * used to indicate to commit_update() whether property updates or
    572     0    stevel  * association updates should be performed.
    573     0    stevel  */
    574     0    stevel static int
    575     0    stevel process_lists(int type, pool_conf_t *stc, pool_conf_t *dyn, int pass)
    576     0    stevel {
    577     0    stevel 	uint_t stc_nelem = 0, dyn_nelem = 0;
    578     0    stevel 	pool_elem_t **stc_elems, **dyn_elems;
    579     0    stevel 	int i, j;
    580     0    stevel 	int status = PO_SUCCESS;
    581     0    stevel 
    582     0    stevel 	if ((stc_elems = get_elem_list(stc, type, &stc_nelem)) == NULL)
    583     0    stevel 		return (PO_FAIL);
    584     0    stevel 
    585     0    stevel 	qsort(stc_elems, stc_nelem, sizeof (pool_elem_t *),
    586     0    stevel 	    qsort_elem_compare);
    587     0    stevel 
    588     0    stevel 	if ((dyn_elems = get_elem_list(dyn, type, &dyn_nelem)) == NULL) {
    589     0    stevel 		free(stc_elems);
    590     0    stevel 		return (PO_FAIL);
    591     0    stevel 	}
    592     0    stevel 
    593     0    stevel 	qsort(dyn_elems, dyn_nelem, sizeof (pool_elem_t *),
    594     0    stevel 	    qsort_elem_compare);
    595     0    stevel 	/*
    596     0    stevel 	 * Step through and do the updating, remember that we are
    597     0    stevel 	 * comparing using the compare function for the configuration
    598     0    stevel 	 * and that is fixed.
    599     0    stevel 	 */
    600     0    stevel 	i = j = 0;
    601     0    stevel 	while (status == PO_SUCCESS && i < stc_nelem && j < dyn_nelem) {
    602     0    stevel 		int compare;
    603     0    stevel 		/*
    604     0    stevel 		 * We are going to do this by stepping through the static
    605     0    stevel 		 * list first.
    606     0    stevel 		 */
    607     0    stevel 		if (elem_is_default(stc_elems[i]) &&
    608     0    stevel 		    elem_is_default(dyn_elems[j]))
    609     0    stevel 			compare = 0;
    610     0    stevel 		else
    611     0    stevel 			compare = pool_elem_compare_name(stc_elems[i],
    612     0    stevel 			    dyn_elems[j]);
    613     0    stevel 		if (compare < 0) {
    614     0    stevel 			status = process_elem_lt(stc_elems[i], dyn);
    615     0    stevel 			i++;
    616     0    stevel 		} else if (compare > 0) {
    617     0    stevel 			status = process_elem_gt(dyn_elems[j], stc, dyn);
    618     0    stevel 			j++;
    619     0    stevel 		} else {	/* compare == 0 */
    620     0    stevel 			if (commit_update(stc_elems[i], dyn_elems[j], pass)
    621     0    stevel 			    != PO_SUCCESS) {
    622     0    stevel 				status = PO_FAIL;
    623     0    stevel 			}
    624     0    stevel 			i++;
    625     0    stevel 			j++;
    626     0    stevel 		}
    627     0    stevel 	}
    628     0    stevel 	if (status == PO_FAIL) {
    629     0    stevel 		free(stc_elems);
    630     0    stevel 		free(dyn_elems);
    631     0    stevel 		return (PO_FAIL);
    632     0    stevel 	}
    633     0    stevel 	while (status == PO_SUCCESS && i < stc_nelem) {
    634     0    stevel 		status = process_elem_lt(stc_elems[i], dyn);
    635     0    stevel 		i++;
    636     0    stevel 	}
    637     0    stevel 	if (status == PO_FAIL) {
    638     0    stevel 		free(stc_elems);
    639     0    stevel 		free(dyn_elems);
    640     0    stevel 		return (PO_FAIL);
    641     0    stevel 	}
    642     0    stevel 	while (status == PO_SUCCESS && j < dyn_nelem) {
    643     0    stevel 		status = process_elem_gt(dyn_elems[j], stc, dyn);
    644     0    stevel 		j++;
    645     0    stevel 	}
    646     0    stevel 	free(stc_elems);
    647     0    stevel 	free(dyn_elems);
    648     0    stevel 	return (status);
    649     0    stevel }
    650     0    stevel 
    651     0    stevel /*
    652     0    stevel  * get_elem_list() returns a list of pool_elem_t's. The size of the
    653     0    stevel  * list is written into nelem. The list contains elements of all types
    654     0    stevel  * that pools is interested in: i.e. system, pool, resources and
    655     0    stevel  * resource components. It is the caller's responsibility to free the
    656     0    stevel  * list when it is finished with.
    657     0    stevel  *
    658     0    stevel  * The array of pointers returned by the type specific query can be
    659     0    stevel  * safely cast to be an array of pool_elem_t pointers. In the case of
    660     0    stevel  * PEC_RES_COMP some additional processing is required to qualify the
    661     0    stevel  * list of elements.
    662     0    stevel  *
    663     0    stevel  * Returns a pointer to a list of pool_elem_t's or NULL on failure.
    664     0    stevel  */
    665     0    stevel static pool_elem_t **
    666     0    stevel get_elem_list(const pool_conf_t *conf, int type, uint_t *nelem)
    667     0    stevel {
    668     0    stevel 	pool_resource_t **rl;
    669     0    stevel 	pool_t **pl;
    670     0    stevel 	pool_component_t **cl;
    671     0    stevel 	pool_elem_t **elems = NULL;
    672     0    stevel 	int i;
    673     0    stevel 
    674     0    stevel 	switch (type) {
    675     0    stevel 	case PEC_SYSTEM:
    676     0    stevel 		if ((elems = malloc(sizeof (pool_elem_t *))) == NULL)
    677     0    stevel 			return (NULL);
    678     0    stevel 		*nelem = 1;
    679     0    stevel 		elems[0] = pool_conf_to_elem(conf);
    680     0    stevel 		break;
    681     0    stevel 	case PEC_POOL:
    682     0    stevel 		if ((pl = pool_query_pools(conf, nelem, NULL)) != NULL) {
    683     0    stevel 			elems = (pool_elem_t **)pl;
    684     0    stevel 		}
    685     0    stevel 		break;
    686     0    stevel 	case PEC_RES_COMP:
    687     0    stevel 		if ((rl = pool_query_resources(conf, nelem, NULL)) != NULL) {
    688     0    stevel 			int j = 0;
    689     0    stevel 			elems = (pool_elem_t **)rl;
    690     0    stevel 			for (i = 0; i < *nelem; i++) {
    691     0    stevel 				if (pool_elem_class(TO_ELEM(rl[i])) ==
    692     0    stevel 				    PEC_RES_COMP)
    693     0    stevel 					elems[j++] = TO_ELEM(rl[i]);
    694     0    stevel 			}
    695     0    stevel 			*nelem = j;
    696     0    stevel 		}
    697     0    stevel 		break;
    698     0    stevel 	case PEC_COMP:
    699     0    stevel 		if ((cl = pool_query_components(conf, nelem, NULL)) != NULL) {
    700     0    stevel 			elems = (pool_elem_t **)cl;
    701     0    stevel 		}
    702     0    stevel 		break;
    703     0    stevel 	default:
    704     0    stevel 		abort();
    705     0    stevel 		break;
    706     0    stevel 	}
    707     0    stevel 	return (elems);
    708     0    stevel }
    709     0    stevel 
    710     0    stevel /*
    711     0    stevel  * share_resources() sets up the allocation of resources by each
    712     0    stevel  * provider.  Firstly all resources are updated with the importance of
    713     0    stevel  * each pool, then each resource provider is invoked in turn with a
    714     0    stevel  * list of it's own resources.  Finally, the pool importance details
    715     0    stevel  * are removed from the resources.
    716     0    stevel  *
    717     0    stevel  * Returns PO_SUCCESS/PO_FAIL
    718     0    stevel  */
    719     0    stevel static int
    720     0    stevel share_resources(pool_conf_t *conf)
    721     0    stevel {
    722     0    stevel 	pool_resource_t **resources;
    723     0    stevel 	uint_t nelem;
    724     0    stevel 	pool_value_t *props[] = { NULL, NULL };
    725     0    stevel 	pool_value_t val = POOL_VALUE_INITIALIZER;
    726     0    stevel 
    727     0    stevel 	props[0] = &val;
    728     0    stevel 
    729     0    stevel 	/*
    730     0    stevel 	 * Call an allocation function for each type of supported resource.
    731     0    stevel 	 * This function is responsible for "sharing" resources to resource
    732     0    stevel 	 * sets as determined by the system.allocate-method.
    733     0    stevel 	 */
    734     0    stevel 
    735     0    stevel 	if (pool_value_set_string(props[0], "pset") != PO_SUCCESS ||
    736     0    stevel 	    pool_value_set_name(props[0], c_type) != PO_SUCCESS)
    737     0    stevel 		return (PO_FAIL);
    738     0    stevel 
    739     0    stevel 	if (add_importance_props(conf) != PO_SUCCESS) {
    740     0    stevel 		(void) remove_importance_props(conf);
    741     0    stevel 		return (PO_FAIL);
    742     0    stevel 	}
    743     0    stevel 
    744     0    stevel 	if ((resources = pool_query_resources(conf, &nelem, props)) != NULL) {
    745     0    stevel 		/*
    746     0    stevel 		 * 'pool.importance' defines the importance of a pool;
    747     0    stevel 		 * resources inherit the importance of the pool that
    748     0    stevel 		 * is associated with them. If more than one pool is
    749     0    stevel 		 * associated with a resource, the importance of the
    750     0    stevel 		 * resource is the maximum importance of all
    751     0    stevel 		 * associated pools.  Use '_importance' on resources
    752     0    stevel 		 * to determine who gets extra.
    753     0    stevel 		 */
    754     0    stevel 		if (resource_allocate("pset", resources, nelem) != PO_SUCCESS) {
    755     0    stevel 			free(resources);
    756     0    stevel 			(void) remove_importance_props(conf);
    757     0    stevel 			return (PO_FAIL);
    758     0    stevel 		}
    759     0    stevel 	}
    760     0    stevel 	free(resources);
    761     0    stevel 	(void) remove_importance_props(conf);
    762     0    stevel 	return (PO_SUCCESS);
    763     0    stevel }
    764     0    stevel 
    765     0    stevel 
    766     0    stevel /*
    767     0    stevel  * Work out which allocation method to use based on the value of the
    768     0    stevel  * system.allocate-method property.
    769     0    stevel  */
    770     0    stevel int
    771     0    stevel resource_allocate(const char *type, pool_resource_t **res, uint_t nelem)
    772     0    stevel {
    773     0    stevel 	pool_elem_t *pe;
    774     0    stevel 	const char *method_name;
    775     0    stevel 	uint64_t method;
    776     0    stevel 	pool_value_t val = POOL_VALUE_INITIALIZER;
    777     0    stevel 	int ret;
    778     0    stevel 
    779     0    stevel 	pe = pool_conf_to_elem(TO_CONF(TO_ELEM(res[0])));
    780     0    stevel 
    781     0    stevel 	if (pool_get_ns_property(pe, "allocate-method", &val) != POC_STRING)
    782     0    stevel 		method_name = POA_IMPORTANCE;
    783     0    stevel 	else {
    784     0    stevel 		(void) pool_value_get_string(&val, &method_name);
    785     0    stevel 	}
    786     0    stevel 	if (strcmp(POA_IMPORTANCE, method_name) != 0) {
    787     0    stevel 		if (strcmp(POA_SURPLUS_TO_DEFAULT, method_name) != 0) {
    788     0    stevel 			pool_seterror(POE_INVALID_CONF);
    789     0    stevel 			return (PO_FAIL);
    790     0    stevel 		} else {
    791     0    stevel 			method = POA_SURPLUS_TO_DEFAULT_NUM;
    792     0    stevel 		}
    793     0    stevel 	} else {
    794     0    stevel 		method = POA_IMPORTANCE_NUM;
    795     0    stevel 	}
    796     0    stevel 	switch (method) {
    797     0    stevel 	case POA_IMPORTANCE_NUM:
    798     0    stevel 		/*
    799     0    stevel 		 * TODO: Add support for new resource types
    800     0    stevel 		 */
    801     0    stevel 		switch (pool_resource_elem_class_from_string(type)) {
    802     0    stevel 		case PREC_PSET:
    803     0    stevel 			ret = pset_allocate_imp(res, nelem);
    804     0    stevel 			break;
    805     0    stevel 		default:
    806     0    stevel 			ret = PO_FAIL;
    807     0    stevel 			break;
    808     0    stevel 		}
    809     0    stevel 		break;
    810     0    stevel 	case POA_SURPLUS_TO_DEFAULT_NUM:
    811     0    stevel 		ret = resource_allocate_default(res, nelem);
    812     0    stevel 		break;
    813     0    stevel 	}
    814     0    stevel 
    815     0    stevel 	return (ret);
    816     0    stevel }
    817     0    stevel 
    818     0    stevel /*
    819     0    stevel  * Each set will get its minimum, however if there is more than the
    820     0    stevel  * total minimum available, then leave this in the default set.
    821     0    stevel  */
    822     0    stevel int
    823     0    stevel resource_allocate_default(pool_resource_t **res, uint_t nelem)
    824     0    stevel {
    825     0    stevel 	res_info_t *res_info;
    826     0    stevel 	uint_t j;
    827     0    stevel 	pool_resource_t *default_res = NULL;
    828     0    stevel 
    829     0    stevel 	if (nelem == 1)
    830     0    stevel 		return (PO_SUCCESS);
    831     0    stevel 
    832     0    stevel 	if ((res_info = calloc(nelem, sizeof (res_info_t))) == NULL) {
    833     0    stevel 		return (PO_FAIL);
    834     0    stevel 	}
    835     0    stevel 
    836     0    stevel 	/* Load current resource values. */
    837     0    stevel 	for (j = 0; j < nelem; j++) {
    838     0    stevel 
    839     0    stevel 		if (default_res == NULL &&
    840     0    stevel 		    resource_is_default(res[j]) == PO_TRUE)
    841     0    stevel 			default_res = res[j];
    842     0    stevel 
    843     0    stevel 		if (resource_get_max(res[j],
    844     0    stevel 		    &res_info[j].ri_max) == PO_FAIL ||
    845     0    stevel 		    resource_get_min(res[j],
    846     0    stevel 			&res_info[j].ri_min) == PO_FAIL ||
    847     0    stevel 		    resource_get_size(res[j],
    848     0    stevel 			&res_info[j].ri_oldsize) == PO_FAIL ||
    849     0    stevel 		    resource_get_pinned(res[j],
    850     0    stevel 			&res_info[j].ri_pinned) == PO_FAIL) {
    851     0    stevel 			free(res_info);
    852     0    stevel 			return (PO_FAIL);
    853     0    stevel 		}
    854     0    stevel 		res_info[j].ri_res = res[j];
    855     0    stevel 	}
    856     0    stevel 
    857     0    stevel 	/*
    858     0    stevel 	 * Firstly, for all resources that have size greater than min,
    859     0    stevel 	 * transfer all movable size above min to the default resource.
    860     0    stevel 	 */
    861     0    stevel 	for (j = 0; j < nelem; j++) {
    862     0    stevel 
    863     0    stevel 		uint64_t real_min;
    864     0    stevel 
    865     0    stevel 		/* compute the real minimum number of resources */
    866     0    stevel 		real_min = MAX(res_info[j].ri_pinned, res_info[j].ri_min);
    867     0    stevel 		if (res_info[j].ri_res != default_res &&
    868     0    stevel 		    res_info[j].ri_oldsize > real_min) {
    869     0    stevel 
    870     0    stevel 			uint64_t num;
    871     0    stevel 
    872     0    stevel 			num = res_info[j].ri_oldsize - real_min;
    873     0    stevel 			if (pool_resource_transfer(
    874     0    stevel 			    TO_CONF(TO_ELEM(default_res)),
    875     0    stevel 				res_info[j].ri_res, default_res, num) !=
    876     0    stevel 			    PO_SUCCESS) {
    877     0    stevel 				free(res_info);
    878     0    stevel 				return (PO_FAIL);
    879     0    stevel 			}
    880     0    stevel 		}
    881     0    stevel 	}
    882     0    stevel 	/*
    883     0    stevel 	 * Now, transfer resources below min from the default.
    884     0    stevel 	 */
    885     0    stevel 	for (j = 0; j < nelem; j++) {
    886     0    stevel 		/*
    887     0    stevel 		 * We don't want to interfere with resources which are reserved
    888     0    stevel 		 */
    889     0    stevel 		if (res_info[j].ri_res != default_res &&
    890     0    stevel 		    res_info[j].ri_oldsize < res_info[j].ri_min) {
    891     0    stevel 			if (pool_resource_transfer(
    892     0    stevel 			    TO_CONF(TO_ELEM(default_res)),
    893     0    stevel 			    default_res, res_info[j].ri_res,
    894     0    stevel 			    res_info[j].ri_min - res_info[j].ri_oldsize) !=
    895     0    stevel 			    PO_SUCCESS) {
    896     0    stevel 				free(res_info);
    897     0    stevel 				return (PO_FAIL);
    898     0    stevel 			}
    899     0    stevel 		}
    900     0    stevel 	}
    901     0    stevel 	free(res_info);
    902     0    stevel 	return (PO_SUCCESS);
    903     0    stevel }
    904     0    stevel 
    905     0    stevel /*
    906     0    stevel  * Allocate cpus to pset resource sets, favoring sets with higher importance.
    907     0    stevel  *
    908     0    stevel  * Step 1: Sort resource sets by decreasing importance, and load each sets
    909     0    stevel  *	   current size (oldsize), min, max, and number of pinned cpus.
    910     0    stevel  *	   Compute the total number of cpus by totaling oldsize.
    911     0    stevel  *
    912     0    stevel  * Step 2: Compute the newsize for each set:
    913     0    stevel  *
    914     0    stevel  * 	Give each set its min number of cpus.  This min may be greater than
    915     0    stevel  *	its pset.min due to pinned cpus. If there are more cpus than the total
    916     0    stevel  *	of all mins, then the surplus cpus are dealt round-robin to all sets
    917     0    stevel  *	(up to their max) in order of decreasing importance.  A set may be
    918     0    stevel  *	skipped during dealing because it started with more than its min due to
    919     0    stevel  * 	pinned cpus.  The dealing stops when there are no more cpus or all
    920     0    stevel  *	sets are at their max. If all sets are at their max, any remaining cpus
    921     0    stevel  *	are given to the default set.
    922     0    stevel  *
    923     0    stevel  * Step 3: Transfer cpus from sets with (oldsize > newsize) to sets with
    924     0    stevel  *	   (oldsize < newsize).
    925     0    stevel  */
    926     0    stevel int
    927     0    stevel pset_allocate_imp(pool_resource_t **res, uint_t nelem)
    928     0    stevel {
    929     0    stevel 	res_info_t *res_info;
    930     0    stevel 	res_info_t *default_res_info;
    931     0    stevel 	const pool_resource_t *default_res = NULL;
    932     0    stevel 	uint64_t tot_resources = 0;	/* total count of resources */
    933     0    stevel 	uint64_t tot_min = 0;		/* total of all resource set mins */
    934     0    stevel 	uint64_t num_to_deal = 0;	/* total resources above mins to deal */
    935     0    stevel 	uint64_t sets_maxed = 0;	/* number of resource sets dealt to  */
    936     0    stevel 					/* their max */
    937     0    stevel 	uint64_t sets_finished = 0;	/* number of resource sets that have */
    938     0    stevel 					/* size == newsize */
    939     0    stevel 	int donor, receiver;
    940     0    stevel 	int deal;
    941     0    stevel 	int j;
    942     0    stevel 	int ret = PO_SUCCESS;
    943     0    stevel 
    944     0    stevel 	/*
    945     0    stevel 	 * Build list of res_info_t's
    946     0    stevel 	 */
    947     0    stevel 	if ((res_info = calloc(nelem, sizeof (res_info_t))) == NULL) {
    948     0    stevel 		pool_seterror(POE_SYSTEM);
    949     0    stevel 		return (PO_FAIL);
    950     0    stevel 	}
    951     0    stevel 
    952     0    stevel 	/* Order resources by importance, most important being first */
    953     0    stevel 	qsort(res, nelem, sizeof (pool_resource_t *),
    954     0    stevel 	    resource_compare_by_descending_importance);
    955     0    stevel 
    956     0    stevel 	for (j = 0; j < nelem; j++) {
    957     0    stevel 
    958     0    stevel 		/* Track which resource is the default */
    959     0    stevel 		if (default_res == NULL &&
    960     0    stevel 		    resource_is_default(res[j]) == PO_TRUE) {
    961     0    stevel 			default_res = res[j];
    962     0    stevel 			default_res_info = &(res_info[j]);
    963     0    stevel 		}
    964     0    stevel 
    965     0    stevel 		/* Load sets' current values */
    966     0    stevel 		if (resource_get_max(res[j], &res_info[j].ri_max) == PO_FAIL ||
    967     0    stevel 		    resource_get_min(res[j], &res_info[j].ri_min) == PO_FAIL ||
    968     0    stevel 		    resource_get_size(res[j], &res_info[j].ri_oldsize) ==
    969     0    stevel 		    PO_FAIL ||
    970     0    stevel 		    resource_get_pinned(res[j],
    971     0    stevel 		    &res_info[j].ri_pinned) == PO_FAIL) {
    972     0    stevel 			free(res_info);
    973     0    stevel 			return (PO_FAIL);
    974     0    stevel 		}
    975     0    stevel 
    976     0    stevel 		/* Start each set's newsize out at their min. */
    977     0    stevel 		res_info[j].ri_newsize = res_info[j].ri_min;
    978     0    stevel 
    979     0    stevel 		/* pre-deal pinned resources that exceed min */
    980     0    stevel 		if (res_info[j].ri_pinned > res_info[j].ri_min) {
    981     0    stevel 			res_info[j].ri_newsize = res_info[j].ri_pinned;
    982     0    stevel 			res_info[j].ri_dealt =
    983     0    stevel 			    res_info[j].ri_newsize - res_info[j].ri_min;
    984     0    stevel 		}
    985     0    stevel 		res_info[j].ri_res = res[j];
    986     0    stevel 
    987     0    stevel 		/* Compute total number of resources to deal out */
    988     0    stevel 		tot_resources += res_info[j].ri_oldsize;
    989     0    stevel 		tot_min += res_info[j].ri_newsize;
    990     0    stevel 
    991     0    stevel #ifdef DEBUG
    992     0    stevel 		dprintf("res allocation details\n");
    993     0    stevel 		pool_elem_dprintf(TO_ELEM(res[j]));
    994     0    stevel 		dprintf("size=%llu\n", res_info[j].ri_oldsize);
    995     0    stevel #endif	/* DEBUG */
    996     0    stevel 	}
    997     0    stevel 
    998     0    stevel 	num_to_deal = tot_resources - tot_min;
    999     0    stevel 
   1000     0    stevel 	/*
   1001     0    stevel 	 * Deal one resource to each set, and then another, until all
   1002     0    stevel 	 * resources are dealt or all sets are at their max.
   1003     0    stevel 	 */
   1004     0    stevel 	for (deal = 1; num_to_deal > 0 && sets_maxed < nelem; deal++) {
   1005     0    stevel 		for (j = 0; j < nelem; j++) {
   1006     0    stevel 
   1007     0    stevel 			/*
   1008     0    stevel 			 * Skip this resource set if it has already been
   1009     0    stevel 			 * pre-dealt a resource due to pinned resources.
   1010     0    stevel 			 */
   1011     0    stevel 			if (res_info[j].ri_dealt >= deal)
   1012     0    stevel 				continue;
   1013     0    stevel 
   1014     0    stevel 			if (res_info[j].ri_newsize < res_info[j].ri_max) {
   1015     0    stevel 
   1016     0    stevel 				res_info[j].ri_dealt++;
   1017     0    stevel 				res_info[j].ri_newsize++;
   1018     0    stevel 				if (res_info[j].ri_newsize ==
   1019     0    stevel 				    res_info[j].ri_max)
   1020     0    stevel 					sets_maxed++;
   1021     0    stevel 
   1022     0    stevel 				num_to_deal--;
   1023     0    stevel 				if (num_to_deal == 0)
   1024     0    stevel 					break;
   1025     0    stevel 			}
   1026     0    stevel 		}
   1027     0    stevel 	}
   1028     0    stevel 
   1029     0    stevel 	/*
   1030     0    stevel 	 * If all resource sets are at their max, deal the remaining to the
   1031     0    stevel 	 * default resource set.
   1032     0    stevel 	 */
   1033     0    stevel 	if ((sets_maxed == nelem) && (num_to_deal > 0)) {
   1034     0    stevel 		default_res_info->ri_dealt += num_to_deal;
   1035     0    stevel 		default_res_info->ri_newsize += num_to_deal;
   1036     0    stevel 	}
   1037     0    stevel 
   1038     0    stevel 	/*
   1039     0    stevel 	 * Sort so that resource sets needing resources preced resource sets
   1040     0    stevel 	 * that have extra resources.  The sort function will also compute
   1041     0    stevel 	 * The quantity of resources that need to be transfered into or out
   1042     0    stevel 	 * of each set so that it's size == newsize.
   1043     0    stevel 	 */
   1044     0    stevel 	qsort(res_info, nelem, sizeof (res_info_t),
   1045     0    stevel 	    compute_size_to_transfer);
   1046     0    stevel 
   1047     0    stevel 	/*
   1048     0    stevel 	 * The donor index starts at the end of the resource set list and
   1049     0    stevel 	 * walks up.  The receiver index starts at the beginning of the
   1050     0    stevel 	 * resource set list and walks down.  Cpu's are transfered from the
   1051     0    stevel 	 * donors to the receivers until all sets have transfer == 0).
   1052     0    stevel 	 */
   1053     0    stevel 	donor = nelem - 1;
   1054     0    stevel 	receiver = 0;
   1055     0    stevel 
   1056     0    stevel 	/* Number of sets with transfer == 0 */
   1057     0    stevel 	sets_finished = 0;
   1058     0    stevel 
   1059     0    stevel 	/* Tranfer resources so that each set's size becomes newsize */
   1060     0    stevel 	for (;;) {
   1061     0    stevel 
   1062     0    stevel 		uint64_t ntrans;
   1063     0    stevel 		if (donor == receiver) {
   1064     0    stevel 			if (res_info[donor].ri_transfer != 0) {
   1065     0    stevel 				free(res_info);
   1066     0    stevel 				return (PO_FAIL);
   1067     0    stevel 			}
   1068     0    stevel 			sets_finished++;
   1069     0    stevel 			break;
   1070     0    stevel 		}
   1071     0    stevel 		if (res_info[donor].ri_transfer == 0) {
   1072     0    stevel 			sets_finished++;
   1073     0    stevel 			donor--;
   1074     0    stevel 			continue;
   1075     0    stevel 		}
   1076     0    stevel 		if (res_info[receiver].ri_transfer == 0) {
   1077     0    stevel 			sets_finished++;
   1078     0    stevel 			receiver++;
   1079     0    stevel 			continue;
   1080     0    stevel 		}
   1081     0    stevel 
   1082     0    stevel 		/* Transfer resources from the donor set to the receiver */
   1083     0    stevel 		ntrans = MIN(res_info[donor].ri_transfer,
   1084     0    stevel 			    -res_info[receiver].ri_transfer);
   1085     0    stevel 
   1086     0    stevel 		if (pool_resource_transfer(
   1087     0    stevel 			TO_CONF(TO_ELEM(res_info[donor].ri_res)),
   1088     0    stevel 			    res_info[donor].ri_res, res_info[receiver].ri_res,
   1089     0    stevel 			    ntrans) != PO_SUCCESS) {
   1090     0    stevel 				free(res_info);
   1091     0    stevel 				return (PO_FAIL);
   1092     0    stevel 			}
   1093     0    stevel 		res_info[donor].ri_transfer -= ntrans;
   1094     0    stevel 		res_info[receiver].ri_transfer += ntrans;
   1095     0    stevel 	}
   1096     0    stevel 
   1097     0    stevel 	if (sets_finished != nelem)
   1098     0    stevel 		ret = PO_FAIL;
   1099     0    stevel 
   1100     0    stevel 	free(res_info);
   1101     0    stevel 	return (ret);
   1102     0    stevel }
   1103     0    stevel 
   1104     0    stevel /*
   1105     0    stevel  * Used as a qsort parameter to help order resources in terms of their
   1106     0    stevel  * importance, higher importance being first.
   1107     0    stevel  */
   1108     0    stevel int
   1109     0    stevel resource_compare_by_descending_importance(const void *arg1, const void *arg2)
   1110     0    stevel {
   1111     0    stevel 	pool_elem_t *elem1;
   1112     0    stevel 	pool_elem_t *elem2;
   1113     0    stevel 	pool_resource_t **res1 = (pool_resource_t **)arg1;
   1114     0    stevel 	pool_resource_t **res2 = (pool_resource_t **)arg2;
   1115     0    stevel 	pool_value_t val = POOL_VALUE_INITIALIZER;
   1116     0    stevel 	int64_t i1 = 0, i2 = 0;
   1117     0    stevel 
   1118     0    stevel 	elem1 = TO_ELEM(*res1);
   1119     0    stevel 	elem2 = TO_ELEM(*res2);
   1120     0    stevel 
   1121     0    stevel 	if (pool_get_property(TO_CONF(elem1), elem1, "_importance", &val) ==
   1122     0    stevel 	    POC_INT)
   1123     0    stevel 		(void) pool_value_get_int64(&val, &i1);
   1124     0    stevel 
   1125     0    stevel 	if (pool_get_property(TO_CONF(elem2), elem2, "_importance", &val) ==
   1126     0    stevel 	    POC_INT)
   1127     0    stevel 		(void) pool_value_get_int64(&val, &i2);
   1128     0    stevel 	return (i1 > i2 ? -1 : (i1 < i2 ? 1 : 0));
   1129     0    stevel }
   1130     0    stevel 
   1131     0    stevel /*
   1132     0    stevel  * Sort in increasing order so that resource sets with extra resources are at
   1133     0    stevel  * the end and resource sets needing resources are at the beginning.
   1134     0    stevel  */
   1135     0    stevel int
   1136     0    stevel compute_size_to_transfer(const void *arg1, const void *arg2)
   1137     0    stevel {
   1138     0    stevel 	res_info_t *r1 = (res_info_t *)arg1, *r2 = (res_info_t *)arg2;
   1139     0    stevel 	r1->ri_transfer = (int64_t)r1->ri_oldsize - (int64_t)r1->ri_newsize;
   1140     0    stevel 	r2->ri_transfer = (int64_t)r2->ri_oldsize - (int64_t)r2->ri_newsize;
   1141     0    stevel 	return (r1->ri_transfer > r2->ri_transfer ? 1 :
   1142     0    stevel 	    (r1->ri_transfer < r2->ri_transfer ? -1 : 0));
   1143     0    stevel }
   1144     0    stevel 
   1145     0    stevel /*
   1146     0    stevel  * set_importance_cb() is used to create "_importance" props on each
   1147     0    stevel  * resource associated with a pool.
   1148     0    stevel  *
   1149     0    stevel  * Returns PO_SUCCESS/PO_FAIL
   1150     0    stevel  */
   1151     0    stevel /*ARGSUSED*/
   1152     0    stevel static int
   1153     0    stevel set_importance_cb(pool_conf_t *conf, pool_t *pool, void *unused)
   1154     0    stevel {
   1155     0    stevel 	pool_value_t val = POOL_VALUE_INITIALIZER;
   1156     0    stevel 	int64_t importance;
   1157     0    stevel 	pool_resource_t **res;
   1158     0    stevel 	uint_t nelem, i;
   1159     0    stevel 
   1160     0    stevel 	if (pool_get_property(conf, TO_ELEM(pool), "pool.importance", &val) !=
   1161     0    stevel 	    POC_INT) {
   1162     0    stevel 		pool_seterror(POE_INVALID_CONF);
   1163     0    stevel 		return (PO_FAIL);
   1164     0    stevel 	}
   1165     0    stevel 	(void) pool_value_get_int64(&val, &importance);
   1166     0    stevel 	if ((res = pool_query_pool_resources(conf, pool, &nelem, NULL)) ==
   1167     0    stevel 	    NULL) {
   1168     0    stevel 		return (PO_FAIL);
   1169     0    stevel 	}
   1170     0    stevel 	for (i = 0; res[i] != NULL; i++) {
   1171     0    stevel 		int64_t old_importance = INT64_MIN;
   1172     0    stevel 		pool_elem_t *elem = TO_ELEM(res[i]);
   1173     0    stevel 
   1174     0    stevel 		if (pool_get_property(conf, elem, "_importance", &val) ==
   1175     0    stevel 		    POC_INT)
   1176     0    stevel 			(void) pool_value_get_int64(&val, &old_importance);
   1177     0    stevel 		if (old_importance <= importance) {
   1178     0    stevel 			(void) pool_value_set_int64(&val, importance);
   1179     0    stevel 			(void) pool_put_property(conf, elem, "_importance",
   1180     0    stevel 			    &val);
   1181     0    stevel 		}
   1182     0    stevel 	}
   1183     0    stevel 	free(res);
   1184     0    stevel 	return (PO_SUCCESS);
   1185     0    stevel }
   1186     0    stevel 
   1187     0    stevel /*
   1188     0    stevel  * unset_importance_cb() is used to remove "_importance" props from
   1189     0    stevel  * each resource associated with a pool.
   1190     0    stevel  *
   1191     0    stevel  * Returns PO_SUCCESS/PO_FAIL
   1192     0    stevel  */
   1193     0    stevel /*ARGSUSED*/
   1194     0    stevel static int
   1195     0    stevel unset_importance_cb(pool_conf_t *conf, pool_t *pool, void *unused)
   1196     0    stevel {
   1197     0    stevel 	pool_resource_t **res;
   1198     0    stevel 	uint_t nelem, i;
   1199     0    stevel 
   1200     0    stevel 	if ((res = pool_query_pool_resources(conf, pool, &nelem, NULL)) ==
   1201     0    stevel 	    NULL) {
   1202     0    stevel 		return (PO_FAIL);
   1203     0    stevel 	}
   1204     0    stevel 	for (i = 0; res[i] != NULL; i++) {
   1205     0    stevel 		if (pool_rm_property(conf, TO_ELEM(res[i]), "_importance") ==
   1206     0    stevel 		    PO_FAIL) {
   1207     0    stevel 			free(res);
   1208     0    stevel 			return (PO_FAIL);
   1209     0    stevel 		}
   1210     0    stevel 	}
   1211     0    stevel 	free(res);
   1212     0    stevel 	return (PO_SUCCESS);
   1213     0    stevel }
   1214     0    stevel 
   1215     0    stevel /*
   1216     0    stevel  * add_importance_props() is used to create "_importance" props on
   1217     0    stevel  * each resource associated with a pool.
   1218     0    stevel  *
   1219     0    stevel  * Returns PO_SUCCESS/PO_FAIL
   1220     0    stevel  */
   1221     0    stevel static int
   1222     0    stevel add_importance_props(pool_conf_t *conf)
   1223     0    stevel {
   1224     0    stevel 	return (pool_walk_pools(conf, NULL, set_importance_cb));
   1225     0    stevel }
   1226     0    stevel 
   1227     0    stevel /*
   1228     0    stevel  * remove_importance_props() is used to remove "_importance" props on
   1229     0    stevel  * each resource associated with a pool.
   1230     0    stevel  *
   1231     0    stevel  * Returns PO_SUCCESS/PO_FAIL
   1232     0    stevel  */
   1233     0    stevel static int
   1234     0    stevel remove_importance_props(pool_conf_t *conf)
   1235     0    stevel {
   1236     0    stevel 	return (pool_walk_pools(conf, NULL, unset_importance_cb));
   1237     0    stevel }
   1238     0    stevel 
   1239     0    stevel /*
   1240     0    stevel  * pool_conf_commit_sys() takes a configuration and modifies both the
   1241     0    stevel  * supplied configuration and the dynamic configuration. The goal of
   1242     0    stevel  * this modification is to generate a dynamic configuration which best
   1243     0    stevel  * represents the constraints laid down in the static configuration
   1244     0    stevel  * and to update the static configuration with the results of this
   1245     0    stevel  * process.
   1246     0    stevel  *
   1247     0    stevel  * Returns PO_SUCCESS/PO_FAIL
   1248     0    stevel  */
   1249     0    stevel int
   1250     0    stevel pool_conf_commit_sys(pool_conf_t *conf, int validate)
   1251     0    stevel {
   1252     0    stevel 	pool_conf_t *dyn;
   1253     0    stevel 
   1254     0    stevel 	if ((dyn = pool_conf_alloc()) == NULL)
   1255     0    stevel 		return (PO_FAIL);
   1256     0    stevel 	if (pool_conf_open(dyn, pool_dynamic_location(), PO_RDWR) !=
   1257     0    stevel 	    PO_SUCCESS) {
   1258     0    stevel 		pool_conf_free(dyn);
   1259     0    stevel 		return (PO_FAIL);
   1260     0    stevel 	}
   1261     0    stevel 	if (validate == PO_TRUE) {
   1262     0    stevel 		if (pool_conf_validate(conf, POV_RUNTIME) != PO_SUCCESS) {
   1263     0    stevel 			(void) pool_conf_close(dyn);
   1264     0    stevel 			pool_conf_free(dyn);
   1265     0    stevel 			return (PO_FAIL);
   1266     0    stevel 		}
   1267     0    stevel 	}
   1268     0    stevel 	/*
   1269     0    stevel 	 * Now try to make the two things "the same".
   1270     0    stevel 	 */
   1271     0    stevel 	if (diff_and_fix(conf, dyn) != PO_SUCCESS) {
   1272     0    stevel 		(void) pool_conf_close(dyn);
   1273     0    stevel 		pool_conf_free(dyn);
   1274     0    stevel 		pool_seterror(POE_INVALID_CONF);
   1275     0    stevel 		return (PO_FAIL);
   1276     0    stevel 	}
   1277     0    stevel 	if (dyn->pc_prov->pc_commit(dyn) != PO_SUCCESS) {
   1278     0    stevel 		(void) pool_conf_close(dyn);
   1279     0    stevel 		pool_conf_free(dyn);
   1280     0    stevel 		return (PO_FAIL);
   1281     0    stevel 	}
   1282     0    stevel 	(void) pool_conf_close(dyn);
   1283     0    stevel 	pool_conf_free(dyn);
   1284     0    stevel 	return (PO_SUCCESS);
   1285     0    stevel }
   1286     0    stevel 
   1287     0    stevel /*
   1288     0    stevel  * Copies all properties from one element to another. If the property
   1289     0    stevel  * is a readonly property, then don't copy it.
   1290     0    stevel  */
   1291     0    stevel /* ARGSUSED */
   1292     0    stevel static int
   1293     0    stevel clone_element(pool_conf_t *conf, pool_elem_t *pe, const char *name,
   1294     0    stevel     pool_value_t *pv, void *user)
   1295     0    stevel {
   1296     0    stevel 	pool_elem_t *tgt = (pool_elem_t *)user;
   1297     0    stevel 	const pool_prop_t *prop;
   1298     0    stevel #ifdef DEBUG
   1299     0    stevel 	dprintf("Cloning %s from %s\n",
   1300     0    stevel 	    pool_conf_location(TO_CONF(TO_ELEM(tgt))),
   1301     0    stevel 	    pool_conf_location(TO_CONF(pe)));
   1302     0    stevel 	assert(TO_CONF(TO_ELEM(tgt)) != TO_CONF(pe));
   1303     0    stevel 	dprintf("clone_element: Processing %s\n", name);
   1304     0    stevel 	pool_value_dprintf(pv);
   1305     0    stevel #endif	/* DEBUG */
   1306     0    stevel 	/*
   1307     0    stevel 	 * Some properties should be ignored
   1308     0    stevel 	 */
   1309     0    stevel 	if ((prop = provider_get_prop(pe, name)) != NULL &&
   1310     0    stevel 	    prop_is_readonly(prop) == PO_TRUE)
   1311     0    stevel 		return (PO_SUCCESS);
   1312  3247  gjelinek 
   1313  3247  gjelinek 	/* The temporary property needs special handling */
   1314  3247  gjelinek 	if (strstr(name, ".temporary") != NULL)
   1315  3247  gjelinek 		return (pool_set_temporary(TO_CONF(tgt), tgt) ==
   1316  3247  gjelinek 		    PO_FAIL ?  PO_FAIL : PO_SUCCESS);
   1317  3247  gjelinek 	else
   1318  3247  gjelinek 		return (pool_put_property(TO_CONF(tgt), tgt, name, pv) ==
   1319  3247  gjelinek 		    PO_FAIL ? PO_FAIL : PO_SUCCESS);
   1320     0    stevel }
   1321     0    stevel 
   1322     0    stevel /*
   1323     0    stevel  * Removes all properties from one element. Properties which are
   1324     0    stevel  * managed by the configuration are ignored.
   1325     0    stevel  */
   1326     0    stevel /* ARGSUSED3 */
   1327     0    stevel static int
   1328     0    stevel clean_element(pool_conf_t *conf, pool_elem_t *pe, const char *name,
   1329     0    stevel     pool_value_t *pv, void *user)
   1330     0    stevel {
   1331     0    stevel 	const pool_prop_t *prop;
   1332     0    stevel 	/*
   1333     0    stevel 	 * Some properties should be ignored
   1334     0    stevel 	 */
   1335  3247  gjelinek 	if (strstr(name, ".temporary") != NULL ||
   1336  3247  gjelinek 	    ((prop = provider_get_prop(pe, name)) != NULL &&
   1337  3247  gjelinek 	    prop_is_optional(prop) == PO_FALSE))
   1338     0    stevel 		return (PO_SUCCESS);
   1339     0    stevel 	return (pool_rm_property(conf, (pool_elem_t *)pe, name) == PO_FAIL);
   1340     0    stevel }
   1341