Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/atomic.h>
     27 #include <sys/cmn_err.h>
     28 #include <sys/id_space.h>
     29 #include <sys/kmem.h>
     30 #include <sys/kstat.h>
     31 #include <sys/log.h>
     32 #include <sys/modctl.h>
     33 #include <sys/modhash.h>
     34 #include <sys/mutex.h>
     35 #include <sys/proc.h>
     36 #include <sys/procset.h>
     37 #include <sys/project.h>
     38 #include <sys/resource.h>
     39 #include <sys/rctl.h>
     40 #include <sys/siginfo.h>
     41 #include <sys/strlog.h>
     42 #include <sys/systm.h>
     43 #include <sys/task.h>
     44 #include <sys/types.h>
     45 #include <sys/policy.h>
     46 #include <sys/zone.h>
     47 
     48 /*
     49  * Resource controls (rctls)
     50  *
     51  *   The rctl subsystem provides a mechanism for kernel components to
     52  *   register their individual resource controls with the system as a whole,
     53  *   such that those controls can subscribe to specific actions while being
     54  *   associated with the various process-model entities provided by the kernel:
     55  *   the process, the task, the project, and the zone.  (In principle, only
     56  *   minor modifications would be required to connect the resource control
     57  *   functionality to non-process-model entities associated with the system.)
     58  *
     59  *   Subsystems register their rctls via rctl_register().  Subsystems
     60  *   also wishing to provide additional limits on a given rctl can modify
     61  *   them once they have the rctl handle.  Each subsystem should store the
     62  *   handle to their rctl for direct access.
     63  *
     64  *   A primary dictionary, rctl_dict, contains a hash of id to the default
     65  *   control definition for each controlled resource-entity pair on the system.
     66  *   A secondary dictionary, rctl_dict_by_name, contains a hash of name to
     67  *   resource control handles.  The resource control handles are distributed by
     68  *   the rctl_ids ID space.  The handles are private and not to be
     69  *   advertised to userland; all userland interactions are via the rctl
     70  *   names.
     71  *
     72  *   Entities inherit their rctls from their predecessor.  Since projects have
     73  *   no ancestor, they inherit their rctls from the rctl dict for project
     74  *   rctls.  It is expected that project controls will be set to their
     75  *   appropriate values shortly after project creation, presumably from a
     76  *   policy source such as the project database.
     77  *
     78  * Data structures
     79  *   The rctl_set_t attached to each of the process model entities is a simple
     80  *   hash table keyed on the rctl handle assigned at registration.  The entries
     81  *   in the hash table are rctl_t's, whose relationship with the active control
     82  *   values on that resource and with the global state of the resource we
     83  *   illustrate below:
     84  *
     85  *   rctl_dict[key] --> rctl_dict_entry
     86  *			   ^
     87  *			   |
     88  *			+--+---+
     89  *   rctl_set[key] ---> | rctl | --> value <-> value <-> system value --> NULL
     90  *			+--+---+		 ^
     91  *			   |			 |
     92  *			   +------- cursor ------+
     93  *
     94  *   That is, the rctl contains a back pointer to the global resource control
     95  *   state for this resource, which is also available in the rctl_dict hash
     96  *   table mentioned earlier.  The rctl contains two pointers to resource
     97  *   control values:  one, values, indicates the entire sequence of control
     98  *   values; the other, cursor, indicates the currently active control
     99  *   value--the next value to be enforced.  The value list itself is an open,
    100  *   doubly-linked list, the last non-NULL member of which is the system value
    101  *   for that resource (being the theoretical/conventional maximum allowable
    102  *   value for the resource on this OS instance).
    103  *
    104  * Ops Vector
    105  *   Subsystems publishing rctls need not provide instances of all of the
    106  *   functions specified by the ops vector.  In particular, if general
    107  *   rctl_*() entry points are not being called, certain functions can be
    108  *   omitted.  These align as follows:
    109  *
    110  *   rctl_set()
    111  *     You may wish to provide a set callback if locking circumstances prevent
    112  *     requesting the enforced value from the resource control, or if the
    113  *     performance cost of doing so is prohibitive.  For instance, the currently
    114  *     enforced file size limit is stored on the process in p_fsz_ctl to
    115  *     maintain read()/write() performance.
    116  *
    117  *   rctl_test()
    118  *     You must provide a test callback if you are using the rctl_test()
    119  *     interface.  An action callback is optional.
    120  *
    121  *   rctl_action()
    122  *     You may wish to provide an action callback.
    123  *
    124  * Registration
    125  *   New resource controls can be added to a running instance by loaded modules
    126  *   via registration.  (The current implementation does not support unloadable
    127  *   modules; this functionality can be added if needed, via an
    128  *   activation/deactivation interface involving the manipulation of the
    129  *   ops vector for the resource control(s) needing to support unloading.)
    130  *
    131  * Control value ordering
    132  *   Because the rctl_val chain on each rctl must be navigable in a
    133  *   deterministic way, we have to define an ordering on the rctl_val_t's.  The
    134  *   defined order is (flags & [maximal], value, flags & [deny-action],
    135  *   privilege).
    136  *
    137  * Locking
    138  *   rctl_dict_lock must be acquired prior to rctl_lists_lock.  Since
    139  *   rctl_dict_lock or rctl_lists_lock can be acquired at the enforcement
    140  *   point of any subsystem while subsystem locks are held, it is never safe
    141  *   to call kmem_alloc(., KM_SLEEP) while holding either of these locks.
    142  *   Traversing any of the various resource control entity lists requires
    143  *   holding rctl_lists_lock.
    144  *
    145  *   Each individual resource control set associated with an entity must have
    146  *   its rcs_lock held for the duration of any operations that would add
    147  *   resource controls or control values to the set.
    148  *
    149  *   The locking subsequence of interest is: p_lock, rctl_dict_lock,
    150  *   rctl_lists_lock, entity->rcs_lock.
    151  *
    152  * The projects(4) database and project entity resource controls
    153  *   A special case is made for RCENTITY_PROJECT values set through the
    154  *   setproject(3PROJECT) interface.  setproject() makes use of a private
    155  *   interface, setprojrctl(), which passes through an array of resource control
    156  *   blocks that need to be set while holding the entity->rcs_lock.  This
    157  *   ensures that the act of modifying a project's resource controls is
    158  *   "atomic" within the kernel.
    159  *
    160  *   Within the rctl sub-system, we provide two interfaces that are only used by
    161  *   the setprojrctl() code path - rctl_local_insert_all() and
    162  *   rctl_local_replace_all().  rctl_local_insert_all() will ensure that the
    163  *   resource values specified in *new_values are applied.
    164  *   rctl_local_replace_all() will purge the current rctl->rc_projdb and
    165  *   rctl->rc_values entries, and apply the *new_values.
    166  *
    167  *   These functions modify not only the linked list of active resource controls
    168  *   (rctl->rc_values), but also a "cached" linked list (rctl->rc_projdb) of
    169  *   values set through these interfaces.  To clarify:
    170  *
    171  *      rctl->rc_values - a linked list of rctl_val_t.  These are the active
    172  *      resource values associated with this rctl, and may have been set by
    173  *      setrctl() - via prctl(1M), or by setprojrctl() - via
    174  *      setproject(3PROJECT).
    175  *
    176  *      rctl->rc_projdb - a linked list of rctl_val_t.  These reflect the
    177  *      resource values set by the setprojrctl() code path.  rc_projdb is not
    178  *      referenced by any other component of the rctl sub-system.
    179  *
    180  *   As various locks are held when calling these functions, we ensure that all
    181  *   the possible memory allocations are performed prior to calling the
    182  *   function.  *alloc_values is a linked list of uninitialized rctl_val_t,
    183  *   which may be used to duplicate a new resource control value (passed in as
    184  *   one of the members of the *new_values linked list), in order to populate
    185  *   rctl->rc_values.
    186  */
    187 
    188 id_t max_rctl_hndl = 32768;
    189 int rctl_dict_size = 64;
    190 int rctl_set_size = 8;
    191 kmutex_t rctl_dict_lock;
    192 mod_hash_t *rctl_dict;
    193 mod_hash_t *rctl_dict_by_name;
    194 id_space_t *rctl_ids;
    195 kmem_cache_t *rctl_cache;	/* kmem cache for rctl structures */
    196 kmem_cache_t *rctl_val_cache;	/* kmem cache for rctl values */
    197 
    198 kmutex_t rctl_lists_lock;
    199 rctl_dict_entry_t *rctl_lists[RC_MAX_ENTITY + 1];
    200 
    201 /*
    202  * Default resource control operations and ops vector
    203  *   To be used if the particular resource control has no specific actions
    204  *   defined, or if the subsystem providing the control is quiescing (in
    205  *   preparation for unloading, presumably).
    206  *
    207  *   Resource controls with callbacks should fill the unused operations with the
    208  *   appropriate default impotent callback.
    209  */
    210 /*ARGSUSED*/
    211 void
    212 rcop_no_action(struct rctl *r, struct proc *p, rctl_entity_p_t *e)
    213 {
    214 }
    215 
    216 /*ARGSUSED*/
    217 rctl_qty_t
    218 rcop_no_usage(struct rctl *r, struct proc *p)
    219 {
    220 	return (0);
    221 }
    222 
    223 /*ARGSUSED*/
    224 int
    225 rcop_no_set(struct rctl *r, struct proc *p, rctl_entity_p_t *e, rctl_qty_t l)
    226 {
    227 	return (0);
    228 }
    229 
    230 /*ARGSUSED*/
    231 int
    232 rcop_no_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e,
    233     struct rctl_val *rv, rctl_qty_t i, uint_t f)
    234 {
    235 	return (0);
    236 }
    237 
    238 rctl_ops_t rctl_default_ops = {
    239 	rcop_no_action,
    240 	rcop_no_usage,
    241 	rcop_no_set,
    242 	rcop_no_test
    243 };
    244 
    245 /*
    246  * Default "absolute" resource control operation and ops vector
    247  *   Useful if there is no usage associated with the
    248  *   resource control.
    249  */
    250 /*ARGSUSED*/
    251 int
    252 rcop_absolute_test(struct rctl *r, struct proc *p, rctl_entity_p_t *e,
    253     struct rctl_val *rv, rctl_qty_t i, uint_t f)
    254 {
    255 	return (i > rv->rcv_value);
    256 }
    257 
    258 rctl_ops_t rctl_absolute_ops = {
    259 	rcop_no_action,
    260 	rcop_no_usage,
    261 	rcop_no_set,
    262 	rcop_absolute_test
    263 };
    264 
    265 /*ARGSUSED*/
    266 static uint_t
    267 rctl_dict_hash_by_id(void *hash_data, mod_hash_key_t key)
    268 {
    269 	return ((uint_t)(uintptr_t)key % rctl_dict_size);
    270 }
    271 
    272 static int
    273 rctl_dict_id_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
    274 {
    275 	uint_t u1 = (uint_t)(uintptr_t)key1;
    276 	uint_t u2 = (uint_t)(uintptr_t)key2;
    277 
    278 	if (u1 > u2)
    279 		return (1);
    280 
    281 	if (u1 == u2)
    282 		return (0);
    283 
    284 	return (-1);
    285 }
    286 
    287 static void
    288 rctl_dict_val_dtor(mod_hash_val_t val)
    289 {
    290 	rctl_dict_entry_t *kr = (rctl_dict_entry_t *)val;
    291 
    292 	kmem_free(kr, sizeof (rctl_dict_entry_t));
    293 }
    294 
    295 /*
    296  * size_t rctl_build_name_buf()
    297  *
    298  * Overview
    299  *   rctl_build_name_buf() walks all active resource controls in the dictionary,
    300  *   building a buffer of continguous NUL-terminated strings.
    301  *
    302  * Return values
    303  *   The size of the buffer is returned, the passed pointer's contents are
    304  *   modified to that of the location of the buffer.
    305  *
    306  * Caller's context
    307  *   Caller must be in a context suitable for KM_SLEEP allocations.
    308  */
    309 size_t
    310 rctl_build_name_buf(char **rbufp)
    311 {
    312 	size_t req_size, cpy_size;
    313 	char *rbufloc;
    314 	int i;
    315 
    316 rctl_rebuild_name_buf:
    317 	req_size = cpy_size = 0;
    318 
    319 	/*
    320 	 * Calculate needed buffer length.
    321 	 */
    322 	mutex_enter(&rctl_lists_lock);
    323 	for (i = 0; i < RC_MAX_ENTITY + 1; i++) {
    324 		rctl_dict_entry_t *rde;
    325 
    326 		for (rde = rctl_lists[i];
    327 		    rde != NULL;
    328 		    rde = rde->rcd_next)
    329 			req_size += strlen(rde->rcd_name) + 1;
    330 	}
    331 	mutex_exit(&rctl_lists_lock);
    332 
    333 	rbufloc = *rbufp = kmem_alloc(req_size, KM_SLEEP);
    334 
    335 	/*
    336 	 * Copy rctl names into our buffer.  If the copy length exceeds the
    337 	 * allocate length (due to registration changes), stop copying, free the
    338 	 * buffer, and start again.
    339 	 */
    340 	mutex_enter(&rctl_lists_lock);
    341 	for (i = 0; i < RC_MAX_ENTITY + 1; i++) {
    342 		rctl_dict_entry_t *rde;
    343 
    344 		for (rde = rctl_lists[i];
    345 		    rde != NULL;
    346 		    rde = rde->rcd_next) {
    347 			size_t length = strlen(rde->rcd_name) + 1;
    348 
    349 			cpy_size += length;
    350 
    351 			if (cpy_size > req_size) {
    352 				kmem_free(*rbufp, req_size);
    353 				mutex_exit(&rctl_lists_lock);
    354 				goto rctl_rebuild_name_buf;
    355 			}
    356 
    357 			bcopy(rde->rcd_name, rbufloc, length);
    358 			rbufloc += length;
    359 		}
    360 	}
    361 	mutex_exit(&rctl_lists_lock);
    362 
    363 	return (req_size);
    364 }
    365 
    366 /*
    367  * rctl_dict_entry_t *rctl_dict_lookup(const char *)
    368  *
    369  * Overview
    370  *   rctl_dict_lookup() returns the resource control dictionary entry for the
    371  *   named resource control.
    372  *
    373  * Return values
    374  *   A pointer to the appropriate resource control dictionary entry, or NULL if
    375  *   no such named entry exists.
    376  *
    377  * Caller's context
    378  *   Caller must not be holding rctl_dict_lock.
    379  */
    380 rctl_dict_entry_t *
    381 rctl_dict_lookup(const char *name)
    382 {
    383 	rctl_dict_entry_t *rde;
    384 
    385 	mutex_enter(&rctl_dict_lock);
    386 
    387 	if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name,
    388 	    (mod_hash_val_t *)&rde) == MH_ERR_NOTFOUND) {
    389 		mutex_exit(&rctl_dict_lock);
    390 		return (NULL);
    391 	}
    392 
    393 	mutex_exit(&rctl_dict_lock);
    394 
    395 	return (rde);
    396 }
    397 
    398 /*
    399  * rctl_hndl_t rctl_hndl_lookup(const char *)
    400  *
    401  * Overview
    402  *   rctl_hndl_lookup() returns the resource control id (the "handle") for the
    403  *   named resource control.
    404  *
    405  * Return values
    406  *   The appropriate id, or -1 if no such named entry exists.
    407  *
    408  * Caller's context
    409  *   Caller must not be holding rctl_dict_lock.
    410  */
    411 rctl_hndl_t
    412 rctl_hndl_lookup(const char *name)
    413 {
    414 	rctl_dict_entry_t *rde;
    415 
    416 	if ((rde = rctl_dict_lookup(name)) == NULL)
    417 		return (-1);
    418 
    419 	return (rde->rcd_id);
    420 }
    421 
    422 /*
    423  * rctl_dict_entry_t * rctl_dict_lookup_hndl(rctl_hndl_t)
    424  *
    425  * Overview
    426  *   rctl_dict_lookup_hndl() completes the public lookup functions, by returning
    427  *   the resource control dictionary entry matching a given resource control id.
    428  *
    429  * Return values
    430  *   A pointer to the matching resource control dictionary entry, or NULL if the
    431  *   id does not match any existing entries.
    432  *
    433  * Caller's context
    434  *   Caller must not be holding rctl_lists_lock.
    435  */
    436 rctl_dict_entry_t *
    437 rctl_dict_lookup_hndl(rctl_hndl_t hndl)
    438 {
    439 	uint_t i;
    440 
    441 	mutex_enter(&rctl_lists_lock);
    442 	for (i = 0; i < RC_MAX_ENTITY + 1; i++) {
    443 		rctl_dict_entry_t *rde;
    444 
    445 		for (rde = rctl_lists[i];
    446 		    rde != NULL;
    447 		    rde = rde->rcd_next)
    448 			if (rde->rcd_id == hndl) {
    449 				mutex_exit(&rctl_lists_lock);
    450 				return (rde);
    451 			}
    452 	}
    453 	mutex_exit(&rctl_lists_lock);
    454 
    455 	return (NULL);
    456 }
    457 
    458 /*
    459  * void rctl_add_default_limit(const char *name, rctl_qty_t value,
    460  *     rctl_priv_t privilege, uint_t action)
    461  *
    462  * Overview
    463  *   Create a default limit with specified value, privilege, and action.
    464  *
    465  * Return value
    466  *   No value returned.
    467  */
    468 void
    469 rctl_add_default_limit(const char *name, rctl_qty_t value,
    470     rctl_priv_t privilege, uint_t action)
    471 {
    472 	rctl_val_t *dval;
    473 	rctl_dict_entry_t *rde;
    474 
    475 	dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
    476 	bzero(dval, sizeof (rctl_val_t));
    477 	dval->rcv_value = value;
    478 	dval->rcv_privilege = privilege;
    479 	dval->rcv_flagaction = action;
    480 	dval->rcv_action_recip_pid = -1;
    481 
    482 	rde = rctl_dict_lookup(name);
    483 	(void) rctl_val_list_insert(&rde->rcd_default_value, dval);
    484 }
    485 
    486 /*
    487  * void rctl_add_legacy_limit(const char *name, const char *mname,
    488  *     const char *lname, rctl_qty_t dflt)
    489  *
    490  * Overview
    491  *   Create a default privileged limit, using the value obtained from
    492  *   /etc/system if it exists and is greater than the specified default
    493  *   value.  Exists primarily for System V IPC.
    494  *
    495  * Return value
    496  *   No value returned.
    497  */
    498 void
    499 rctl_add_legacy_limit(const char *name, const char *mname, const char *lname,
    500     rctl_qty_t dflt, rctl_qty_t max)
    501 {
    502 	rctl_qty_t qty;
    503 
    504 	if (!mod_sysvar(mname, lname, &qty) || (qty < dflt))
    505 		qty = dflt;
    506 
    507 	if (qty > max)
    508 		qty = max;
    509 
    510 	rctl_add_default_limit(name, qty, RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY);
    511 }
    512 
    513 rctl_set_t *
    514 rctl_entity_obtain_rset(rctl_dict_entry_t *rcd, struct proc *p)
    515 {
    516 	rctl_set_t *rset = NULL;
    517 
    518 	if (rcd == NULL)
    519 		return (NULL);
    520 
    521 	switch (rcd->rcd_entity) {
    522 	case RCENTITY_PROCESS:
    523 		rset = p->p_rctls;
    524 		break;
    525 	case RCENTITY_TASK:
    526 		ASSERT(MUTEX_HELD(&p->p_lock));
    527 		if (p->p_task != NULL)
    528 			rset = p->p_task->tk_rctls;
    529 		break;
    530 	case RCENTITY_PROJECT:
    531 		ASSERT(MUTEX_HELD(&p->p_lock));
    532 		if (p->p_task != NULL &&
    533 		    p->p_task->tk_proj != NULL)
    534 			rset = p->p_task->tk_proj->kpj_rctls;
    535 		break;
    536 	case RCENTITY_ZONE:
    537 		ASSERT(MUTEX_HELD(&p->p_lock));
    538 		if (p->p_zone != NULL)
    539 			rset = p->p_zone->zone_rctls;
    540 		break;
    541 	default:
    542 		panic("unknown rctl entity type %d seen", rcd->rcd_entity);
    543 		break;
    544 	}
    545 
    546 	return (rset);
    547 }
    548 
    549 static void
    550 rctl_entity_obtain_entity_p(rctl_entity_t entity, struct proc *p,
    551     rctl_entity_p_t *e)
    552 {
    553 	e->rcep_p.proc = NULL;
    554 	e->rcep_t = entity;
    555 
    556 	switch (entity) {
    557 	case RCENTITY_PROCESS:
    558 		e->rcep_p.proc = p;
    559 		break;
    560 	case RCENTITY_TASK:
    561 		ASSERT(MUTEX_HELD(&p->p_lock));
    562 		if (p->p_task != NULL)
    563 			e->rcep_p.task = p->p_task;
    564 		break;
    565 	case RCENTITY_PROJECT:
    566 		ASSERT(MUTEX_HELD(&p->p_lock));
    567 		if (p->p_task != NULL &&
    568 		    p->p_task->tk_proj != NULL)
    569 			e->rcep_p.proj = p->p_task->tk_proj;
    570 		break;
    571 	case RCENTITY_ZONE:
    572 		ASSERT(MUTEX_HELD(&p->p_lock));
    573 		if (p->p_zone != NULL)
    574 			e->rcep_p.zone = p->p_zone;
    575 		break;
    576 	default:
    577 		panic("unknown rctl entity type %d seen", entity);
    578 		break;
    579 	}
    580 }
    581 
    582 static void
    583 rctl_gp_alloc(rctl_alloc_gp_t *rcgp)
    584 {
    585 	uint_t i;
    586 
    587 	if (rcgp->rcag_nctls > 0) {
    588 		rctl_t *prev = kmem_cache_alloc(rctl_cache, KM_SLEEP);
    589 		rctl_t *rctl = prev;
    590 
    591 		rcgp->rcag_ctls = prev;
    592 
    593 		for (i = 1; i < rcgp->rcag_nctls; i++) {
    594 			rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP);
    595 			prev->rc_next = rctl;
    596 			prev = rctl;
    597 		}
    598 
    599 		rctl->rc_next = NULL;
    600 	}
    601 
    602 	if (rcgp->rcag_nvals > 0) {
    603 		rctl_val_t *prev = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
    604 		rctl_val_t *rval = prev;
    605 
    606 		rcgp->rcag_vals = prev;
    607 
    608 		for (i = 1; i < rcgp->rcag_nvals; i++) {
    609 			rval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
    610 			prev->rcv_next = rval;
    611 			prev = rval;
    612 		}
    613 
    614 		rval->rcv_next = NULL;
    615 	}
    616 
    617 }
    618 
    619 static rctl_val_t *
    620 rctl_gp_detach_val(rctl_alloc_gp_t *rcgp)
    621 {
    622 	rctl_val_t *rval = rcgp->rcag_vals;
    623 
    624 	ASSERT(rcgp->rcag_nvals > 0);
    625 	rcgp->rcag_nvals--;
    626 	rcgp->rcag_vals = rval->rcv_next;
    627 
    628 	rval->rcv_next = NULL;
    629 
    630 	return (rval);
    631 }
    632 
    633 static rctl_t *
    634 rctl_gp_detach_ctl(rctl_alloc_gp_t *rcgp)
    635 {
    636 	rctl_t *rctl = rcgp->rcag_ctls;
    637 
    638 	ASSERT(rcgp->rcag_nctls > 0);
    639 	rcgp->rcag_nctls--;
    640 	rcgp->rcag_ctls = rctl->rc_next;
    641 
    642 	rctl->rc_next = NULL;
    643 
    644 	return (rctl);
    645 
    646 }
    647 
    648 static void
    649 rctl_gp_free(rctl_alloc_gp_t *rcgp)
    650 {
    651 	rctl_val_t *rval = rcgp->rcag_vals;
    652 	rctl_t *rctl = rcgp->rcag_ctls;
    653 
    654 	while (rval != NULL) {
    655 		rctl_val_t *next = rval->rcv_next;
    656 
    657 		kmem_cache_free(rctl_val_cache, rval);
    658 		rval = next;
    659 	}
    660 
    661 	while (rctl != NULL) {
    662 		rctl_t *next = rctl->rc_next;
    663 
    664 		kmem_cache_free(rctl_cache, rctl);
    665 		rctl = next;
    666 	}
    667 }
    668 
    669 /*
    670  * void rctl_prealloc_destroy(rctl_alloc_gp_t *)
    671  *
    672  * Overview
    673  *   Release all unused memory allocated via one of the "prealloc" functions:
    674  *   rctl_set_init_prealloc, rctl_set_dup_prealloc, or rctl_rlimit_set_prealloc.
    675  *
    676  * Return values
    677  *   None.
    678  *
    679  * Caller's context
    680  *   No restrictions on context.
    681  */
    682 void
    683 rctl_prealloc_destroy(rctl_alloc_gp_t *gp)
    684 {
    685 	rctl_gp_free(gp);
    686 	kmem_free(gp, sizeof (rctl_alloc_gp_t));
    687 }
    688 
    689 /*
    690  * int rctl_val_cmp(rctl_val_t *, rctl_val_t *, int)
    691  *
    692  * Overview
    693  *   This function defines an ordering to rctl_val_t's in order to allow
    694  *   for correct placement in value lists. When the imprecise flag is set,
    695  *   the action recipient is ignored. This is to facilitate insert,
    696  *   delete, and replace operations by rctlsys.
    697  *
    698  * Return values
    699  *   0 if the val_t's are are considered identical
    700  *   -1 if a is ordered lower than b
    701  *   1 if a is lowered higher than b
    702  *
    703  * Caller's context
    704  *   No restrictions on context.
    705  */
    706 int
    707 rctl_val_cmp(rctl_val_t *a, rctl_val_t *b, int imprecise)
    708 {
    709 	if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) <
    710 	    (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL))
    711 		return (-1);
    712 
    713 	if ((a->rcv_flagaction & RCTL_LOCAL_MAXIMAL) >
    714 	    (b->rcv_flagaction & RCTL_LOCAL_MAXIMAL))
    715 		return (1);
    716 
    717 	if (a->rcv_value < b->rcv_value)
    718 		return (-1);
    719 
    720 	if (a->rcv_value > b->rcv_value)
    721 		return (1);
    722 
    723 	if ((a->rcv_flagaction & RCTL_LOCAL_DENY) <
    724 	    (b->rcv_flagaction & RCTL_LOCAL_DENY))
    725 		return (-1);
    726 
    727 	if ((a->rcv_flagaction & RCTL_LOCAL_DENY) >
    728 	    (b->rcv_flagaction & RCTL_LOCAL_DENY))
    729 		return (1);
    730 
    731 	if (a->rcv_privilege < b->rcv_privilege)
    732 		return (-1);
    733 
    734 	if (a->rcv_privilege > b->rcv_privilege)
    735 		return (1);
    736 
    737 	if (imprecise)
    738 		return (0);
    739 
    740 	if (a->rcv_action_recip_pid < b->rcv_action_recip_pid)
    741 		return (-1);
    742 
    743 	if (a->rcv_action_recip_pid > b->rcv_action_recip_pid)
    744 		return (1);
    745 
    746 	return (0);
    747 }
    748 
    749 static rctl_val_t *
    750 rctl_val_list_find(rctl_val_t **head, rctl_val_t *cval)
    751 {
    752 	rctl_val_t *rval = *head;
    753 
    754 	while (rval != NULL) {
    755 		if (rctl_val_cmp(cval, rval, 0) == 0)
    756 			return (rval);
    757 
    758 		rval = rval->rcv_next;
    759 	}
    760 
    761 	return (NULL);
    762 
    763 }
    764 
    765 /*
    766  * int rctl_val_list_insert(rctl_val_t **, rctl_val_t *)
    767  *
    768  * Overview
    769  *   This function inserts the rctl_val_t into the value list provided.
    770  *   The insert is always successful unless if the value is a duplicate
    771  *   of one already in the list.
    772  *
    773  * Return values
    774  *    1 if the value was a duplicate of an existing value in the list.
    775  *    0 if the insert was successful.
    776  */
    777 int
    778 rctl_val_list_insert(rctl_val_t **root, rctl_val_t *rval)
    779 {
    780 	rctl_val_t *prev;
    781 	int equiv;
    782 
    783 	rval->rcv_next = NULL;
    784 	rval->rcv_prev = NULL;
    785 
    786 	if (*root == NULL) {
    787 		*root = rval;
    788 		return (0);
    789 	}
    790 
    791 	equiv = rctl_val_cmp(rval, *root, 0);
    792 
    793 	if (equiv == 0)
    794 		return (1);
    795 
    796 	if (equiv < 0) {
    797 		rval->rcv_next = *root;
    798 		rval->rcv_next->rcv_prev = rval;
    799 		*root = rval;
    800 
    801 		return (0);
    802 	}
    803 
    804 	prev = *root;
    805 	while (prev->rcv_next != NULL &&
    806 	    (equiv = rctl_val_cmp(rval, prev->rcv_next, 0)) > 0) {
    807 		prev = prev->rcv_next;
    808 	}
    809 
    810 	if (equiv == 0)
    811 		return (1);
    812 
    813 	rval->rcv_next = prev->rcv_next;
    814 	if (rval->rcv_next != NULL)
    815 		rval->rcv_next->rcv_prev = rval;
    816 	prev->rcv_next = rval;
    817 	rval->rcv_prev = prev;
    818 
    819 	return (0);
    820 }
    821 
    822 static int
    823 rctl_val_list_delete(rctl_val_t **root, rctl_val_t *rval)
    824 {
    825 	rctl_val_t *prev;
    826 
    827 	if (*root == NULL)
    828 		return (-1);
    829 
    830 	prev = *root;
    831 	if (rctl_val_cmp(rval, prev, 0) == 0) {
    832 		*root = prev->rcv_next;
    833 		if (*root != NULL)
    834 			(*root)->rcv_prev = NULL;
    835 
    836 		kmem_cache_free(rctl_val_cache, prev);
    837 
    838 		return (0);
    839 	}
    840 
    841 	while (prev->rcv_next != NULL &&
    842 	    rctl_val_cmp(rval, prev->rcv_next, 0) != 0) {
    843 		prev = prev->rcv_next;
    844 	}
    845 
    846 	if (prev->rcv_next == NULL) {
    847 		/*
    848 		 * If we navigate the entire list and cannot find a match, then
    849 		 * return failure.
    850 		 */
    851 		return (-1);
    852 	}
    853 
    854 	prev = prev->rcv_next;
    855 	prev->rcv_prev->rcv_next = prev->rcv_next;
    856 	if (prev->rcv_next != NULL)
    857 		prev->rcv_next->rcv_prev = prev->rcv_prev;
    858 
    859 	kmem_cache_free(rctl_val_cache, prev);
    860 
    861 	return (0);
    862 }
    863 
    864 static rctl_val_t *
    865 rctl_val_list_dup(rctl_val_t *rval, rctl_alloc_gp_t *ragp, struct proc *oldp,
    866     struct proc *newp)
    867 {
    868 	rctl_val_t *head = NULL;
    869 
    870 	for (; rval != NULL; rval = rval->rcv_next) {
    871 		rctl_val_t *dval = rctl_gp_detach_val(ragp);
    872 
    873 		bcopy(rval, dval, sizeof (rctl_val_t));
    874 		dval->rcv_prev = dval->rcv_next = NULL;
    875 
    876 		if (oldp == NULL ||
    877 		    rval->rcv_action_recipient == NULL ||
    878 		    rval->rcv_action_recipient == oldp) {
    879 			if (rval->rcv_privilege == RCPRIV_BASIC) {
    880 				dval->rcv_action_recipient = newp;
    881 				dval->rcv_action_recip_pid = newp->p_pid;
    882 			} else {
    883 				dval->rcv_action_recipient = NULL;
    884 				dval->rcv_action_recip_pid = -1;
    885 			}
    886 
    887 			(void) rctl_val_list_insert(&head, dval);
    888 		} else {
    889 			kmem_cache_free(rctl_val_cache, dval);
    890 		}
    891 	}
    892 
    893 	return (head);
    894 }
    895 
    896 static void
    897 rctl_val_list_reset(rctl_val_t *rval)
    898 {
    899 	for (; rval != NULL; rval = rval->rcv_next)
    900 		rval->rcv_firing_time = 0;
    901 }
    902 
    903 static uint_t
    904 rctl_val_list_count(rctl_val_t *rval)
    905 {
    906 	uint_t n = 0;
    907 
    908 	for (; rval != NULL; rval = rval->rcv_next)
    909 		n++;
    910 
    911 	return (n);
    912 }
    913 
    914 
    915 static void
    916 rctl_val_list_free(rctl_val_t *rval)
    917 {
    918 	while (rval != NULL) {
    919 		rctl_val_t *next = rval->rcv_next;
    920 
    921 		kmem_cache_free(rctl_val_cache, rval);
    922 
    923 		rval = next;
    924 	}
    925 }
    926 
    927 /*
    928  * rctl_qty_t rctl_model_maximum(rctl_dict_entry_t *, struct proc *)
    929  *
    930  * Overview
    931  *   In cases where the operating system supports more than one process
    932  *   addressing model, the operating system capabilities will exceed those of
    933  *   one or more of these models.  Processes in a less capable model must have
    934  *   their resources accurately controlled, without diluting those of their
    935  *   descendants reached via exec().  rctl_model_maximum() returns the governing
    936  *   value for the specified process with respect to a resource control, such
    937  *   that the value can used for the RCTLOP_SET callback or compatability
    938  *   support.
    939  *
    940  * Return values
    941  *   The maximum value for the given process for the specified resource control.
    942  *
    943  * Caller's context
    944  *   No restrictions on context.
    945  */
    946 rctl_qty_t
    947 rctl_model_maximum(rctl_dict_entry_t *rde, struct proc *p)
    948 {
    949 	if (p->p_model == DATAMODEL_NATIVE)
    950 		return (rde->rcd_max_native);
    951 
    952 	return (rde->rcd_max_ilp32);
    953 }
    954 
    955 /*
    956  * rctl_qty_t rctl_model_value(rctl_dict_entry_t *, struct proc *, rctl_qty_t)
    957  *
    958  * Overview
    959  *   Convenience function wrapping the rctl_model_maximum() functionality.
    960  *
    961  * Return values
    962  *   The lesser of the process's maximum value and the given value for the
    963  *   specified resource control.
    964  *
    965  * Caller's context
    966  *   No restrictions on context.
    967  */
    968 rctl_qty_t
    969 rctl_model_value(rctl_dict_entry_t *rde, struct proc *p, rctl_qty_t value)
    970 {
    971 	rctl_qty_t max = rctl_model_maximum(rde, p);
    972 
    973 	return (value < max ? value : max);
    974 }
    975 
    976 static void
    977 rctl_set_insert(rctl_set_t *set, rctl_hndl_t hndl, rctl_t *rctl)
    978 {
    979 	uint_t index = hndl % rctl_set_size;
    980 	rctl_t *next_ctl, *prev_ctl;
    981 
    982 	ASSERT(MUTEX_HELD(&set->rcs_lock));
    983 
    984 	rctl->rc_next = NULL;
    985 
    986 	if (set->rcs_ctls[index] == NULL) {
    987 		set->rcs_ctls[index] = rctl;
    988 		return;
    989 	}
    990 
    991 	if (hndl < set->rcs_ctls[index]->rc_id) {
    992 		rctl->rc_next = set->rcs_ctls[index];
    993 		set->rcs_ctls[index] = rctl;
    994 
    995 		return;
    996 	}
    997 
    998 	for (next_ctl = set->rcs_ctls[index]->rc_next,
    999 	    prev_ctl = set->rcs_ctls[index];
   1000 	    next_ctl != NULL;
   1001 	    prev_ctl = next_ctl,
   1002 	    next_ctl = next_ctl->rc_next) {
   1003 		if (next_ctl->rc_id > hndl) {
   1004 			rctl->rc_next = next_ctl;
   1005 			prev_ctl->rc_next = rctl;
   1006 
   1007 			return;
   1008 		}
   1009 	}
   1010 
   1011 	rctl->rc_next = next_ctl;
   1012 	prev_ctl->rc_next = rctl;
   1013 }
   1014 
   1015 /*
   1016  * rctl_set_t *rctl_set_create()
   1017  *
   1018  * Overview
   1019  *   Create an empty resource control set, suitable for attaching to a
   1020  *   controlled entity.
   1021  *
   1022  * Return values
   1023  *   A pointer to the newly created set.
   1024  *
   1025  * Caller's context
   1026  *   Safe for KM_SLEEP allocations.
   1027  */
   1028 rctl_set_t *
   1029 rctl_set_create()
   1030 {
   1031 	rctl_set_t *rset = kmem_zalloc(sizeof (rctl_set_t), KM_SLEEP);
   1032 
   1033 	mutex_init(&rset->rcs_lock, NULL, MUTEX_DEFAULT, NULL);
   1034 	rset->rcs_ctls = kmem_zalloc(rctl_set_size * sizeof (rctl_t *),
   1035 	    KM_SLEEP);
   1036 	rset->rcs_entity = -1;
   1037 
   1038 	return (rset);
   1039 }
   1040 
   1041 /*
   1042  * rctl_gp_alloc_t *rctl_set_init_prealloc(rctl_entity_t)
   1043  *
   1044  * Overview
   1045  *    rctl_set_init_prealloc() examines the globally defined resource controls
   1046  *    and their default values and returns a resource control allocation group
   1047  *    populated with sufficient controls and values to form a representative
   1048  *    resource control set for the specified entity.
   1049  *
   1050  * Return values
   1051  *    A pointer to the newly created allocation group.
   1052  *
   1053  * Caller's context
   1054  *    Caller must be in a context suitable for KM_SLEEP allocations.
   1055  */
   1056 rctl_alloc_gp_t *
   1057 rctl_set_init_prealloc(rctl_entity_t entity)
   1058 {
   1059 	rctl_dict_entry_t *rde;
   1060 	rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP);
   1061 
   1062 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
   1063 
   1064 	if (rctl_lists[entity] == NULL)
   1065 		return (ragp);
   1066 
   1067 	mutex_enter(&rctl_lists_lock);
   1068 
   1069 	for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) {
   1070 		ragp->rcag_nctls++;
   1071 		ragp->rcag_nvals += rctl_val_list_count(rde->rcd_default_value);
   1072 	}
   1073 
   1074 	mutex_exit(&rctl_lists_lock);
   1075 
   1076 	rctl_gp_alloc(ragp);
   1077 
   1078 	return (ragp);
   1079 }
   1080 
   1081 /*
   1082  * rctl_set_t *rctl_set_init(rctl_entity_t)
   1083  *
   1084  * Overview
   1085  *   rctl_set_create() creates a resource control set, initialized with the
   1086  *   system infinite values on all registered controls, for attachment to a
   1087  *   system entity requiring resource controls, such as a process or a task.
   1088  *
   1089  * Return values
   1090  *   A pointer to the newly filled set.
   1091  *
   1092  * Caller's context
   1093  *   Caller must be holding p_lock on entry so that RCTLOP_SET() functions
   1094  *   may modify task and project members based on the proc structure
   1095  *   they are passed.
   1096  */
   1097 rctl_set_t *
   1098 rctl_set_init(rctl_entity_t entity, struct proc *p, rctl_entity_p_t *e,
   1099     rctl_set_t *rset, rctl_alloc_gp_t *ragp)
   1100 {
   1101 	rctl_dict_entry_t *rde;
   1102 
   1103 	ASSERT(MUTEX_HELD(&p->p_lock));
   1104 	ASSERT(e);
   1105 	rset->rcs_entity = entity;
   1106 
   1107 	if (rctl_lists[entity] == NULL)
   1108 		return (rset);
   1109 
   1110 	mutex_enter(&rctl_lists_lock);
   1111 	mutex_enter(&rset->rcs_lock);
   1112 
   1113 	for (rde = rctl_lists[entity]; rde != NULL; rde = rde->rcd_next) {
   1114 		rctl_t *rctl = rctl_gp_detach_ctl(ragp);
   1115 
   1116 		rctl->rc_dict_entry = rde;
   1117 		rctl->rc_id = rde->rcd_id;
   1118 		rctl->rc_projdb = NULL;
   1119 
   1120 		rctl->rc_values = rctl_val_list_dup(rde->rcd_default_value,
   1121 		    ragp, NULL, p);
   1122 		rctl->rc_cursor = rctl->rc_values;
   1123 
   1124 		ASSERT(rctl->rc_cursor != NULL);
   1125 
   1126 		rctl_set_insert(rset, rde->rcd_id, rctl);
   1127 
   1128 		RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p,
   1129 		    rctl->rc_cursor->rcv_value));
   1130 	}
   1131 
   1132 	mutex_exit(&rset->rcs_lock);
   1133 	mutex_exit(&rctl_lists_lock);
   1134 
   1135 	return (rset);
   1136 }
   1137 
   1138 static rctl_t *
   1139 rctl_dup(rctl_t *rctl, rctl_alloc_gp_t *ragp, struct proc *oldp,
   1140     struct proc *newp)
   1141 {
   1142 	rctl_t *dup = rctl_gp_detach_ctl(ragp);
   1143 	rctl_val_t *dval;
   1144 
   1145 	dup->rc_id = rctl->rc_id;
   1146 	dup->rc_dict_entry = rctl->rc_dict_entry;
   1147 	dup->rc_next = NULL;
   1148 	dup->rc_cursor = NULL;
   1149 	dup->rc_values = rctl_val_list_dup(rctl->rc_values, ragp, oldp, newp);
   1150 
   1151 	for (dval = dup->rc_values;
   1152 	    dval != NULL; dval = dval->rcv_next) {
   1153 		if (rctl_val_cmp(rctl->rc_cursor, dval, 0) >= 0) {
   1154 			dup->rc_cursor = dval;
   1155 			break;
   1156 		}
   1157 	}
   1158 
   1159 	if (dup->rc_cursor == NULL)
   1160 		dup->rc_cursor = dup->rc_values;
   1161 
   1162 	return (dup);
   1163 }
   1164 
   1165 static void
   1166 rctl_set_fill_alloc_gp(rctl_set_t *set, rctl_alloc_gp_t *ragp)
   1167 {
   1168 	uint_t i;
   1169 
   1170 	bzero(ragp, sizeof (rctl_alloc_gp_t));
   1171 
   1172 	for (i = 0; i < rctl_set_size; i++) {
   1173 		rctl_t *r = set->rcs_ctls[i];
   1174 
   1175 		while (r != NULL) {
   1176 			ragp->rcag_nctls++;
   1177 
   1178 			ragp->rcag_nvals += rctl_val_list_count(r->rc_values);
   1179 
   1180 			r = r->rc_next;
   1181 		}
   1182 	}
   1183 }
   1184 
   1185 /*
   1186  * rctl_alloc_gp_t *rctl_set_dup_prealloc(rctl_set_t *)
   1187  *
   1188  * Overview
   1189  *   Given a resource control set, allocate a sufficiently large allocation
   1190  *   group to contain a duplicate of the set.
   1191  *
   1192  * Return value
   1193  *   A pointer to the newly created allocation group.
   1194  *
   1195  * Caller's context
   1196  *   Safe for KM_SLEEP allocations.
   1197  */
   1198 rctl_alloc_gp_t *
   1199 rctl_set_dup_prealloc(rctl_set_t *set)
   1200 {
   1201 	rctl_alloc_gp_t *ragp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP);
   1202 
   1203 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
   1204 
   1205 	mutex_enter(&set->rcs_lock);
   1206 	rctl_set_fill_alloc_gp(set, ragp);
   1207 	mutex_exit(&set->rcs_lock);
   1208 
   1209 	rctl_gp_alloc(ragp);
   1210 
   1211 	return (ragp);
   1212 }
   1213 
   1214 /*
   1215  * int rctl_set_dup_ready(rctl_set_t *, rctl_alloc_gp_t *)
   1216  *
   1217  * Overview
   1218  *   Verify that the allocation group provided is large enough to allow a
   1219  *   duplicate of the given resource control set to be constructed from its
   1220  *   contents.
   1221  *
   1222  * Return values
   1223  *   1 if the allocation group is sufficiently large, 0 otherwise.
   1224  *
   1225  * Caller's context
   1226  *   rcs_lock must be held prior to entry.
   1227  */
   1228 int
   1229 rctl_set_dup_ready(rctl_set_t *set, rctl_alloc_gp_t *ragp)
   1230 {
   1231 	rctl_alloc_gp_t curr_gp;
   1232 
   1233 	ASSERT(MUTEX_HELD(&set->rcs_lock));
   1234 
   1235 	rctl_set_fill_alloc_gp(set, &curr_gp);
   1236 
   1237 	if (curr_gp.rcag_nctls <= ragp->rcag_nctls &&
   1238 	    curr_gp.rcag_nvals <= ragp->rcag_nvals)
   1239 		return (1);
   1240 
   1241 	return (0);
   1242 }
   1243 
   1244 /*
   1245  * rctl_set_t *rctl_set_dup(rctl_set_t *, struct proc *, struct proc *,
   1246  *   rctl_set_t *, rctl_alloc_gp_t *, int)
   1247  *
   1248  * Overview
   1249  *   Make a duplicate of the resource control set.  The proc pointers are those
   1250  *   of the owning process and of the process associated with the entity
   1251  *   receiving the duplicate.
   1252  *
   1253  *   Duplication is a 3 stage process. Stage 1 is memory allocation for
   1254  *   the duplicate set, which is taken care of by rctl_set_dup_prealloc().
   1255  *   Stage 2 consists of copying all rctls and values from the old set into
   1256  *   the new. Stage 3 completes the duplication by performing the appropriate
   1257  *   callbacks for each rctl in the new set.
   1258  *
   1259  *   Stages 2 and 3 are handled by calling rctl_set_dup with the RCD_DUP and
   1260  *   RCD_CALLBACK functions, respectively. The RCD_CALLBACK flag may only
   1261  *   be supplied if the newp proc structure reflects the new task and
   1262  *   project linkage.
   1263  *
   1264  * Return value
   1265  *   A pointer to the duplicate set.
   1266  *
   1267  * Caller's context
   1268  *   The rcs_lock of the set to be duplicated must be held prior to entry.
   1269  */
   1270 rctl_set_t *
   1271 rctl_set_dup(rctl_set_t *set, struct proc *oldp, struct proc *newp,
   1272     rctl_entity_p_t *e, rctl_set_t *dup, rctl_alloc_gp_t *ragp, int flag)
   1273 {
   1274 	uint_t i;
   1275 	rctl_set_t	*iter;
   1276 
   1277 	ASSERT((flag & RCD_DUP) || (flag & RCD_CALLBACK));
   1278 	ASSERT(e);
   1279 	/*
   1280 	 * When copying the old set, iterate over that. Otherwise, when
   1281 	 * only callbacks have been requested, iterate over the dup set.
   1282 	 */
   1283 	if (flag & RCD_DUP) {
   1284 		ASSERT(MUTEX_HELD(&set->rcs_lock));
   1285 		iter = set;
   1286 		dup->rcs_entity = set->rcs_entity;
   1287 	} else {
   1288 		iter = dup;
   1289 	}
   1290 
   1291 	mutex_enter(&dup->rcs_lock);
   1292 
   1293 	for (i = 0; i < rctl_set_size; i++) {
   1294 		rctl_t *r = iter->rcs_ctls[i];
   1295 		rctl_t *d;
   1296 
   1297 		while (r != NULL) {
   1298 			if (flag & RCD_DUP) {
   1299 				d = rctl_dup(r, ragp, oldp, newp);
   1300 				rctl_set_insert(dup, r->rc_id, d);
   1301 			} else {
   1302 				d = r;
   1303 			}
   1304 
   1305 			if (flag & RCD_CALLBACK)
   1306 				RCTLOP_SET(d, newp, e,
   1307 				    rctl_model_value(d->rc_dict_entry, newp,
   1308 				    d->rc_cursor->rcv_value));
   1309 
   1310 			r = r->rc_next;
   1311 		}
   1312 	}
   1313 
   1314 	mutex_exit(&dup->rcs_lock);
   1315 
   1316 	return (dup);
   1317 }
   1318 
   1319 /*
   1320  * void rctl_set_free(rctl_set_t *)
   1321  *
   1322  * Overview
   1323  *   Delete resource control set and all attached values.
   1324  *
   1325  * Return values
   1326  *   No value returned.
   1327  *
   1328  * Caller's context
   1329  *   No restrictions on context.
   1330  */
   1331 void
   1332 rctl_set_free(rctl_set_t *set)
   1333 {
   1334 	uint_t i;
   1335 
   1336 	mutex_enter(&set->rcs_lock);
   1337 	for (i = 0; i < rctl_set_size; i++) {
   1338 		rctl_t *r = set->rcs_ctls[i];
   1339 
   1340 		while (r != NULL) {
   1341 			rctl_val_t *v = r->rc_values;
   1342 			rctl_t *n = r->rc_next;
   1343 
   1344 			kmem_cache_free(rctl_cache, r);
   1345 
   1346 			rctl_val_list_free(v);
   1347 
   1348 			r = n;
   1349 		}
   1350 	}
   1351 	mutex_exit(&set->rcs_lock);
   1352 
   1353 	kmem_free(set->rcs_ctls, sizeof (rctl_t *) * rctl_set_size);
   1354 	kmem_free(set, sizeof (rctl_set_t));
   1355 }
   1356 
   1357 /*
   1358  * void rctl_set_reset(rctl_set_t *)
   1359  *
   1360  * Overview
   1361  *   Resets all rctls within the set such that the lowest value becomes active.
   1362  *
   1363  * Return values
   1364  *   No value returned.
   1365  *
   1366  * Caller's context
   1367  *   No restrictions on context.
   1368  */
   1369 void
   1370 rctl_set_reset(rctl_set_t *set, struct proc *p, rctl_entity_p_t *e)
   1371 {
   1372 	uint_t i;
   1373 
   1374 	ASSERT(e);
   1375 
   1376 	mutex_enter(&set->rcs_lock);
   1377 	for (i = 0; i < rctl_set_size; i++) {
   1378 		rctl_t *r = set->rcs_ctls[i];
   1379 
   1380 		while (r != NULL) {
   1381 			r->rc_cursor = r->rc_values;
   1382 			rctl_val_list_reset(r->rc_cursor);
   1383 			RCTLOP_SET(r, p, e, rctl_model_value(r->rc_dict_entry,
   1384 			    p, r->rc_cursor->rcv_value));
   1385 
   1386 			ASSERT(r->rc_cursor != NULL);
   1387 
   1388 			r = r->rc_next;
   1389 		}
   1390 	}
   1391 
   1392 	mutex_exit(&set->rcs_lock);
   1393 }
   1394 
   1395 /*
   1396  * void rctl_set_tearoff(rctl_set *, struct proc *)
   1397  *
   1398  * Overview
   1399  *   Tear off any resource control values on this set with an action recipient
   1400  *   equal to the specified process (as they are becoming invalid with the
   1401  *   process's departure from this set as an observer).
   1402  *
   1403  * Return values
   1404  *   No value returned.
   1405  *
   1406  * Caller's context
   1407  *   No restrictions on context
   1408  */
   1409 void
   1410 rctl_set_tearoff(rctl_set_t *set, struct proc *p)
   1411 {
   1412 	uint_t i;
   1413 
   1414 	mutex_enter(&set->rcs_lock);
   1415 	for (i = 0; i < rctl_set_size; i++) {
   1416 		rctl_t *r = set->rcs_ctls[i];
   1417 
   1418 		while (r != NULL) {
   1419 			rctl_val_t *rval;
   1420 
   1421 tearoff_rewalk_list:
   1422 			rval = r->rc_values;
   1423 
   1424 			while (rval != NULL) {
   1425 				if (rval->rcv_privilege == RCPRIV_BASIC &&
   1426 				    rval->rcv_action_recipient == p) {
   1427 					if (r->rc_cursor == rval)
   1428 						r->rc_cursor = rval->rcv_next;
   1429 
   1430 					(void) rctl_val_list_delete(
   1431 					    &r->rc_values, rval);
   1432 
   1433 					goto tearoff_rewalk_list;
   1434 				}
   1435 
   1436 				rval = rval->rcv_next;
   1437 			}
   1438 
   1439 			ASSERT(r->rc_cursor != NULL);
   1440 
   1441 			r = r->rc_next;
   1442 		}
   1443 	}
   1444 
   1445 	mutex_exit(&set->rcs_lock);
   1446 }
   1447 
   1448 int
   1449 rctl_set_find(rctl_set_t *set, rctl_hndl_t hndl, rctl_t **rctl)
   1450 {
   1451 	uint_t index = hndl % rctl_set_size;
   1452 	rctl_t *curr_ctl;
   1453 
   1454 	ASSERT(MUTEX_HELD(&set->rcs_lock));
   1455 
   1456 	for (curr_ctl = set->rcs_ctls[index]; curr_ctl != NULL;
   1457 	    curr_ctl = curr_ctl->rc_next) {
   1458 		if (curr_ctl->rc_id == hndl) {
   1459 			*rctl = curr_ctl;
   1460 
   1461 			return (0);
   1462 		}
   1463 	}
   1464 
   1465 	return (-1);
   1466 }
   1467 
   1468 /*
   1469  * rlim64_t rctl_enforced_value(rctl_hndl_t, rctl_set_t *, struct proc *)
   1470  *
   1471  * Overview
   1472  *   Given a process, get the next enforced value on the rctl of the specified
   1473  *   handle.
   1474  *
   1475  * Return value
   1476  *   The enforced value.
   1477  *
   1478  * Caller's context
   1479  *   For controls on process collectives, p->p_lock must be held across the
   1480  *   operation.
   1481  */
   1482 /*ARGSUSED*/
   1483 rctl_qty_t
   1484 rctl_enforced_value(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p)
   1485 {
   1486 	rctl_t *rctl;
   1487 	rlim64_t ret;
   1488 
   1489 	mutex_enter(&rset->rcs_lock);
   1490 
   1491 	if (rctl_set_find(rset, hndl, &rctl) == -1)
   1492 		panic("unknown resource control handle %d requested", hndl);
   1493 	else
   1494 		ret = rctl_model_value(rctl->rc_dict_entry, p,
   1495 		    rctl->rc_cursor->rcv_value);
   1496 
   1497 	mutex_exit(&rset->rcs_lock);
   1498 
   1499 	return (ret);
   1500 }
   1501 
   1502 /*
   1503  * int rctl_global_get(const char *, rctl_dict_entry_t *)
   1504  *
   1505  * Overview
   1506  *   Copy a sanitized version of the global rctl for a given resource control
   1507  *   name.  (By sanitization, we mean that the unsafe data pointers have been
   1508  *   zeroed.)
   1509  *
   1510  * Return value
   1511  *   -1 if name not defined, 0 otherwise.
   1512  *
   1513  * Caller's context
   1514  *   No restrictions on context.  rctl_dict_lock must not be held.
   1515  */
   1516 int
   1517 rctl_global_get(const char *name, rctl_dict_entry_t *drde)
   1518 {
   1519 	rctl_dict_entry_t *rde = rctl_dict_lookup(name);
   1520 
   1521 	if (rde == NULL)
   1522 		return (-1);
   1523 
   1524 	bcopy(rde, drde, sizeof (rctl_dict_entry_t));
   1525 
   1526 	drde->rcd_next = NULL;
   1527 	drde->rcd_ops = NULL;
   1528 
   1529 	return (0);
   1530 }
   1531 
   1532 /*
   1533  * int rctl_global_set(const char *, rctl_dict_entry_t *)
   1534  *
   1535  * Overview
   1536  *   Transfer the settable fields of the named rctl to the global rctl matching
   1537  *   the given resource control name.
   1538  *
   1539  * Return value
   1540  *   -1 if name not defined, 0 otherwise.
   1541  *
   1542  * Caller's context
   1543  *   No restrictions on context.  rctl_dict_lock must not be held.
   1544  */
   1545 int
   1546 rctl_global_set(const char *name, rctl_dict_entry_t *drde)
   1547 {
   1548 	rctl_dict_entry_t *rde = rctl_dict_lookup(name);
   1549 
   1550 	if (rde == NULL)
   1551 		return (-1);
   1552 
   1553 	rde->rcd_flagaction = drde->rcd_flagaction;
   1554 	rde->rcd_syslog_level = drde->rcd_syslog_level;
   1555 	rde->rcd_strlog_flags = drde->rcd_strlog_flags;
   1556 
   1557 	return (0);
   1558 }
   1559 
   1560 static int
   1561 rctl_local_op(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval,
   1562     int (*cbop)(rctl_hndl_t, struct proc *p, rctl_entity_p_t *e, rctl_t *,
   1563     rctl_val_t *, rctl_val_t *), struct proc *p)
   1564 {
   1565 	rctl_t *rctl;
   1566 	rctl_set_t *rset;
   1567 	rctl_entity_p_t e;
   1568 	int ret = 0;
   1569 	rctl_dict_entry_t *rde = rctl_dict_lookup_hndl(hndl);
   1570 
   1571 local_op_retry:
   1572 
   1573 	ASSERT(MUTEX_HELD(&p->p_lock));
   1574 
   1575 	rset = rctl_entity_obtain_rset(rde, p);
   1576 
   1577 	if (rset == NULL) {
   1578 		return (-1);
   1579 	}
   1580 	rctl_entity_obtain_entity_p(rset->rcs_entity, p, &e);
   1581 
   1582 	mutex_enter(&rset->rcs_lock);
   1583 
   1584 	/* using rctl's hndl, get rctl from local set */
   1585 	if (rctl_set_find(rset, hndl, &rctl) == -1) {
   1586 		mutex_exit(&rset->rcs_lock);
   1587 		return (-1);
   1588 	}
   1589 
   1590 	ret = cbop(hndl, p, &e, rctl, oval, nval);
   1591 
   1592 	mutex_exit(&rset->rcs_lock);
   1593 	return (ret);
   1594 }
   1595 
   1596 /*ARGSUSED*/
   1597 static int
   1598 rctl_local_get_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e,
   1599     rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval)
   1600 {
   1601 	if (oval == NULL) {
   1602 		/*
   1603 		 * RCTL_FIRST
   1604 		 */
   1605 		bcopy(rctl->rc_values, nval, sizeof (rctl_val_t));
   1606 	} else {
   1607 		/*
   1608 		 * RCTL_NEXT
   1609 		 */
   1610 		rctl_val_t *tval = rctl_val_list_find(&rctl->rc_values, oval);
   1611 
   1612 		if (tval == NULL)
   1613 			return (ESRCH);
   1614 		else if (tval->rcv_next == NULL)
   1615 			return (ENOENT);
   1616 		else
   1617 			bcopy(tval->rcv_next, nval, sizeof (rctl_val_t));
   1618 	}
   1619 
   1620 	return (0);
   1621 }
   1622 
   1623 /*
   1624  * int rctl_local_get(rctl_hndl_t, rctl_val_t *)
   1625  *
   1626  * Overview
   1627  *   Get the rctl value for the given flags.
   1628  *
   1629  * Return values
   1630  *   0 for successful get, errno otherwise.
   1631  */
   1632 int
   1633 rctl_local_get(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval,
   1634     struct proc *p)
   1635 {
   1636 	return (rctl_local_op(hndl, oval, nval, rctl_local_get_cb, p));
   1637 }
   1638 
   1639 /*ARGSUSED*/
   1640 static int
   1641 rctl_local_delete_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e,
   1642     rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval)
   1643 {
   1644 	if ((oval = rctl_val_list_find(&rctl->rc_values, nval)) == NULL)
   1645 		return (ESRCH);
   1646 
   1647 	if (rctl->rc_cursor == oval) {
   1648 		rctl->rc_cursor = oval->rcv_next;
   1649 		rctl_val_list_reset(rctl->rc_cursor);
   1650 		RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p,
   1651 		    rctl->rc_cursor->rcv_value));
   1652 
   1653 		ASSERT(rctl->rc_cursor != NULL);
   1654 	}
   1655 
   1656 	(void) rctl_val_list_delete(&rctl->rc_values, oval);
   1657 
   1658 	return (0);
   1659 }
   1660 
   1661 /*
   1662  * int rctl_local_delete(rctl_hndl_t, rctl_val_t *)
   1663  *
   1664  * Overview
   1665  *   Delete the rctl value for the given flags.
   1666  *
   1667  * Return values
   1668  *   0 for successful delete, errno otherwise.
   1669  */
   1670 int
   1671 rctl_local_delete(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p)
   1672 {
   1673 	return (rctl_local_op(hndl, NULL, val, rctl_local_delete_cb, p));
   1674 }
   1675 
   1676 /*
   1677  * rctl_local_insert_cb()
   1678  *
   1679  * Overview
   1680  *   Insert a new value into the rctl's val list. If an error occurs,
   1681  *   the val list must be left in the same state as when the function
   1682  *   was entered.
   1683  *
   1684  * Return Values
   1685  *   0 for successful insert, EINVAL if the value is duplicated in the
   1686  *   existing list.
   1687  */
   1688 /*ARGSUSED*/
   1689 static int
   1690 rctl_local_insert_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e,
   1691     rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval)
   1692 {
   1693 	/*
   1694 	 * Before inserting, confirm there are no duplicates of this value
   1695 	 * and flag level. If there is a duplicate, flag an error and do
   1696 	 * nothing.
   1697 	 */
   1698 	if (rctl_val_list_insert(&rctl->rc_values, nval) != 0)
   1699 		return (EINVAL);
   1700 
   1701 	if (rctl_val_cmp(nval, rctl->rc_cursor, 0) < 0) {
   1702 		rctl->rc_cursor = nval;
   1703 		rctl_val_list_reset(rctl->rc_cursor);
   1704 		RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p,
   1705 		    rctl->rc_cursor->rcv_value));
   1706 
   1707 		ASSERT(rctl->rc_cursor != NULL);
   1708 	}
   1709 
   1710 	return (0);
   1711 }
   1712 
   1713 /*
   1714  * int rctl_local_insert(rctl_hndl_t, rctl_val_t *)
   1715  *
   1716  * Overview
   1717  *   Insert the rctl value into the appropriate rctl set for the calling
   1718  *   process, given the handle.
   1719  */
   1720 int
   1721 rctl_local_insert(rctl_hndl_t hndl, rctl_val_t *val, struct proc *p)
   1722 {
   1723 	return (rctl_local_op(hndl, NULL, val, rctl_local_insert_cb, p));
   1724 }
   1725 
   1726 /*
   1727  * rctl_local_insert_all_cb()
   1728  *
   1729  * Overview
   1730  *   Called for RCENTITY_PROJECT rctls only, via rctlsys_projset().
   1731  *
   1732  *   Inserts new values from the project database (new_values).  alloc_values
   1733  *   should be a linked list of pre-allocated rctl_val_t, which are used to
   1734  *   populate (rc_projdb).
   1735  *
   1736  *   Should the *new_values linked list match the contents of the rctl's
   1737  *   rp_projdb then we do nothing.
   1738  *
   1739  * Return Values
   1740  *   0 is always returned.
   1741  */
   1742 /*ARGSUSED*/
   1743 static int
   1744 rctl_local_insert_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e,
   1745     rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values)
   1746 {
   1747 	rctl_val_t *val;
   1748 	rctl_val_t *tmp_val;
   1749 	rctl_val_t *next;
   1750 	int modified = 0;
   1751 
   1752 	/*
   1753 	 * If this the first time we've set this project rctl, then we delete
   1754 	 * all the privilege values.  These privilege values have been set by
   1755 	 * rctl_add_default_limit().
   1756 	 *
   1757 	 * We save some cycles here by not calling rctl_val_list_delete().
   1758 	 */
   1759 	if (rctl->rc_projdb == NULL) {
   1760 		val = rctl->rc_values;
   1761 
   1762 		while (val != NULL) {
   1763 			if (val->rcv_privilege == RCPRIV_PRIVILEGED) {
   1764 				if (val->rcv_prev != NULL)
   1765 					val->rcv_prev->rcv_next = val->rcv_next;
   1766 				else
   1767 					rctl->rc_values = val->rcv_next;
   1768 
   1769 				if (val->rcv_next != NULL)
   1770 					val->rcv_next->rcv_prev = val->rcv_prev;
   1771 
   1772 				tmp_val = val;
   1773 				val = val->rcv_next;
   1774 				kmem_cache_free(rctl_val_cache, tmp_val);
   1775 			} else {
   1776 				val = val->rcv_next;
   1777 			}
   1778 		}
   1779 		modified = 1;
   1780 	}
   1781 
   1782 	/*
   1783 	 * Delete active values previously set through the project database.
   1784 	 */
   1785 	val = rctl->rc_projdb;
   1786 
   1787 	while (val != NULL) {
   1788 
   1789 		/* Is the old value found in the new values? */
   1790 		if (rctl_val_list_find(&new_values, val) == NULL) {
   1791 
   1792 			/*
   1793 			 * Delete from the active values if it originated from
   1794 			 * the project database.
   1795 			 */
   1796 			if (((tmp_val = rctl_val_list_find(&rctl->rc_values,
   1797 			    val)) != NULL) &&
   1798 			    (tmp_val->rcv_flagaction & RCTL_LOCAL_PROJDB)) {
   1799 				(void) rctl_val_list_delete(&rctl->rc_values,
   1800 				    tmp_val);
   1801 			}
   1802 
   1803 			tmp_val = val->rcv_next;
   1804 			(void) rctl_val_list_delete(&rctl->rc_projdb, val);
   1805 			val = tmp_val;
   1806 			modified = 1;
   1807 
   1808 		} else
   1809 			val = val->rcv_next;
   1810 	}
   1811 
   1812 	/*
   1813 	 * Insert new values from the project database.
   1814 	 */
   1815 	while (new_values != NULL) {
   1816 		next = new_values->rcv_next;
   1817 
   1818 		/*
   1819 		 * Insert this new value into the rc_projdb, and duplicate this
   1820 		 * entry to the active list.
   1821 		 */
   1822 		if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) {
   1823 
   1824 			tmp_val = alloc_values->rcv_next;
   1825 			bcopy(new_values, alloc_values, sizeof (rctl_val_t));
   1826 			alloc_values->rcv_next = tmp_val;
   1827 
   1828 			if (rctl_val_list_insert(&rctl->rc_values,
   1829 			    alloc_values) == 0) {
   1830 				/* inserted move alloc_values on */
   1831 				alloc_values = tmp_val;
   1832 				modified = 1;
   1833 			}
   1834 		} else {
   1835 			/*
   1836 			 * Unlike setrctl() we don't want to return an error on
   1837 			 * a duplicate entry; we are concerned solely with
   1838 			 * ensuring that all the values specified are set.
   1839 			 */
   1840 			kmem_cache_free(rctl_val_cache, new_values);
   1841 		}
   1842 		new_values = next;
   1843 	}
   1844 
   1845 	/* Teardown any unused rctl_val_t */
   1846 	while (alloc_values != NULL) {
   1847 		tmp_val = alloc_values;
   1848 		alloc_values = alloc_values->rcv_next;
   1849 		kmem_cache_free(rctl_val_cache, tmp_val);
   1850 	}
   1851 
   1852 	/* Reset the cursor if rctl values have been modified */
   1853 	if (modified) {
   1854 		rctl->rc_cursor = rctl->rc_values;
   1855 		rctl_val_list_reset(rctl->rc_cursor);
   1856 		RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p,
   1857 		    rctl->rc_cursor->rcv_value));
   1858 	}
   1859 
   1860 	return (0);
   1861 }
   1862 
   1863 int
   1864 rctl_local_insert_all(rctl_hndl_t hndl, rctl_val_t *new_values,
   1865     rctl_val_t *alloc_values, struct proc *p)
   1866 {
   1867 	return (rctl_local_op(hndl, new_values, alloc_values,
   1868 	    rctl_local_insert_all_cb, p));
   1869 }
   1870 
   1871 /*
   1872  * rctl_local_replace_all_cb()
   1873  *
   1874  * Overview
   1875  *   Called for RCENTITY_PROJECT rctls only, via rctlsys_projset().
   1876  *
   1877  *   Clears the active rctl values (rc_values), and stored values from the
   1878  *   previous insertions from the project database (rc_projdb).
   1879  *
   1880  *   Inserts new values from the project database (new_values).  alloc_values
   1881  *   should be a linked list of pre-allocated rctl_val_t, which are used to
   1882  *   populate (rc_projdb).
   1883  *
   1884  * Return Values
   1885  *   0 is always returned.
   1886  */
   1887 /*ARGSUSED*/
   1888 static int
   1889 rctl_local_replace_all_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e,
   1890     rctl_t *rctl, rctl_val_t *new_values, rctl_val_t *alloc_values)
   1891 {
   1892 	rctl_val_t *val;
   1893 	rctl_val_t *next;
   1894 	rctl_val_t *tmp_val;
   1895 
   1896 	/* Delete all the privilege vaules */
   1897 	val = rctl->rc_values;
   1898 
   1899 	while (val != NULL) {
   1900 		if (val->rcv_privilege == RCPRIV_PRIVILEGED) {
   1901 			if (val->rcv_prev != NULL)
   1902 				val->rcv_prev->rcv_next = val->rcv_next;
   1903 			else
   1904 				rctl->rc_values = val->rcv_next;
   1905 
   1906 			if (val->rcv_next != NULL)
   1907 				val->rcv_next->rcv_prev = val->rcv_prev;
   1908 
   1909 			tmp_val = val;
   1910 			val = val->rcv_next;
   1911 			kmem_cache_free(rctl_val_cache, tmp_val);
   1912 		} else {
   1913 			val = val->rcv_next;
   1914 		}
   1915 	}
   1916 
   1917 	/* Delete the contents of rc_projdb */
   1918 	val = rctl->rc_projdb;
   1919 	while (val != NULL) {
   1920 
   1921 		tmp_val = val;
   1922 		val = val->rcv_next;
   1923 		kmem_cache_free(rctl_val_cache, tmp_val);
   1924 	}
   1925 	rctl->rc_projdb = NULL;
   1926 
   1927 	/*
   1928 	 * Insert new values from the project database.
   1929 	 */
   1930 	while (new_values != NULL) {
   1931 		next = new_values->rcv_next;
   1932 
   1933 		if (rctl_val_list_insert(&rctl->rc_projdb, new_values) == 0) {
   1934 			tmp_val = alloc_values->rcv_next;
   1935 			bcopy(new_values, alloc_values, sizeof (rctl_val_t));
   1936 			alloc_values->rcv_next = tmp_val;
   1937 
   1938 			if (rctl_val_list_insert(&rctl->rc_values,
   1939 			    alloc_values) == 0) {
   1940 				/* inserted, so move alloc_values on */
   1941 				alloc_values = tmp_val;
   1942 			}
   1943 		} else {
   1944 			/*
   1945 			 * Unlike setrctl() we don't want to return an error on
   1946 			 * a duplicate entry; we are concerned solely with
   1947 			 * ensuring that all the values specified are set.
   1948 			 */
   1949 			kmem_cache_free(rctl_val_cache, new_values);
   1950 		}
   1951 
   1952 		new_values = next;
   1953 	}
   1954 
   1955 	/* Teardown any unused rctl_val_t */
   1956 	while (alloc_values != NULL) {
   1957 		tmp_val = alloc_values;
   1958 		alloc_values = alloc_values->rcv_next;
   1959 		kmem_cache_free(rctl_val_cache, tmp_val);
   1960 	}
   1961 
   1962 	/* Always reset the cursor */
   1963 	rctl->rc_cursor = rctl->rc_values;
   1964 	rctl_val_list_reset(rctl->rc_cursor);
   1965 	RCTLOP_SET(rctl, p, e, rctl_model_value(rctl->rc_dict_entry, p,
   1966 	    rctl->rc_cursor->rcv_value));
   1967 
   1968 	return (0);
   1969 }
   1970 
   1971 int
   1972 rctl_local_replace_all(rctl_hndl_t hndl, rctl_val_t *new_values,
   1973     rctl_val_t *alloc_values, struct proc *p)
   1974 {
   1975 	return (rctl_local_op(hndl, new_values, alloc_values,
   1976 	    rctl_local_replace_all_cb, p));
   1977 }
   1978 
   1979 static int
   1980 rctl_local_replace_cb(rctl_hndl_t hndl, struct proc *p, rctl_entity_p_t *e,
   1981     rctl_t *rctl, rctl_val_t *oval, rctl_val_t *nval)
   1982 {
   1983 	int ret;
   1984 	rctl_val_t *tmp;
   1985 
   1986 	/* Verify that old will be delete-able */
   1987 	tmp = rctl_val_list_find(&rctl->rc_values, oval);
   1988 	if (tmp == NULL)
   1989 		return (ESRCH);
   1990 	/*
   1991 	 * Caller should verify that value being deleted is not the
   1992 	 * system value.
   1993 	 */
   1994 	ASSERT(tmp->rcv_privilege != RCPRIV_SYSTEM);
   1995 
   1996 	/*
   1997 	 * rctl_local_insert_cb() does the job of flagging an error
   1998 	 * for any duplicate values. So, call rctl_local_insert_cb()
   1999 	 * for the new value first, then do deletion of the old value.
   2000 	 * Since this is a callback function to rctl_local_op, we can
   2001 	 * count on rcs_lock being held at this point. This guarantees
   2002 	 * that there is at no point a visible list which contains both
   2003 	 * new and old values.
   2004 	 */
   2005 	if (ret = rctl_local_insert_cb(hndl, p, e, rctl, NULL, nval))
   2006 		return (ret);
   2007 
   2008 	ret = rctl_local_delete_cb(hndl, p, e, rctl, NULL, oval);
   2009 	ASSERT(ret == 0);
   2010 	return (0);
   2011 }
   2012 
   2013 /*
   2014  * int rctl_local_replace(rctl_hndl_t, void *, int, uint64_t *)
   2015  *
   2016  * Overview
   2017  *   Replace the rctl value with a new one.
   2018  *
   2019  * Return values
   2020  *   0 for successful replace, errno otherwise.
   2021  */
   2022 int
   2023 rctl_local_replace(rctl_hndl_t hndl, rctl_val_t *oval, rctl_val_t *nval,
   2024     struct proc *p)
   2025 {
   2026 	return (rctl_local_op(hndl, oval, nval, rctl_local_replace_cb, p));
   2027 }
   2028 
   2029 /*
   2030  * int rctl_rlimit_get(rctl_hndl_t, struct proc *, struct rlimit64 *)
   2031  *
   2032  * Overview
   2033  *   To support rlimit compatibility, we need a function which takes a 64-bit
   2034  *   rlimit and encodes it as appropriate rcontrol values on the given rcontrol.
   2035  *   This operation is only intended for legacy rlimits.
   2036  */
   2037 int
   2038 rctl_rlimit_get(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64)
   2039 {
   2040 	rctl_t *rctl;
   2041 	rctl_val_t *rval;
   2042 	rctl_set_t *rset = p->p_rctls;
   2043 	int soft_limit_seen = 0;
   2044 	int test_for_deny = 1;
   2045 
   2046 	mutex_enter(&rset->rcs_lock);
   2047 	if (rctl_set_find(rset, rc, &rctl) == -1) {
   2048 		mutex_exit(&rset->rcs_lock);
   2049 		return (-1);
   2050 	}
   2051 
   2052 	rval = rctl->rc_values;
   2053 
   2054 	if (rctl->rc_dict_entry->rcd_flagaction & (RCTL_GLOBAL_DENY_NEVER |
   2055 	    RCTL_GLOBAL_DENY_ALWAYS))
   2056 		test_for_deny = 0;
   2057 
   2058 	/*
   2059 	 * 1.  Find the first control value with the RCTL_LOCAL_DENY bit set.
   2060 	 */
   2061 	while (rval != NULL && rval->rcv_privilege != RCPRIV_SYSTEM) {
   2062 		if (test_for_deny &&
   2063 		    (rval->rcv_flagaction & RCTL_LOCAL_DENY) == 0) {
   2064 			rval = rval->rcv_next;
   2065 			continue;
   2066 		}
   2067 
   2068 		/*
   2069 		 * 2.  If this is an RCPRIV_BASIC value, then we've found the
   2070 		 * effective soft limit and should set rlim_cur.  We should then
   2071 		 * continue looking for another control value with the DENY bit
   2072 		 * set.
   2073 		 */
   2074 		if (rval->rcv_privilege == RCPRIV_BASIC) {
   2075 			if (soft_limit_seen) {
   2076 				rval = rval->rcv_next;
   2077 				continue;
   2078 			}
   2079 
   2080 			if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 &&
   2081 			    rval->rcv_value < rctl_model_maximum(
   2082 			    rctl->rc_dict_entry, p))
   2083 				rlp64->rlim_cur = rval->rcv_value;
   2084 			else
   2085 				rlp64->rlim_cur = RLIM64_INFINITY;
   2086 			soft_limit_seen = 1;
   2087 
   2088 			rval = rval->rcv_next;
   2089 			continue;
   2090 		}
   2091 
   2092 		/*
   2093 		 * 3.  This is an RCPRIV_PRIVILEGED value.  If we haven't found
   2094 		 * a soft limit candidate, then we've found the effective hard
   2095 		 * and soft limits and should set both  If we had found a soft
   2096 		 * limit, then this is only the hard limit and we need only set
   2097 		 * rlim_max.
   2098 		 */
   2099 		if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 &&
   2100 		    rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry,
   2101 		    p))
   2102 			rlp64->rlim_max = rval->rcv_value;
   2103 		else
   2104 			rlp64->rlim_max = RLIM64_INFINITY;
   2105 		if (!soft_limit_seen)
   2106 			rlp64->rlim_cur = rlp64->rlim_max;
   2107 
   2108 		mutex_exit(&rset->rcs_lock);
   2109 		return (0);
   2110 	}
   2111 
   2112 	if (rval == NULL) {
   2113 		/*
   2114 		 * This control sequence is corrupt, as it is not terminated by
   2115 		 * a system privileged control value.
   2116 		 */
   2117 		mutex_exit(&rset->rcs_lock);
   2118 		return (-1);
   2119 	}
   2120 
   2121 	/*
   2122 	 * 4.  If we run into a RCPRIV_SYSTEM value, then the hard limit (and
   2123 	 * the soft, if we haven't a soft candidate) should be the value of the
   2124 	 * system control value.
   2125 	 */
   2126 	if ((rval->rcv_flagaction & RCTL_LOCAL_MAXIMAL) == 0 &&
   2127 	    rval->rcv_value < rctl_model_maximum(rctl->rc_dict_entry, p))
   2128 		rlp64->rlim_max = rval->rcv_value;
   2129 	else
   2130 		rlp64->rlim_max = RLIM64_INFINITY;
   2131 
   2132 	if (!soft_limit_seen)
   2133 		rlp64->rlim_cur = rlp64->rlim_max;
   2134 
   2135 	mutex_exit(&rset->rcs_lock);
   2136 	return (0);
   2137 }
   2138 
   2139 /*
   2140  * rctl_alloc_gp_t *rctl_rlimit_set_prealloc(uint_t)
   2141  *
   2142  * Overview
   2143  *   Before making a series of calls to rctl_rlimit_set(), we must have a
   2144  *   preallocated batch of resource control values, as rctl_rlimit_set() can
   2145  *   potentially consume two resource control values per call.
   2146  *
   2147  * Return values
   2148  *   A populated resource control allocation group with 2n resource control
   2149  *   values.
   2150  *
   2151  * Caller's context
   2152  *   Must be safe for KM_SLEEP allocations.
   2153  */
   2154 rctl_alloc_gp_t *
   2155 rctl_rlimit_set_prealloc(uint_t n)
   2156 {
   2157 	rctl_alloc_gp_t *gp = kmem_zalloc(sizeof (rctl_alloc_gp_t), KM_SLEEP);
   2158 
   2159 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
   2160 
   2161 	gp->rcag_nvals = 2 * n;
   2162 
   2163 	rctl_gp_alloc(gp);
   2164 
   2165 	return (gp);
   2166 }
   2167 
   2168 /*
   2169  * int rctl_rlimit_set(rctl_hndl_t, struct proc *, struct rlimit64 *, int,
   2170  *   int)
   2171  *
   2172  * Overview
   2173  *   To support rlimit compatibility, we need a function which takes a 64-bit
   2174  *   rlimit and encodes it as appropriate rcontrol values on the given rcontrol.
   2175  *   This operation is only intended for legacy rlimits.
   2176  *
   2177  *   The implementation of rctl_rlimit_set() is a bit clever, as it tries to
   2178  *   minimize the number of values placed on the value sequence in various
   2179  *   cases.  Furthermore, we don't allow multiple identical privilege-action
   2180  *   values on the same sequence.  (That is, we don't want a sequence like
   2181  *   "while (1) { rlim.rlim_cur++; setrlimit(..., rlim); }" to exhaust kernel
   2182  *   memory.)  So we want to delete any values with the same privilege value and
   2183  *   action.
   2184  *
   2185  * Return values
   2186  *   0 for successful set, errno otherwise. Errno will be either EINVAL
   2187  *   or EPERM, in keeping with defined errnos for ulimit() and setrlimit()
   2188  *   system calls.
   2189  */
   2190 /*ARGSUSED*/
   2191 int
   2192 rctl_rlimit_set(rctl_hndl_t rc, struct proc *p, struct rlimit64 *rlp64,
   2193     rctl_alloc_gp_t *ragp, int flagaction, int signal, const cred_t *cr)
   2194 {
   2195 	rctl_t *rctl;
   2196 	rctl_val_t *rval, *rval_priv, *rval_basic;
   2197 	rctl_set_t *rset = p->p_rctls;
   2198 	rctl_qty_t max;
   2199 	rctl_entity_p_t e;
   2200 	struct rlimit64 cur_rl;
   2201 
   2202 	e.rcep_t = RCENTITY_PROCESS;
   2203 	e.rcep_p.proc = p;
   2204 
   2205 	if (rlp64->rlim_cur > rlp64->rlim_max)
   2206 		return (EINVAL);
   2207 
   2208 	if (rctl_rlimit_get(rc, p, &cur_rl) == -1)
   2209 		return (EINVAL);
   2210 
   2211 	/*
   2212 	 * If we are not privileged, we can only lower the hard limit.
   2213 	 */
   2214 	if ((rlp64->rlim_max > cur_rl.rlim_max) &&
   2215 	    cur_rl.rlim_max != RLIM64_INFINITY &&
   2216 	    secpolicy_resource(cr) != 0)
   2217 		return (EPERM);
   2218 
   2219 	mutex_enter(&rset->rcs_lock);
   2220 
   2221 	if (rctl_set_find(rset, rc, &rctl) == -1) {
   2222 		mutex_exit(&rset->rcs_lock);
   2223 		return (EINVAL);
   2224 	}
   2225 
   2226 	rval_priv = rctl_gp_detach_val(ragp);
   2227 
   2228 	rval = rctl->rc_values;
   2229 
   2230 	while (rval != NULL) {
   2231 		rctl_val_t *next = rval->rcv_next;
   2232 
   2233 		if (rval->rcv_privilege == RCPRIV_SYSTEM)
   2234 			break;
   2235 
   2236 		if ((rval->rcv_privilege == RCPRIV_BASIC) ||
   2237 		    (rval->rcv_flagaction & ~RCTL_LOCAL_ACTION_MASK) ==
   2238 		    (flagaction & ~RCTL_LOCAL_ACTION_MASK)) {
   2239 			if (rctl->rc_cursor == rval) {
   2240 				rctl->rc_cursor = rval->rcv_next;
   2241 				rctl_val_list_reset(rctl->rc_cursor);
   2242 				RCTLOP_SET(rctl, p, &e, rctl_model_value(
   2243 				    rctl->rc_dict_entry, p,
   2244 				    rctl->rc_cursor->rcv_value));
   2245 			}
   2246 			(void) rctl_val_list_delete(&rctl->rc_values, rval);
   2247 		}
   2248 
   2249 		rval = next;
   2250 	}
   2251 
   2252 	rval_priv->rcv_privilege = RCPRIV_PRIVILEGED;
   2253 	rval_priv->rcv_flagaction = flagaction;
   2254 	if (rlp64->rlim_max == RLIM64_INFINITY) {
   2255 		rval_priv->rcv_flagaction |= RCTL_LOCAL_MAXIMAL;
   2256 		max = rctl->rc_dict_entry->rcd_max_native;
   2257 	} else {
   2258 		max = rlp64->rlim_max;
   2259 	}
   2260 	rval_priv->rcv_value = max;
   2261 	rval_priv->rcv_action_signal = signal;
   2262 	rval_priv->rcv_action_recipient = NULL;
   2263 	rval_priv->rcv_action_recip_pid = -1;
   2264 	rval_priv->rcv_firing_time = 0;
   2265 	rval_priv->rcv_prev = rval_priv->rcv_next = NULL;
   2266 
   2267 	(void) rctl_val_list_insert(&rctl->rc_values, rval_priv);
   2268 	rctl->rc_cursor = rval_priv;
   2269 	rctl_val_list_reset(rctl->rc_cursor);
   2270 	RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p,
   2271 	    rctl->rc_cursor->rcv_value));
   2272 
   2273 	if (rlp64->rlim_cur != RLIM64_INFINITY && rlp64->rlim_cur < max) {
   2274 		rval_basic = rctl_gp_detach_val(ragp);
   2275 
   2276 		rval_basic->rcv_privilege = RCPRIV_BASIC;
   2277 		rval_basic->rcv_value = rlp64->rlim_cur;
   2278 		rval_basic->rcv_flagaction = flagaction;
   2279 		rval_basic->rcv_action_signal = signal;
   2280 		rval_basic->rcv_action_recipient = p;
   2281 		rval_basic->rcv_action_recip_pid = p->p_pid;
   2282 		rval_basic->rcv_firing_time = 0;
   2283 		rval_basic->rcv_prev = rval_basic->rcv_next = NULL;
   2284 
   2285 		(void) rctl_val_list_insert(&rctl->rc_values, rval_basic);
   2286 		rctl->rc_cursor = rval_basic;
   2287 		rctl_val_list_reset(rctl->rc_cursor);
   2288 		RCTLOP_SET(rctl, p, &e, rctl_model_value(rctl->rc_dict_entry, p,
   2289 		    rctl->rc_cursor->rcv_value));
   2290 	}
   2291 
   2292 	ASSERT(rctl->rc_cursor != NULL);
   2293 
   2294 	mutex_exit(&rset->rcs_lock);
   2295 	return (0);
   2296 }
   2297 
   2298 
   2299 /*
   2300  * rctl_hndl_t rctl_register(const char *, rctl_entity_t, int, rlim64_t,
   2301  *   rlim64_t, rctl_ops_t *)
   2302  *
   2303  * Overview
   2304  *   rctl_register() performs a look-up in the dictionary of rctls
   2305  *   active on the system; if a rctl of that name is absent, an entry is
   2306  *   made into the dictionary.  The rctl is returned with its reference
   2307  *   count incremented by one.  If the rctl name already exists, we panic.
   2308  *   (Were the resource control system to support dynamic loading and unloading,
   2309  *   which it is structured for, duplicate registration should lead to load
   2310  *   failure instead of panicking.)
   2311  *
   2312  *   Each registered rctl has a requirement that a RCPRIV_SYSTEM limit be
   2313  *   defined.  This limit contains the highest possible value for this quantity
   2314  *   on the system.  Furthermore, the registered control must provide infinite
   2315  *   values for all applicable address space models supported by the operating
   2316  *   system.  Attempts to set resource control values beyond the system limit
   2317  *   will fail.
   2318  *
   2319  * Return values
   2320  *   The rctl's ID.
   2321  *
   2322  * Caller's context
   2323  *   Caller must be in a context suitable for KM_SLEEP allocations.
   2324  */
   2325 rctl_hndl_t
   2326 rctl_register(
   2327     const char *name,
   2328     rctl_entity_t entity,
   2329     int global_flags,
   2330     rlim64_t max_native,
   2331     rlim64_t max_ilp32,
   2332     rctl_ops_t *ops)
   2333 {
   2334 	rctl_t *rctl = kmem_cache_alloc(rctl_cache, KM_SLEEP);
   2335 	rctl_val_t *rctl_val = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
   2336 	rctl_dict_entry_t *rctl_de = kmem_zalloc(sizeof (rctl_dict_entry_t),
   2337 	    KM_SLEEP);
   2338 	rctl_t *old_rctl;
   2339 	rctl_hndl_t rhndl;
   2340 	int localflags;
   2341 
   2342 	ASSERT(ops != NULL);
   2343 
   2344 	bzero(rctl, sizeof (rctl_t));
   2345 	bzero(rctl_val, sizeof (rctl_val_t));
   2346 
   2347 	if (global_flags & RCTL_GLOBAL_DENY_NEVER)
   2348 		localflags = RCTL_LOCAL_MAXIMAL;
   2349 	else
   2350 		localflags = RCTL_LOCAL_MAXIMAL | RCTL_LOCAL_DENY;
   2351 
   2352 	rctl_val->rcv_privilege = RCPRIV_SYSTEM;
   2353 	rctl_val->rcv_value = max_native;
   2354 	rctl_val->rcv_flagaction = localflags;
   2355 	rctl_val->rcv_action_signal = 0;
   2356 	rctl_val->rcv_action_recipient = NULL;
   2357 	rctl_val->rcv_action_recip_pid = -1;
   2358 	rctl_val->rcv_firing_time = 0;
   2359 	rctl_val->rcv_next = NULL;
   2360 	rctl_val->rcv_prev = NULL;
   2361 
   2362 	rctl_de->rcd_name = (char *)name;
   2363 	rctl_de->rcd_default_value = rctl_val;
   2364 	rctl_de->rcd_max_native = max_native;
   2365 	rctl_de->rcd_max_ilp32 = max_ilp32;
   2366 	rctl_de->rcd_entity = entity;
   2367 	rctl_de->rcd_ops = ops;
   2368 	rctl_de->rcd_flagaction = global_flags;
   2369 
   2370 	rctl->rc_dict_entry = rctl_de;
   2371 	rctl->rc_values = rctl_val;
   2372 
   2373 	/*
   2374 	 * 1.  Take global lock, validate nonexistence of name, get ID.
   2375 	 */
   2376 	mutex_enter(&rctl_dict_lock);
   2377 
   2378 	if (mod_hash_find(rctl_dict_by_name, (mod_hash_key_t)name,
   2379 	    (mod_hash_val_t *)&rhndl) != MH_ERR_NOTFOUND)
   2380 		panic("duplicate registration of rctl %s", name);
   2381 
   2382 	rhndl = rctl_de->rcd_id = rctl->rc_id =
   2383 	    (rctl_hndl_t)id_alloc(rctl_ids);
   2384 
   2385 	/*
   2386 	 * 2.  Insert name-entry pair in rctl_dict_by_name.
   2387 	 */
   2388 	if (mod_hash_insert(rctl_dict_by_name, (mod_hash_key_t)name,
   2389 	    (mod_hash_val_t)rctl_de))
   2390 		panic("unable to insert rctl dict entry for %s (%u)", name,
   2391 		    (uint_t)rctl->rc_id);
   2392 
   2393 	/*
   2394 	 * 3.  Insert ID-rctl_t * pair in rctl_dict.
   2395 	 */
   2396 	if (mod_hash_find(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id,
   2397 	    (mod_hash_val_t *)&old_rctl) != MH_ERR_NOTFOUND)
   2398 		panic("duplicate rctl ID %u registered", rctl->rc_id);
   2399 
   2400 	if (mod_hash_insert(rctl_dict, (mod_hash_key_t)(uintptr_t)rctl->rc_id,
   2401 	    (mod_hash_val_t)rctl))
   2402 		panic("unable to insert rctl %s/%u (%p)", name,
   2403 		    (uint_t)rctl->rc_id, (void *)rctl);
   2404 
   2405 	/*
   2406 	 * 3a. Insert rctl_dict_entry_t * in appropriate entity list.
   2407 	 */
   2408 
   2409 	mutex_enter(&rctl_lists_lock);
   2410 
   2411 	switch (entity) {
   2412 	case RCENTITY_ZONE:
   2413 	case RCENTITY_PROJECT:
   2414 	case RCENTITY_TASK:
   2415 	case RCENTITY_PROCESS:
   2416 		rctl_de->rcd_next = rctl_lists[entity];
   2417 		rctl_lists[entity] = rctl_de;
   2418 		break;
   2419 	default:
   2420 		panic("registering unknown rctl entity %d (%s)", entity,
   2421 		    name);
   2422 		break;
   2423 	}
   2424 
   2425 	mutex_exit(&rctl_lists_lock);
   2426 
   2427 	/*
   2428 	 * 4.  Drop lock.
   2429 	 */
   2430 	mutex_exit(&rctl_dict_lock);
   2431 
   2432 	return (rhndl);
   2433 }
   2434 
   2435 /*
   2436  * static int rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p,
   2437  *    rctl_val_t *v)
   2438  *
   2439  * Overview
   2440  *   rctl_global_action() takes, in according with the flags on the rctl_dict
   2441  *   entry for the given control, the appropriate actions on the exceeded
   2442  *   control value.  Additionally, rctl_global_action() updates the firing time
   2443  *   on the exceeded value.
   2444  *
   2445  * Return values
   2446  *   A bitmask reflecting the actions actually taken.
   2447  *
   2448  * Caller's context
   2449  *   No restrictions on context.
   2450  */
   2451 /*ARGSUSED*/
   2452 static int
   2453 rctl_global_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v)
   2454 {
   2455 	rctl_dict_entry_t *rde = r->rc_dict_entry;
   2456 	const char *pr, *en, *idstr;
   2457 	id_t id;
   2458 	enum {
   2459 		SUFFIX_NONE,	/* id consumed directly */
   2460 		SUFFIX_NUMERIC,	/* id consumed in suffix */
   2461 		SUFFIX_STRING	/* idstr consumed in suffix */
   2462 	} suffix = SUFFIX_NONE;
   2463 	int ret = 0;
   2464 
   2465 	v->rcv_firing_time = gethrtime();
   2466 
   2467 	switch (v->rcv_privilege) {
   2468 	case RCPRIV_BASIC:
   2469 		pr = "basic";
   2470 		break;
   2471 	case RCPRIV_PRIVILEGED:
   2472 		pr = "privileged";
   2473 		break;
   2474 	case RCPRIV_SYSTEM:
   2475 		pr = "system";
   2476 		break;
   2477 	default:
   2478 		pr = "unknown";
   2479 		break;
   2480 	}
   2481 
   2482 	switch (rde->rcd_entity) {
   2483 	case RCENTITY_PROCESS:
   2484 		en = "process";
   2485 		id = p->p_pid;
   2486 		suffix = SUFFIX_NONE;
   2487 		break;
   2488 	case RCENTITY_TASK:
   2489 		en = "task";
   2490 		id = p->p_task->tk_tkid;
   2491 		suffix = SUFFIX_NUMERIC;
   2492 		break;
   2493 	case RCENTITY_PROJECT:
   2494 		en = "project";
   2495 		id = p->p_task->tk_proj->kpj_id;
   2496 		suffix = SUFFIX_NUMERIC;
   2497 		break;
   2498 	case RCENTITY_ZONE:
   2499 		en = "zone";
   2500 		idstr = p->p_zone->zone_name;
   2501 		suffix = SUFFIX_STRING;
   2502 		break;
   2503 	default:
   2504 		en = "unknown entity associated with process";
   2505 		id = p->p_pid;
   2506 		suffix = SUFFIX_NONE;
   2507 		break;
   2508 	}
   2509 
   2510 	if (rde->rcd_flagaction & RCTL_GLOBAL_SYSLOG) {
   2511 		switch (suffix) {
   2512 		default:
   2513 		case SUFFIX_NONE:
   2514 			(void) strlog(0, 0, 0,
   2515 			    rde->rcd_strlog_flags | log_global.lz_active,
   2516 			    "%s rctl %s (value %llu) exceeded by %s %d.",
   2517 			    pr, rde->rcd_name, v->rcv_value, en, id);
   2518 			break;
   2519 		case SUFFIX_NUMERIC:
   2520 			(void) strlog(0, 0, 0,
   2521 			    rde->rcd_strlog_flags | log_global.lz_active,
   2522 			    "%s rctl %s (value %llu) exceeded by process %d"
   2523 			    " in %s %d.",
   2524 			    pr, rde->rcd_name, v->rcv_value, p->p_pid,
   2525 			    en, id);
   2526 			break;
   2527 		case SUFFIX_STRING:
   2528 			(void) strlog(0, 0, 0,
   2529 			    rde->rcd_strlog_flags | log_global.lz_active,
   2530 			    "%s rctl %s (value %llu) exceeded by process %d"
   2531 			    " in %s %s.",
   2532 			    pr, rde->rcd_name, v->rcv_value, p->p_pid,
   2533 			    en, idstr);
   2534 			break;
   2535 		}
   2536 	}
   2537 
   2538 	if (rde->rcd_flagaction & RCTL_GLOBAL_DENY_ALWAYS)
   2539 		ret |= RCT_DENY;
   2540 
   2541 	return (ret);
   2542 }
   2543 
   2544 static int
   2545 rctl_local_action(rctl_t *r, rctl_set_t *rset, struct proc *p, rctl_val_t *v,
   2546     uint_t safety)
   2547 {
   2548 	int ret = 0;
   2549 	sigqueue_t *sqp = NULL;
   2550 	rctl_dict_entry_t *rde = r->rc_dict_entry;
   2551 	int unobservable = (rde->rcd_flagaction & RCTL_GLOBAL_UNOBSERVABLE);
   2552 
   2553 	proc_t *recipient = v->rcv_action_recipient;
   2554 	id_t recip_pid = v->rcv_action_recip_pid;
   2555 	int recip_signal = v->rcv_action_signal;
   2556 	uint_t flagaction = v->rcv_flagaction;
   2557 
   2558 	if (safety == RCA_UNSAFE_ALL) {
   2559 		if (flagaction & RCTL_LOCAL_DENY) {
   2560 			ret |= RCT_DENY;
   2561 		}
   2562 		return (ret);
   2563 	}
   2564 
   2565 	if (flagaction & RCTL_LOCAL_SIGNAL) {
   2566 		/*
   2567 		 * We can build a siginfo only in the case that it is
   2568 		 * safe for us to drop p_lock.  (For asynchronous
   2569 		 * checks this is currently not true.)
   2570 		 */
   2571 		if (safety == RCA_SAFE) {
   2572 			mutex_exit(&rset->rcs_lock);
   2573 			mutex_exit(&p->p_lock);
   2574 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
   2575 			mutex_enter(&p->p_lock);
   2576 			mutex_enter(&rset->rcs_lock);
   2577 
   2578 			sqp->sq_info.si_signo = recip_signal;
   2579 			sqp->sq_info.si_code = SI_RCTL;
   2580 			sqp->sq_info.si_errno = 0;
   2581 			sqp->sq_info.si_entity = (int)rde->rcd_entity;
   2582 		}
   2583 
   2584 		if (recipient == NULL || recipient == p) {
   2585 			ret |= RCT_SIGNAL;
   2586 
   2587 			if (sqp == NULL) {
   2588 				sigtoproc(p, NULL, recip_signal);
   2589 			} else if (p == curproc) {
   2590 				/*
   2591 				 * Then this is a synchronous test and we can
   2592 				 * direct the signal at the violating thread.
   2593 				 */
   2594 				sigaddqa(curproc, curthread, sqp);
   2595 			} else {
   2596 				sigaddqa(p, NULL, sqp);
   2597 			}
   2598 		} else if (!unobservable) {
   2599 			proc_t *rp;
   2600 
   2601 			mutex_exit(&rset->rcs_lock);
   2602 			mutex_exit(&p->p_lock);
   2603 
   2604 			mutex_enter(&pidlock);
   2605 			if ((rp = prfind(recip_pid)) == recipient) {
   2606 				/*
   2607 				 * Recipient process is still alive, but may not
   2608 				 * be in this task or project any longer.  In
   2609 				 * this case, the recipient's resource control
   2610 				 * set pertinent to this control will have
   2611 				 * changed--and we will not deliver the signal,
   2612 				 * as the recipient process is trying to tear
   2613 				 * itself off of its former set.
   2614 				 */
   2615 				mutex_enter(&rp->p_lock);
   2616 				mutex_exit(&pidlock);
   2617 
   2618 				if (rctl_entity_obtain_rset(rde, rp) == rset) {
   2619 					ret |= RCT_SIGNAL;
   2620 
   2621 					if (sqp == NULL)
   2622 						sigtoproc(rp, NULL,
   2623 						    recip_signal);
   2624 					else
   2625 						sigaddqa(rp, NULL, sqp);
   2626 				} else if (sqp) {
   2627 					kmem_free(sqp, sizeof (sigqueue_t));
   2628 				}
   2629 				mutex_exit(&rp->p_lock);
   2630 			} else {
   2631 				mutex_exit(&pidlock);
   2632 				if (sqp)
   2633 					kmem_free(sqp, sizeof (sigqueue_t));
   2634 			}
   2635 
   2636 			mutex_enter(&p->p_lock);
   2637 			/*
   2638 			 * Since we dropped p_lock, we may no longer be in the
   2639 			 * same task or project as we were at entry.  It is thus
   2640 			 * unsafe for us to reacquire the set lock at this
   2641 			 * point; callers of rctl_local_action() must handle
   2642 			 * this possibility.
   2643 			 */
   2644 			ret |= RCT_LK_ABANDONED;
   2645 		} else if (sqp) {
   2646 			kmem_free(sqp, sizeof (sigqueue_t));
   2647 		}
   2648 	}
   2649 
   2650 	if ((flagaction & RCTL_LOCAL_DENY) &&
   2651 	    (recipient == NULL || recipient == p)) {
   2652 		ret |= RCT_DENY;
   2653 	}
   2654 
   2655 	return (ret);
   2656 }
   2657 
   2658 /*
   2659  * int rctl_action(rctl_hndl_t, rctl_set_t *, struct proc *, uint_t)
   2660  *
   2661  * Overview
   2662  *   Take the action associated with the enforced value (as defined by
   2663  *   rctl_get_enforced_value()) being exceeded or encountered.  Possibly perform
   2664  *   a restricted subset of the available actions, if circumstances dictate that
   2665  *   we cannot safely allocate memory (for a sigqueue_t) or guarantee process
   2666  *   persistence across the duration of the function (an asynchronous action).
   2667  *
   2668  * Return values
   2669  *   Actions taken, according to the rctl_test bitmask.
   2670  *
   2671  * Caller's context
   2672  *   Safe to acquire rcs_lock.
   2673  */
   2674 int
   2675 rctl_action(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p, uint_t safety)
   2676 {
   2677 	return (rctl_action_entity(hndl, rset, p, NULL, safety));
   2678 }
   2679 
   2680 int
   2681 rctl_action_entity(rctl_hndl_t hndl, rctl_set_t *rset, struct proc *p,
   2682     rctl_entity_p_t *e, uint_t safety)
   2683 {
   2684 	int ret = RCT_NONE;
   2685 	rctl_t *lrctl;
   2686 	rctl_entity_p_t e_tmp;
   2687 
   2688 rctl_action_acquire:
   2689 	mutex_enter(&rset->rcs_lock);
   2690 	if (rctl_set_find(rset, hndl, &lrctl) == -1) {
   2691 		mutex_exit(&rset->rcs_lock);
   2692 		return (ret);
   2693 	}
   2694 
   2695 	if (e == NULL) {
   2696 		rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity,
   2697 		    p, &e_tmp);
   2698 		e = &e_tmp;
   2699 	}
   2700 
   2701 	if ((ret & RCT_LK_ABANDONED) == 0) {
   2702 		ret |= rctl_global_action(lrctl, rset, p, lrctl->rc_cursor);
   2703 
   2704 		RCTLOP_ACTION(lrctl, p, e);
   2705 
   2706 		ret |= rctl_local_action(lrctl, rset, p,
   2707 		    lrctl->rc_cursor, safety);
   2708 
   2709 		if (ret & RCT_LK_ABANDONED)
   2710 			goto rctl_action_acquire;
   2711 	}
   2712 
   2713 	ret &= ~RCT_LK_ABANDONED;
   2714 
   2715 	if (!(ret & RCT_DENY) &&
   2716 	    lrctl->rc_cursor->rcv_next != NULL) {
   2717 		lrctl->rc_cursor = lrctl->rc_cursor->rcv_next;
   2718 
   2719 		RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry,
   2720 		    p, lrctl->rc_cursor->rcv_value));
   2721 
   2722 	}
   2723 	mutex_exit(&rset->rcs_lock);
   2724 
   2725 	return (ret);
   2726 }
   2727 
   2728 /*
   2729  * int rctl_test(rctl_hndl_t, rctl_set_t *, struct proc *, rctl_qty_t, uint_t)
   2730  *
   2731  * Overview
   2732  *   Increment the resource associated with the given handle, returning zero if
   2733  *   the incremented value does not exceed the threshold for the current limit
   2734  *   on the resource.
   2735  *
   2736  * Return values
   2737  *   Actions taken, according to the rctl_test bitmask.
   2738  *
   2739  * Caller's context
   2740  *   p_lock held by caller.
   2741  */
   2742 /*ARGSUSED*/
   2743 int
   2744 rctl_test(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p,
   2745     rctl_qty_t incr, uint_t flags)
   2746 {
   2747 	return (rctl_test_entity(rhndl, rset, p, NULL, incr, flags));
   2748 }
   2749 
   2750 int
   2751 rctl_test_entity(rctl_hndl_t rhndl, rctl_set_t *rset, struct proc *p,
   2752     rctl_entity_p_t *e, rctl_qty_t incr, uint_t flags)
   2753 {
   2754 	rctl_t *lrctl;
   2755 	int ret = RCT_NONE;
   2756 	rctl_entity_p_t e_tmp;
   2757 	if (p == &p0) {
   2758 		/*
   2759 		 * We don't enforce rctls on the kernel itself.
   2760 		 */
   2761 		return (ret);
   2762 	}
   2763 
   2764 rctl_test_acquire:
   2765 	ASSERT(MUTEX_HELD(&p->p_lock));
   2766 
   2767 	mutex_enter(&rset->rcs_lock);
   2768 
   2769 	/*
   2770 	 * Dereference from rctl_set.  We don't enforce newly loaded controls
   2771 	 * that haven't been set on this entity (since the only valid value is
   2772 	 * the infinite system value).
   2773 	 */
   2774 	if (rctl_set_find(rset, rhndl, &lrctl) == -1) {
   2775 		mutex_exit(&rset->rcs_lock);
   2776 		return (ret);
   2777 	}
   2778 
   2779 	/*
   2780 	 * This control is currently unenforced:  maximal value on control
   2781 	 * supporting infinitely available resource.
   2782 	 */
   2783 	if ((lrctl->rc_dict_entry->rcd_flagaction & RCTL_GLOBAL_INFINITE) &&
   2784 	    (lrctl->rc_cursor->rcv_flagaction & RCTL_LOCAL_MAXIMAL)) {
   2785 
   2786 		mutex_exit(&rset->rcs_lock);
   2787 		return (ret);
   2788 	}
   2789 
   2790 	/*
   2791 	 * If we have been called by rctl_test, look up the entity pointer
   2792 	 * from the proc pointer.
   2793 	 */
   2794 	if (e == NULL) {
   2795 		rctl_entity_obtain_entity_p(lrctl->rc_dict_entry->rcd_entity,
   2796 		    p, &e_tmp);
   2797 		e = &e_tmp;
   2798 	}
   2799 
   2800 	/*
   2801 	 * Get enforced rctl value and current usage.  Test the increment
   2802 	 * with the current usage against the enforced value--take action as
   2803 	 * necessary.
   2804 	 */
   2805 	while (RCTLOP_TEST(lrctl, p, e, lrctl->rc_cursor, incr, flags)) {
   2806 		if ((ret & RCT_LK_ABANDONED) == 0) {
   2807 			ret |= rctl_global_action(lrctl, rset, p,
   2808 			    lrctl->rc_cursor);
   2809 
   2810 			RCTLOP_ACTION(lrctl, p, e);
   2811 
   2812 			ret |= rctl_local_action(lrctl, rset, p,
   2813 			    lrctl->rc_cursor, flags);
   2814 
   2815 			if (ret & RCT_LK_ABANDONED)
   2816 				goto rctl_test_acquire;
   2817 		}
   2818 
   2819 		ret &= ~RCT_LK_ABANDONED;
   2820 
   2821 		if ((ret & RCT_DENY) == RCT_DENY ||
   2822 		    lrctl->rc_cursor->rcv_next == NULL) {
   2823 			ret |= RCT_DENY;
   2824 			break;
   2825 		}
   2826 
   2827 		lrctl->rc_cursor = lrctl->rc_cursor->rcv_next;
   2828 		RCTLOP_SET(lrctl, p, e, rctl_model_value(lrctl->rc_dict_entry,
   2829 		    p, lrctl->rc_cursor->rcv_value));
   2830 	}
   2831 
   2832 	mutex_exit(&rset->rcs_lock);
   2833 
   2834 	return (ret);
   2835 }
   2836 
   2837 /*
   2838  * void rctl_init(void)
   2839  *
   2840  * Overview
   2841  *   Initialize the rctl subsystem, including the primoridal rctls
   2842  *   provided by the system.  New subsystem-specific rctls should _not_ be
   2843  *   initialized here.  (Do it in your own file.)
   2844  *
   2845  * Return values
   2846  *   None.
   2847  *
   2848  * Caller's context
   2849  *   Safe for KM_SLEEP allocations.  Must be called prior to any process model
   2850  *   initialization.
   2851  */
   2852 void
   2853 rctl_init(void)
   2854 {
   2855 	rctl_cache = kmem_cache_create("rctl_cache", sizeof (rctl_t),
   2856 	    0, NULL, NULL, NULL, NULL, NULL, 0);
   2857 	rctl_val_cache = kmem_cache_create("rctl_val_cache",
   2858 	    sizeof (rctl_val_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
   2859 
   2860 	rctl_dict = mod_hash_create_extended("rctl_dict",
   2861 	    rctl_dict_size, mod_hash_null_keydtor, rctl_dict_val_dtor,
   2862 	    rctl_dict_hash_by_id, NULL, rctl_dict_id_cmp, KM_SLEEP);
   2863 	rctl_dict_by_name = mod_hash_create_strhash(
   2864 	    "rctl_handles_by_name", rctl_dict_size,
   2865 	    mod_hash_null_valdtor);
   2866 	rctl_ids = id_space_create("rctl_ids", 1, max_rctl_hndl);
   2867 	bzero(rctl_lists, (RC_MAX_ENTITY + 1) * sizeof (rctl_dict_entry_t *));
   2868 
   2869 	rctlproc_init();
   2870 }
   2871 
   2872 /*
   2873  * rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
   2874  *     int chargeproc)
   2875  *
   2876  * Increments the amount of locked memory on a project, and
   2877  * zone. If proj is non-NULL the project must be held by the
   2878  * caller; if it is NULL the proj and zone of proc_t p are used.
   2879  * If chargeproc is non-zero, then the charged amount is cached
   2880  * on p->p_locked_mem so that the charge can be migrated when a
   2881  * process changes projects.
   2882  *
   2883  * Return values
   2884  *    0 - success
   2885  *    EAGAIN - attempting to increment locked memory is denied by one
   2886  *      or more resource entities.
   2887  */
   2888 int
   2889 rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
   2890     int chargeproc)
   2891 {
   2892 	kproject_t *projp;
   2893 	zone_t *zonep;
   2894 	rctl_entity_p_t e;
   2895 	int ret = 0;
   2896 
   2897 	ASSERT(p != NULL);
   2898 	ASSERT(MUTEX_HELD(&p->p_lock));
   2899 	if (proj != NULL) {
   2900 		projp = proj;
   2901 		zonep = proj->kpj_zone;
   2902 	} else {
   2903 		projp = p->p_task->tk_proj;
   2904 		zonep = p->p_zone;
   2905 	}
   2906 
   2907 	mutex_enter(&zonep->zone_mem_lock);
   2908 
   2909 	e.rcep_p.proj = projp;
   2910 	e.rcep_t = RCENTITY_PROJECT;
   2911 
   2912 	/* check for overflow */
   2913 	if ((projp->kpj_data.kpd_locked_mem + inc) <
   2914 	    projp->kpj_data.kpd_locked_mem) {
   2915 		ret = EAGAIN;
   2916 		goto out;
   2917 	}
   2918 	if (projp->kpj_data.kpd_locked_mem + inc >
   2919 	    projp->kpj_data.kpd_locked_mem_ctl) {
   2920 		if (rctl_test_entity(rc_project_locked_mem, projp->kpj_rctls,
   2921 		    p, &e, inc, 0) & RCT_DENY) {
   2922 			ret = EAGAIN;
   2923 			goto out;
   2924 		}
   2925 	}
   2926 	e.rcep_p.zone = zonep;
   2927 	e.rcep_t = RCENTITY_ZONE;
   2928 
   2929 	/* Check for overflow */
   2930 	if ((zonep->zone_locked_mem + inc) < zonep->zone_locked_mem) {
   2931 		ret = EAGAIN;
   2932 		goto out;
   2933 	}
   2934 	if (zonep->zone_locked_mem + inc > zonep->zone_locked_mem_ctl) {
   2935 		if (rctl_test_entity(rc_zone_locked_mem, zonep->zone_rctls,
   2936 		    p, &e, inc, 0) & RCT_DENY) {
   2937 			ret = EAGAIN;
   2938 			goto out;
   2939 		}
   2940 	}
   2941 
   2942 	zonep->zone_locked_mem += inc;
   2943 	projp->kpj_data.kpd_locked_mem += inc;
   2944 	if (chargeproc != 0) {
   2945 		p->p_locked_mem += inc;
   2946 	}
   2947 out:
   2948 	mutex_exit(&zonep->zone_mem_lock);
   2949 	return (ret);
   2950 }
   2951 
   2952 /*
   2953  * rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
   2954  *     int creditproc)
   2955  *
   2956  * Decrements the amount of locked memory on a project and
   2957  * zone.  If proj is non-NULL the project must be held by the
   2958  * caller; if it is NULL the proj and zone of proc_t p are used.
   2959  * If creditproc is non-zero, then the quantity of locked memory
   2960  * is subtracted from p->p_locked_mem.
   2961  *
   2962  * Return values
   2963  *   none
   2964  */
   2965 void
   2966 rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
   2967     int creditproc)
   2968 {
   2969 	kproject_t *projp;
   2970 	zone_t *zonep;
   2971 
   2972 	if (proj != NULL) {
   2973 		projp = proj;
   2974 		zonep = proj->kpj_zone;
   2975 	} else {
   2976 		ASSERT(p != NULL);
   2977 		ASSERT(MUTEX_HELD(&p->p_lock));
   2978 		projp = p->p_task->tk_proj;
   2979 		zonep = p->p_zone;
   2980 	}
   2981 
   2982 	mutex_enter(&zonep->zone_mem_lock);
   2983 	zonep->zone_locked_mem -= inc;
   2984 	projp->kpj_data.kpd_locked_mem -= inc;
   2985 	if (creditproc != 0) {
   2986 		ASSERT(p != NULL);
   2987 		ASSERT(MUTEX_HELD(&p->p_lock));
   2988 		p->p_locked_mem -= inc;
   2989 	}
   2990 	mutex_exit(&zonep->zone_mem_lock);
   2991 }
   2992 
   2993 /*
   2994  * rctl_incr_swap(proc_t *, zone_t *, size_t)
   2995  *
   2996  * Overview
   2997  *   Increments the swap charge on the specified zone.
   2998  *
   2999  * Return values
   3000  *   0 on success.  EAGAIN if swap increment fails due an rctl value
   3001  *   on the zone.
   3002  *
   3003  * Callers context
   3004  *   p_lock held on specified proc.
   3005  *   swap must be even multiple of PAGESIZE
   3006  */
   3007 int
   3008 rctl_incr_swap(proc_t *proc, zone_t *zone, size_t swap)
   3009 {
   3010 	rctl_entity_p_t e;
   3011 
   3012 	ASSERT(MUTEX_HELD(&proc->p_lock));
   3013 	ASSERT((swap & PAGEOFFSET) == 0);
   3014 	e.rcep_p.zone = zone;
   3015 	e.rcep_t = RCENTITY_ZONE;
   3016 
   3017 	mutex_enter(&zone->zone_mem_lock);
   3018 
   3019 	/* Check for overflow */
   3020 	if ((zone->zone_max_swap + swap) < zone->zone_max_swap) {
   3021 		mutex_exit(&zone->zone_mem_lock);
   3022 		return (EAGAIN);
   3023 	}
   3024 	if ((zone->zone_max_swap + swap) >
   3025 	    zone->zone_max_swap_ctl) {
   3026 
   3027 		if (rctl_test_entity(rc_zone_max_swap, zone->zone_rctls,
   3028 		    proc, &e, swap, 0) & RCT_DENY) {
   3029 			mutex_exit(&zone->zone_mem_lock);
   3030 			return (EAGAIN);
   3031 		}
   3032 	}
   3033 	zone->zone_max_swap += swap;
   3034 	mutex_exit(&zone->zone_mem_lock);
   3035 	return (0);
   3036 }
   3037 
   3038 /*
   3039  * rctl_decr_swap(zone_t *, size_t)
   3040  *
   3041  * Overview
   3042  *   Decrements the swap charge on the specified zone.
   3043  *
   3044  * Return values
   3045  *   None
   3046  *
   3047  * Callers context
   3048  *   swap must be even multiple of PAGESIZE
   3049  */
   3050 void
   3051 rctl_decr_swap(zone_t *zone, size_t swap)
   3052 {
   3053 	ASSERT((swap & PAGEOFFSET) == 0);
   3054 	mutex_enter(&zone->zone_mem_lock);
   3055 	ASSERT(zone->zone_max_swap >= swap);
   3056 	zone->zone_max_swap -= swap;
   3057 	mutex_exit(&zone->zone_mem_lock);
   3058 }
   3059 
   3060 /*
   3061  * Create resource kstat
   3062  */
   3063 static kstat_t *
   3064 rctl_kstat_create_common(char *ks_name, int ks_instance, char *ks_class,
   3065     uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags, int ks_zoneid)
   3066 {
   3067 	kstat_t *ksp = NULL;
   3068 	char name[KSTAT_STRLEN];
   3069 
   3070 	(void) snprintf(name, KSTAT_STRLEN, "%s_%d", ks_name, ks_instance);
   3071 
   3072 	if ((ksp = kstat_create_zone("caps", ks_zoneid,
   3073 	    name, ks_class, ks_type,
   3074 	    ks_ndata, ks_flags, ks_zoneid)) != NULL) {
   3075 		if (ks_zoneid != GLOBAL_ZONEID)
   3076 			kstat_zone_add(ksp, GLOBAL_ZONEID);
   3077 	}
   3078 	return (ksp);
   3079 }
   3080 
   3081 /*
   3082  * Create zone-specific resource kstat
   3083  */
   3084 kstat_t *
   3085 rctl_kstat_create_zone(zone_t *zone, char *ks_name, uchar_t ks_type,
   3086     uint_t ks_ndata, uchar_t ks_flags)
   3087 {
   3088 	char name[KSTAT_STRLEN];
   3089 
   3090 	(void) snprintf(name, KSTAT_STRLEN, "%s_zone", ks_name);
   3091 
   3092 	return (rctl_kstat_create_common(name, zone->zone_id, "zone_caps",
   3093 	    ks_type, ks_ndata, ks_flags, zone->zone_id));
   3094 }
   3095 
   3096 /*
   3097  * Create project-specific resource kstat
   3098  */
   3099 kstat_t *
   3100 rctl_kstat_create_project(kproject_t *kpj, char *ks_name, uchar_t ks_type,
   3101     uint_t ks_ndata, uchar_t ks_flags)
   3102 {
   3103 	char name[KSTAT_STRLEN];
   3104 
   3105 	(void) snprintf(name, KSTAT_STRLEN, "%s_project", ks_name);
   3106 
   3107 	return (rctl_kstat_create_common(name, kpj->kpj_id, "project_caps",
   3108 	    ks_type, ks_ndata, ks_flags, kpj->kpj_zoneid));
   3109 }
   3110