Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     27 /*	  All Rights Reserved  	*/
     28 
     29 
     30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     31 
     32 /*
     33  * Common Inter-Process Communication routines.
     34  *
     35  * Overview
     36  * --------
     37  *
     38  * The System V inter-process communication (IPC) facilities provide
     39  * three services, message queues, semaphore arrays, and shared memory
     40  * segments, which are managed using filesystem-like namespaces.
     41  * Unlike a filesystem, these namespaces aren't mounted and accessible
     42  * via a path -- a special API is used to interact with the different
     43  * facilities (nothing precludes a VFS-based interface, but the
     44  * standards require the special APIs).  Furthermore, these special
     45  * APIs don't use file descriptors, nor do they have an equivalent.
     46  * This means that every operation which acts on an object needs to
     47  * perform the equivalent of a lookup, which in turn means that every
     48  * operation can fail if the specified object doesn't exist in the
     49  * facility's namespace.
     50  *
     51  * Objects
     52  * -------
     53  *
     54  * Each object in a namespace has a unique ID, which is assigned by the
     55  * system and is used to identify the object when performing operations
     56  * on it.  An object can also have a key, which is selected by the user
     57  * at allocation time and is used as a primitive rendezvous mechanism.
     58  * An object without a key is said to have a "private" key.
     59  *
     60  * To perform an operation on an object given its key, one must first
     61  * perform a lookup and obtain its ID.  The ID is then used to identify
     62  * the object when performing the operation.  If the object has a
     63  * private key, the ID must be known or obtained by other means.
     64  *
     65  * Each object in the namespace has a creator uid and gid, as well as
     66  * an owner uid and gid.  Both are initialized with the ruid and rgid
     67  * of the process which created the object.  The creator or current
     68  * owner has the ability to change the owner of the object.
     69  *
     70  * Each object in the namespace has a set of file-like permissions,
     71  * which, in conjunction with the creator and owner uid and gid,
     72  * control read and write access to the object (execute is ignored).
     73  *
     74  * Each object also has a creator project and zone, which are used to
     75  * account for its resource usage.
     76  *
     77  * Operations
     78  * ----------
     79  *
     80  * There are five operations which all three facilities have in
     81  * common: GET, SET, STAT, RMID, and IDS.
     82  *
     83  * GET, like open, is used to allocate a new object or obtain an
     84  * existing one (using its key).  It takes a key, a set of flags and
     85  * mode bits, and optionally facility-specific arguments.  If the key
     86  * is IPC_PRIVATE, a new object with the requested mode bits and
     87  * facility-specific attributes is created.  If the key isn't
     88  * IPC_PRIVATE, the GET will attempt to look up the specified key and
     89  * either return its object or create a new one depending on the state of the
     90  * IPC_CREAT and IPC_EXCL flags, much like open.  If GET needs to
     91  * allocate an object, it can fail if there is insufficient space in
     92  * the namespace (the maximum number of ids for the facility has been
     93  * exceeded) or if the facility-specific initialization fails.  If GET
     94  * finds an object it can return, it can still fail if that object's
     95  * permissions or facility-specific attributes are less than those
     96  * requested.
     97  *
     98  * SET is used to adjust facility-specific parameters of an object, in
     99  * addition to the owner uid and gid, and mode bits.  It can fail if
    100  * the caller isn't the creator or owner.
    101  *
    102  * STAT is used to obtain information about an object including the
    103  * general attributes described above as well as facility-specific
    104  * information.  It can fail if the caller doesn't have read
    105  * permission.
    106  *
    107  * RMID removes an object from the namespace.  Subsequent operations
    108  * using the object's ID or key will fail (until another object is
    109  * created with the same key or ID).  Since an RMID may be performed
    110  * asynchronously with other operations, it is possible that other
    111  * threads and/or processes will have references to the object.  While
    112  * a facility may have actions which need to be performed at RMID time,
    113  * only when all references are dropped can the object be destroyed.
    114  * RMID will fail if the caller isn't the creator or owner.
    115  *
    116  * IDS obtains a list of all IDs in a facility's namespace.  There are
    117  * no facility-specific behaviors of IDS.
    118  *
    119  * Design
    120  * ------
    121  *
    122  * Because some IPC facilities provide services whose operations must
    123  * scale, a mechanism which allows fast, concurrent access to
    124  * individual objects is needed.  Of primary importance is object
    125  * lookup based on ID (SET, STAT, others).  Allocation (GET),
    126  * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are
    127  * lesser concerns, but should be implemented in such a way that ID
    128  * lookup isn't affected (at least not in the common case).
    129  *
    130  * Starting from the bottom up, each object is represented by a
    131  * structure, the first member of which must be a kipc_perm_t.  The
    132  * kipc_perm_t contains the information described above in "Objects", a
    133  * reference count (since the object may continue to exist after it has
    134  * been removed from the namespace), as well as some additional
    135  * metadata used to manage data structure membership.  These objects
    136  * are dynamically allocated.
    137  *
    138  * Above the objects is a power-of-two sized table of ID slots.  Each
    139  * slot contains a pointer to an object, a sequence number, and a
    140  * lock.  An object's ID is a function of its slot's index in the table
    141  * and its slot's sequence number.  Every time a slot is released (via
    142  * RMID) its sequence number is increased.  Strictly speaking, the
    143  * sequence number is unnecessary.  However, checking the sequence
    144  * number after a lookup provides a certain degree of robustness
    145  * against the use of stale IDs (useful since nothing else does).  When
    146  * the table fills up, it is resized (see Locking, below).
    147  *
    148  * Of an ID's 31 bits (an ID is, as defined by the standards, a signed
    149  * int) the top IPC_SEQ_BITS are used for the sequence number with the
    150  * remainder holding the index into the table.  The size of the table
    151  * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots.
    152  *
    153  * Managing this table is the ipc_service structure.  It contains a
    154  * pointer to the dynamically allocated ID table, a namespace-global
    155  * lock, an id_space for managing the free space in the table, and
    156  * sundry other metadata necessary for the maintenance of the
    157  * namespace.  An AVL tree of all keyed objects in the table (sorted by
    158  * key) is used for key lookups.  An unordered doubly linked list of
    159  * all objects in the namespace (keyed or not) is maintained to
    160  * facilitate ID enumeration.
    161  *
    162  * To help visualize these relationships, here's a picture of a
    163  * namespace with a table of size 8 containing three objects
    164  * (IPC_SEQ_BITS = 27):
    165  *
    166  *
    167  * +-ipc_service_t--+
    168  * | table          *---\
    169  * | keys           *---+----------------------\
    170  * | all ids        *--\|                      |
    171  * |                |  ||                      |
    172  * +----------------+  ||                      |
    173  *                     ||                      |
    174  * /-------------------/|                      |
    175  * |    /---------------/                      |
    176  * |    |                                      |
    177  * |    v                                      |
    178  * |  +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+
    179  * |  | Seq=3  |        |        | Seq=1  |    :   |        |        | Seq=6  |
    180  * |  |        |        |        |        |    :   |        |        |        |
    181  * |  +-*------+--------+--------+-*------+----+---+--------+--------+-*------+
    182  * |    |                          |           |                       |
    183  * |    |                      /---/           |      /----------------/
    184  * |    |                      |               |      |
    185  * |    v                      v               |      v
    186  * |  +-kipc_perm_t-+        +-kipc_perm_t-+   |    +-kipc_perm_t-+
    187  * |  | id=0x30     |        | id=0x13     |   |    | id=0x67     |
    188  * |  | key=0xfeed  |        | key=0xbeef  |   |    | key=0xcafe  |
    189  * \->| [list]      |<------>| [list]      |<------>| [list]      |
    190  * /->| [avl left]  x   /--->| [avl left]  x   \--->| [avl left]  *---\
    191  * |  | [avl right] x   |    | [avl right] x        | [avl right] *---+-\
    192  * |  |             |   |    |             |        |             |   | |
    193  * |  +-------------+   |    +-------------+        +-------------+   | |
    194  * |                    \---------------------------------------------/ |
    195  * \--------------------------------------------------------------------/
    196  *
    197  * Locking
    198  * -------
    199  *
    200  * There are three locks (or sets of locks) which are used to ensure
    201  * correctness: the slot locks, the namespace lock, and p_lock (needed
    202  * when checking resource controls).  Their ordering is
    203  *
    204  *   namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock
    205  *
    206  * Generally speaking, the namespace lock is used to protect allocation
    207  * and removal from the namespace, ID enumeration, and resizing the ID
    208  * table.  Specifically:
    209  *
    210  * - write access to all fields of the ipc_service structure
    211  * - read access to all variable fields of ipc_service except
    212  *   ipcs_tabsz (table size) and ipcs_table (the table pointer)
    213  * - read/write access to ipc_avl, ipc_list in visible objects'
    214  *   kipc_perm structures (i.e. objects which have been removed from
    215  *   the namespace don't have this restriction)
    216  * - write access to ipct_seq and ipct_data in the table entries
    217  *
    218  * A slot lock by itself is meaningless (except when resizing).  Of
    219  * greater interest conceptually is the notion of an ID lock -- a
    220  * "virtual lock" which refers to whichever slot lock an object's ID
    221  * currently hashes to.
    222  *
    223  * An ID lock protects all objects with that ID.  Normally there will
    224  * only be one such object: the one pointed to by the locked slot.
    225  * However, if an object is removed from the namespace but retains
    226  * references (e.g. an attached shared memory segment which has been
    227  * RMIDed), it continues to use the lock associated with its original
    228  * ID.  While this can result in increased contention, operations which
    229  * require taking the ID lock of removed objects are infrequent.
    230  *
    231  * Specifically, an ID lock protects the contents of an object's
    232  * structure, including the contents of the embedded kipc_perm
    233  * structure (but excluding those fields protected by the namespace
    234  * lock).  It also protects the ipct_seq and ipct_data fields in its
    235  * slot (it is really a slot lock, after all).
    236  *
    237  * Recall that the table is resizable.  To avoid requiring every ID
    238  * lookup to take a global lock, a scheme much like that employed for
    239  * file descriptors (see the comment above UF_ENTER in user.h) is
    240  * used.  Note that the sequence number and data pointer are protected
    241  * by both the namespace lock and their slot lock.  When the table is
    242  * resized, the following operations take place:
    243  *
    244  *   1) A new table is allocated.
    245  *   2) The global lock is taken.
    246  *   3) All old slots are locked, in order.
    247  *   4) The first half of the new slots are locked.
    248  *   5) All table entries are copied to the new table, and cleared from
    249  *	the old table.
    250  *   6) The ipc_service structure is updated to point to the new table.
    251  *   7) The ipc_service structure is updated with the new table size.
    252  *   8) All slot locks (old and new) are dropped.
    253  *
    254  * Because the slot locks are embedded in the table, ID lookups and
    255  * other operations which require taking a slot lock need to verify
    256  * that the lock taken wasn't part of a stale table.  This is
    257  * accomplished by checking the table size before and after
    258  * dereferencing the table pointer and taking the lock: if the size
    259  * changes, the lock must be dropped and reacquired.  It is this
    260  * additional work which distinguishes an ID lock from a slot lock.
    261  *
    262  * Because we can't guarantee that threads aren't accessing the old
    263  * tables' locks, they are never deallocated.  To prevent spurious
    264  * reports of memory leaks, a pointer to the discarded table is stored
    265  * in the new one in step 5.  (Theoretically ipcs_destroy will delete
    266  * the discarded tables, but it is only ever called from a failed _init
    267  * invocation; i.e. when there aren't any.)
    268  *
    269  * Interfaces
    270  * ----------
    271  *
    272  * The following interfaces are provided by the ipc module for use by
    273  * the individual IPC facilities:
    274  *
    275  * ipcperm_access
    276  *
    277  *   Given an object and a cred structure, determines if the requested
    278  *   access type is allowed.
    279  *
    280  * ipcperm_set, ipcperm_stat,
    281  * ipcperm_set64, ipcperm_stat64
    282  *
    283  *   Performs the common portion of a STAT or SET operation.  All
    284  *   (except stat and stat64) can fail, so they should be called before
    285  *   any facility-specific non-reversible changes are made to an
    286  *   object.  Similarly, the set operations have side effects, so they
    287  *   should only be called once the possibility of a facility-specific
    288  *   failure is eliminated.
    289  *
    290  * ipcs_create
    291  *
    292  *   Creates an IPC namespace for use by an IPC facility.
    293  *
    294  * ipcs_destroy
    295  *
    296  *   Destroys an IPC namespace.
    297  *
    298  * ipcs_lock, ipcs_unlock
    299  *
    300  *   Takes the namespace lock.  Ideally such access wouldn't be
    301  *   necessary, but there may be facility-specific data protected by
    302  *   this lock (e.g. project-wide resource consumption).
    303  *
    304  * ipc_lock
    305  *
    306  *   Takes the lock associated with an ID.  Can't fail.
    307  *
    308  * ipc_relock
    309  *
    310  *   Like ipc_lock, but takes a pointer to a held lock.  Drops the lock
    311  *   unless it is the one that would have been returned by ipc_lock.
    312  *   Used after calls to cv_wait.
    313  *
    314  * ipc_lookup
    315  *
    316  *   Performs an ID lookup, returns with the ID lock held.  Fails if
    317  *   the ID doesn't exist in the namespace.
    318  *
    319  * ipc_hold
    320  *
    321  *   Takes a reference on an object.
    322  *
    323  * ipc_rele
    324  *
    325  *   Releases a reference on an object, and drops the object's lock.
    326  *   Calls the object's destructor if last reference is being
    327  *   released.
    328  *
    329  * ipc_rele_locked
    330  *
    331  *   Releases a reference on an object.  Doesn't drop lock, and may
    332  *   only be called when there is more than one reference to the
    333  *   object.
    334  *
    335  * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup
    336  *
    337  *   Components of a GET operation.  ipc_get performs a key lookup,
    338  *   allocating an object if the key isn't found (returning with the
    339  *   namespace lock and p_lock held), and returning the existing object
    340  *   if it is (with the object lock held).  ipc_get doesn't modify the
    341  *   namespace.
    342  *
    343  *   ipc_commit_begin begins the process of inserting an object
    344  *   allocated by ipc_get into the namespace, and can fail.  If
    345  *   successful, it returns with the namespace lock and p_lock held.
    346  *   ipc_commit_end completes the process of inserting an object into
    347  *   the namespace and can't fail.  The facility can call ipc_cleanup
    348  *   at any time following a successful ipc_get and before
    349  *   ipc_commit_end or a failed ipc_commit_begin to fail the
    350  *   allocation.  Pseudocode for the suggested GET implementation:
    351  *
    352  *   top:
    353  *
    354  *     ipc_get
    355  *
    356  *     if failure
    357  *       return
    358  *
    359  *     if found {
    360  *
    361  *	 if object meets criteria
    362  *	   unlock object and return success
    363  *       else
    364  *	   unlock object and return failure
    365  *
    366  *     } else {
    367  *
    368  *	 perform resource control tests
    369  *	 drop namespace lock, p_lock
    370  *	 if failure
    371  *	   ipc_cleanup
    372  *
    373  *       perform facility-specific initialization
    374  *	 if failure {
    375  *	   facility-specific cleanup
    376  *	   ipc_cleanup
    377  *       }
    378  *
    379  *	 ( At this point the object should be destructible using the
    380  *	   destructor given to ipcs_create )
    381  *
    382  *       ipc_commit_begin
    383  *	 if retry
    384  *	   goto top
    385  *       else if failure
    386  *         return
    387  *
    388  *       perform facility-specific resource control tests/allocations
    389  *	 if failure
    390  *	   ipc_cleanup
    391  *
    392  *	 ipc_commit_end
    393  *	 perform any infallible post-creation actions, unlock, and return
    394  *
    395  *     }
    396  *
    397  * ipc_rmid
    398  *
    399  *   Performs the common portion of an RMID operation -- looks up an ID,
    400  *   removes it, and calls a facility-specific function to do
    401  *   RMID-time cleanup on the private portions of the object.
    402  *
    403  * ipc_ids
    404  *
    405  *   Performs the common portion of an IDS operation.
    406  *
    407  */
    408 
    409 #include <sys/types.h>
    410 #include <sys/param.h>
    411 #include <sys/cred.h>
    412 #include <sys/policy.h>
    413 #include <sys/proc.h>
    414 #include <sys/user.h>
    415 #include <sys/ipc.h>
    416 #include <sys/ipc_impl.h>
    417 #include <sys/errno.h>
    418 #include <sys/systm.h>
    419 #include <sys/list.h>
    420 #include <sys/atomic.h>
    421 #include <sys/zone.h>
    422 #include <sys/task.h>
    423 #include <sys/modctl.h>
    424 
    425 #include <c2/audit.h>
    426 
/*
 * Module description: uses the mod_miscops operations vector and
 * carries the description string "common ipc code".
 */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"common ipc code",
};

/*
 * Linkage structure handed to mod_install(), mod_remove() and
 * mod_info() by _init(), _fini() and _info().  Its only linkage entry
 * is modlmisc, followed by a NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};
    435 
    436 
    437 int
    438 _init(void)
    439 {
    440 	return (mod_install(&modlinkage));
    441 }
    442 
    443 int
    444 _fini(void)
    445 {
    446 	return (mod_remove(&modlinkage));
    447 }
    448 
    449 int
    450 _info(struct modinfo *modinfop)
    451 {
    452 	return (mod_info(&modlinkage, modinfop));
    453 }
    454 
    455 
    456 /*
    457  * Check message, semaphore, or shared memory access permissions.
    458  *
    459  * This routine verifies the requested access permission for the current
    460  * process.  The zone ids are compared, and the appropriate bits are
    461  * checked corresponding to owner, group (including the list of
    462  * supplementary groups), or everyone.  Zero is returned on success.
    463  * On failure, the security policy is asked to check to override the
    464  * permissions check; the policy will either return 0 for access granted
    465  * or EACCES.
    466  *
    467  * Access to objects in other zones requires that the caller be in the
    468  * global zone and have the appropriate IPC_DAC_* privilege, regardless
    469  * of whether the uid or gid match those of the object.  Note that
    470  * cross-zone accesses will normally never get here since they'll
    471  * fail in ipc_lookup or ipc_get.
    472  *
    473  * The arguments must be set up as follows:
    474  * 	p - Pointer to permission structure to verify
    475  * 	mode - Desired access permissions
    476  */
    477 int
    478 ipcperm_access(kipc_perm_t *p, int mode, cred_t *cr)
    479 {
    480 	int shifts = 0;
    481 	uid_t uid = crgetuid(cr);
    482 	zoneid_t zoneid = getzoneid();
    483 
    484 	if (p->ipc_zoneid == zoneid) {
    485 		if (uid != p->ipc_uid && uid != p->ipc_cuid) {
    486 			shifts += 3;
    487 			if (!groupmember(p->ipc_gid, cr) &&
    488 			    !groupmember(p->ipc_cgid, cr))
    489 				shifts += 3;
    490 		}
    491 
    492 		mode &= ~(p->ipc_mode << shifts);
    493 
    494 		if (mode == 0)
    495 			return (0);
    496 	} else if (zoneid != GLOBAL_ZONEID)
    497 		return (EACCES);
    498 
    499 	return (secpolicy_ipc_access(cr, p, mode));
    500 }
    501 
    502 /*
    503  * There are two versions of the ipcperm_set/stat functions:
    504  *   ipcperm_???        - for use with IPC_SET/STAT
    505  *   ipcperm_???64      - for use with IPC_SET64/STAT64
    506  *
    507  * These functions encapsulate the common portions (copying, permission
    508  * checks, and auditing) of the set/stat operations.  All, except for
    509  * stat and stat64 which are void, return 0 on success or a non-zero
    510  * errno value on error.
    511  */
    512 
    513 int
    514 ipcperm_set(ipc_service_t *service, struct cred *cr,
    515     kipc_perm_t *kperm, struct ipc_perm *perm, model_t model)
    516 {
    517 	STRUCT_HANDLE(ipc_perm, lperm);
    518 	uid_t uid;
    519 	gid_t gid;
    520 	mode_t mode;
    521 	zone_t *zone;
    522 
    523 	ASSERT(IPC_LOCKED(service, kperm));
    524 
    525 	STRUCT_SET_HANDLE(lperm, model, perm);
    526 	uid = STRUCT_FGET(lperm, uid);
    527 	gid = STRUCT_FGET(lperm, gid);
    528 	mode = STRUCT_FGET(lperm, mode);
    529 
    530 	if (secpolicy_ipc_owner(cr, kperm) != 0)
    531 		return (EPERM);
    532 
    533 	zone = crgetzone(cr);
    534 	if (!VALID_UID(uid, zone) || !VALID_GID(gid, zone))
    535 		return (EINVAL);
    536 
    537 	kperm->ipc_uid = uid;
    538 	kperm->ipc_gid = gid;
    539 	kperm->ipc_mode = (mode & 0777) | (kperm->ipc_mode & ~0777);
    540 
    541 	if (audit_active)
    542 		audit_ipcget(service->ipcs_atype, kperm);
    543 
    544 	return (0);
    545 }
    546 
    547 void
    548 ipcperm_stat(struct ipc_perm *perm, kipc_perm_t *kperm, model_t model)
    549 {
    550 	STRUCT_HANDLE(ipc_perm, lperm);
    551 
    552 	STRUCT_SET_HANDLE(lperm, model, perm);
    553 	STRUCT_FSET(lperm, uid, kperm->ipc_uid);
    554 	STRUCT_FSET(lperm, gid, kperm->ipc_gid);
    555 	STRUCT_FSET(lperm, cuid, kperm->ipc_cuid);
    556 	STRUCT_FSET(lperm, cgid, kperm->ipc_cgid);
    557 	STRUCT_FSET(lperm, mode, kperm->ipc_mode);
    558 	STRUCT_FSET(lperm, seq, 0);
    559 	STRUCT_FSET(lperm, key, kperm->ipc_key);
    560 }
    561 
    562 int
    563 ipcperm_set64(ipc_service_t *service, struct cred *cr,
    564     kipc_perm_t *kperm, ipc_perm64_t *perm64)
    565 {
    566 	zone_t *zone;
    567 
    568 	ASSERT(IPC_LOCKED(service, kperm));
    569 
    570 	if (secpolicy_ipc_owner(cr, kperm) != 0)
    571 		return (EPERM);
    572 
    573 	zone = crgetzone(cr);
    574 	if (!VALID_UID(perm64->ipcx_uid, zone) ||
    575 	    !VALID_GID(perm64->ipcx_gid, zone))
    576 		return (EINVAL);
    577 
    578 	kperm->ipc_uid = perm64->ipcx_uid;
    579 	kperm->ipc_gid = perm64->ipcx_gid;
    580 	kperm->ipc_mode = (perm64->ipcx_mode & 0777) |
    581 	    (kperm->ipc_mode & ~0777);
    582 
    583 	if (audit_active)
    584 		audit_ipcget(service->ipcs_atype, kperm);
    585 
    586 	return (0);
    587 }
    588 
    589 void
    590 ipcperm_stat64(ipc_perm64_t *perm64, kipc_perm_t *kperm)
    591 {
    592 	perm64->ipcx_uid = kperm->ipc_uid;
    593 	perm64->ipcx_gid = kperm->ipc_gid;
    594 	perm64->ipcx_cuid = kperm->ipc_cuid;
    595 	perm64->ipcx_cgid = kperm->ipc_cgid;
    596 	perm64->ipcx_mode = kperm->ipc_mode;
    597 	perm64->ipcx_key = kperm->ipc_key;
    598 	perm64->ipcx_projid = kperm->ipc_proj->kpj_id;
    599 	perm64->ipcx_zoneid = kperm->ipc_zoneid;
    600 }
    601 
    602 
    603 /*
    604  * ipc key comparator.
    605  */
    606 static int
    607 ipc_key_compar(const void *a, const void *b)
    608 {
    609 	kipc_perm_t *aperm = (kipc_perm_t *)a;
    610 	kipc_perm_t *bperm = (kipc_perm_t *)b;
    611 	int ak = aperm->ipc_key;
    612 	int bk = bperm->ipc_key;
    613 	zoneid_t az;
    614 	zoneid_t bz;
    615 
    616 	ASSERT(ak != IPC_PRIVATE);
    617 	ASSERT(bk != IPC_PRIVATE);
    618 
    619 	/*
    620 	 * Compare key first, then zoneid.  This optimizes performance for
    621 	 * systems with only one zone, since the zone checks will only be
    622 	 * made when the keys match.
    623 	 */
    624 	if (ak < bk)
    625 		return (-1);
    626 	if (ak > bk)
    627 		return (1);
    628 
    629 	/* keys match */
    630 	az = aperm->ipc_zoneid;
    631 	bz = bperm->ipc_zoneid;
    632 	if (az < bz)
    633 		return (-1);
    634 	if (az > bz)
    635 		return (1);
    636 	return (0);
    637 }
    638 
    639 /*
    640  * Create an ipc service.
    641  */
    642 ipc_service_t *
    643 ipcs_create(const char *name, rctl_hndl_t proj_rctl, rctl_hndl_t zone_rctl,
    644     size_t size, ipc_func_t *dtor, ipc_func_t *rmid, int audit_type,
    645     size_t rctl_offset)
    646 {
    647 	ipc_service_t *result;
    648 
    649 	result = kmem_alloc(sizeof (ipc_service_t), KM_SLEEP);
    650 
    651 	mutex_init(&result->ipcs_lock, NULL, MUTEX_ADAPTIVE, NULL);
    652 	result->ipcs_count = 0;
    653 	avl_create(&result->ipcs_keys, ipc_key_compar, size, 0);
    654 	result->ipcs_tabsz = IPC_IDS_MIN;
    655 	result->ipcs_table =
    656 	    kmem_zalloc(IPC_IDS_MIN * sizeof (ipc_slot_t), KM_SLEEP);
    657 	result->ipcs_ssize = size;
    658 	result->ipcs_ids = id_space_create(name, 0, IPC_IDS_MIN);
    659 	result->ipcs_dtor = dtor;
    660 	result->ipcs_rmid = rmid;
    661 	result->ipcs_proj_rctl = proj_rctl;
    662 	result->ipcs_zone_rctl = zone_rctl;
    663 	result->ipcs_atype = audit_type;
    664 	ASSERT(rctl_offset < sizeof (ipc_rqty_t));
    665 	result->ipcs_rctlofs = rctl_offset;
    666 	list_create(&result->ipcs_usedids, sizeof (kipc_perm_t),
    667 	    offsetof(kipc_perm_t, ipc_list));
    668 
    669 	return (result);
    670 }
    671 
    672 /*
    673  * Destroy an ipc service.
    674  */
    675 void
    676 ipcs_destroy(ipc_service_t *service)
    677 {
    678 	ipc_slot_t *slot, *next;
    679 
    680 	mutex_enter(&service->ipcs_lock);
    681 
    682 	ASSERT(service->ipcs_count == 0);
    683 	avl_destroy(&service->ipcs_keys);
    684 	list_destroy(&service->ipcs_usedids);
    685 	id_space_destroy(service->ipcs_ids);
    686 
    687 	for (slot = service->ipcs_table; slot; slot = next) {
    688 		next = slot[0].ipct_chain;
    689 		kmem_free(slot, service->ipcs_tabsz * sizeof (ipc_slot_t));
    690 		service->ipcs_tabsz >>= 1;
    691 	}
    692 
    693 	mutex_destroy(&service->ipcs_lock);
    694 	kmem_free(service, sizeof (ipc_service_t));
    695 }
    696 
    697 /*
    698  * Takes the service lock.
    699  */
    700 void
    701 ipcs_lock(ipc_service_t *service)
    702 {
    703 	mutex_enter(&service->ipcs_lock);
    704 }
    705 
    706 /*
    707  * Releases the service lock.
    708  */
    709 void
    710 ipcs_unlock(ipc_service_t *service)
    711 {
    712 	mutex_exit(&service->ipcs_lock);
    713 }
    714 
    715 
    716 /*
    717  * Locks the specified ID.  Returns the ID's ID table index.
    718  */
    719 static int
    720 ipc_lock_internal(ipc_service_t *service, uint_t id)
    721 {
    722 	uint_t	tabsz;
    723 	uint_t	index;
    724 	kmutex_t *mutex;
    725 
    726 	for (;;) {
    727 		tabsz = service->ipcs_tabsz;
    728 		membar_consumer();
    729 		index = id & (tabsz - 1);
    730 		mutex = &service->ipcs_table[index].ipct_lock;
    731 		mutex_enter(mutex);
    732 		if (tabsz == service->ipcs_tabsz)
    733 			break;
    734 		mutex_exit(mutex);
    735 	}
    736 
    737 	return (index);
    738 }
    739 
    740 /*
    741  * Locks the specified ID.  Returns a pointer to the ID's lock.
    742  */
    743 kmutex_t *
    744 ipc_lock(ipc_service_t *service, int id)
    745 {
    746 	uint_t index;
    747 
    748 	/*
    749 	 * These assertions don't reflect requirements of the code
    750 	 * which follows, but they should never fail nonetheless.
    751 	 */
    752 	ASSERT(id >= 0);
    753 	ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
    754 	index = ipc_lock_internal(service, id);
    755 
    756 	return (&service->ipcs_table[index].ipct_lock);
    757 }
    758 
    759 /*
    760  * Checks to see if the held lock provided is the current lock for the
    761  * specified id.  If so, we return it instead of dropping it and
    762  * returning the result of ipc_lock.  This is intended to speed up cv
    763  * wakeups where we are left holding a lock which could be stale, but
    764  * probably isn't.
    765  */
    766 kmutex_t *
    767 ipc_relock(ipc_service_t *service, int id, kmutex_t *lock)
    768 {
    769 	ASSERT(id >= 0);
    770 	ASSERT(IPC_INDEX(id) < service->ipcs_tabsz);
    771 	ASSERT(MUTEX_HELD(lock));
    772 
    773 	if (&service->ipcs_table[IPC_INDEX(id)].ipct_lock == lock)
    774 		return (lock);
    775 
    776 	mutex_exit(lock);
    777 	return (ipc_lock(service, id));
    778 }
    779 
    780 /*
    781  * Performs an ID lookup.  If the ID doesn't exist or has been removed,
    782  * or isn't visible to the caller (because of zones), NULL is returned.
    783  * Otherwise, a pointer to the ID's perm structure and held ID lock are
    784  * returned.
    785  */
    786 kmutex_t *
    787 ipc_lookup(ipc_service_t *service, int id, kipc_perm_t **perm)
    788 {
    789 	kipc_perm_t *result;
    790 	uint_t index;
    791 
    792 	/*
    793 	 * There is no need to check to see if id is in-range (i.e.
    794 	 * positive and fits into the table).  If it is out-of-range,
    795 	 * the id simply won't match the object's.
    796 	 */
    797 
    798 	index = ipc_lock_internal(service, id);
    799 	result = service->ipcs_table[index].ipct_data;
    800 	if (result == NULL || result->ipc_id != (uint_t)id ||
    801 	    !HASZONEACCESS(curproc, result->ipc_zoneid)) {
    802 		mutex_exit(&service->ipcs_table[index].ipct_lock);
    803 		return (NULL);
    804 	}
    805 
    806 	ASSERT(IPC_SEQ(id) == service->ipcs_table[index].ipct_seq);
    807 
    808 	*perm = result;
    809 	if (audit_active)
    810 		audit_ipc(service->ipcs_atype, id, result);
    811 
    812 	return (&service->ipcs_table[index].ipct_lock);
    813 }
    814 
    815 /*
    816  * Increase the reference count on an ID.
    817  */
    818 /*ARGSUSED*/
    819 void
    820 ipc_hold(ipc_service_t *s, kipc_perm_t *perm)
    821 {
    822 	ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
    823 	ASSERT(IPC_LOCKED(s, perm));
    824 	perm->ipc_ref++;
    825 }
    826 
    827 /*
    828  * Decrease the reference count on an ID and drops the ID's lock.
    829  * Destroys the ID if the new reference count is zero.
    830  */
    831 void
    832 ipc_rele(ipc_service_t *s, kipc_perm_t *perm)
    833 {
    834 	int nref;
    835 
    836 	ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
    837 	ASSERT(IPC_LOCKED(s, perm));
    838 	ASSERT(perm->ipc_ref > 0);
    839 
    840 	nref = --perm->ipc_ref;
    841 	mutex_exit(&s->ipcs_table[IPC_INDEX(perm->ipc_id)].ipct_lock);
    842 
    843 	if (nref == 0) {
    844 		ASSERT(IPC_FREE(perm));		/* ipc_rmid clears IPC_ALLOC */
    845 		s->ipcs_dtor(perm);
    846 		project_rele(perm->ipc_proj);
    847 		zone_rele(perm->ipc_zone);
    848 		kmem_free(perm, s->ipcs_ssize);
    849 	}
    850 }
    851 
    852 /*
    853  * Decrease the reference count on an ID, but don't drop the ID lock.
    854  * Used in cases where one thread needs to remove many references (on
    855  * behalf of other parties).
    856  */
    857 void
    858 ipc_rele_locked(ipc_service_t *s, kipc_perm_t *perm)
    859 {
    860 	ASSERT(perm->ipc_ref > 1);
    861 	ASSERT(IPC_INDEX(perm->ipc_id) < s->ipcs_tabsz);
    862 	ASSERT(IPC_LOCKED(s, perm));
    863 
    864 	perm->ipc_ref--;
    865 }
    866 
    867 
/*
 * Internal function to grow the service ID table.
 *
 * Doubles the number of slots.  Must be called with the service lock
 * held and without curproc's p_lock.  Returns 0 on success, or ENOSPC
 * if the table already has IPC_IDS_MAX slots or the new table cannot
 * be allocated without sleeping.
 */
static int
ipc_grow(ipc_service_t *service)
{
	ipc_slot_t *new, *old;
	int i, oldsize, newsize;

	ASSERT(MUTEX_HELD(&service->ipcs_lock));
	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));

	if (service->ipcs_tabsz == IPC_IDS_MAX)
		return (ENOSPC);

	oldsize = service->ipcs_tabsz;
	newsize = oldsize << 1;
	/* KM_NOSLEEP: the allocation may fail, which is reported as ENOSPC. */
	new = kmem_zalloc(newsize * sizeof (ipc_slot_t), KM_NOSLEEP);
	if (new == NULL)
		return (ENOSPC);

	/*
	 * Take every old slot lock together with the matching new slot
	 * lock, and move each slot's sequence number and object over.
	 * All of these locks stay held until the new table and size
	 * have been published below.
	 */
	old = service->ipcs_table;
	for (i = 0; i < oldsize; i++) {
		mutex_enter(&old[i].ipct_lock);
		mutex_enter(&new[i].ipct_lock);

		new[i].ipct_seq = old[i].ipct_seq;
		new[i].ipct_data = old[i].ipct_data;
		old[i].ipct_data = NULL;
	}

	/*
	 * The old table is not freed here; it is chained off slot 0 of
	 * the new table.  The table pointer is stored before the size,
	 * with membar_producer() in between, matching the
	 * membar_consumer() in ipc_lock_internal().
	 */
	new[0].ipct_chain = old;
	service->ipcs_table = new;
	membar_producer();
	service->ipcs_tabsz = newsize;

	for (i = 0; i < oldsize; i++) {
		mutex_exit(&old[i].ipct_lock);
		mutex_exit(&new[i].ipct_lock);
	}

	/* Make the newly added index range available to the ID allocator. */
	id_space_extend(service->ipcs_ids, oldsize, service->ipcs_tabsz);

	return (0);
}
    913 
    914 
    915 static int
    916 ipc_keylookup(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp)
    917 {
    918 	kipc_perm_t *perm = NULL;
    919 	avl_index_t where;
    920 	kipc_perm_t template;
    921 
    922 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
    923 
    924 	template.ipc_key = key;
    925 	template.ipc_zoneid = getzoneid();
    926 	if (perm = avl_find(&service->ipcs_keys, &template, &where)) {
    927 		ASSERT(!IPC_FREE(perm));
    928 		if ((flag & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
    929 			return (EEXIST);
    930 		if ((flag & 0777) & ~perm->ipc_mode) {
    931 			if (audit_active)
    932 				audit_ipcget(NULL, (void *)perm);
    933 			return (EACCES);
    934 		}
    935 		*permp = perm;
    936 		return (0);
    937 	} else if (flag & IPC_CREAT) {
    938 		*permp = NULL;
    939 		return (0);
    940 	}
    941 	return (ENOENT);
    942 }
    943 
    944 static int
    945 ipc_alloc_test(ipc_service_t *service, proc_t *pp)
    946 {
    947 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
    948 
    949 	/*
    950 	 * Resizing the table first would result in a cleaner code
    951 	 * path, but would also allow a user to (permanently) double
    952 	 * the id table size in cases where the allocation would be
    953 	 * denied.  Hence we test the rctl first.
    954 	 */
    955 retry:
    956 	mutex_enter(&pp->p_lock);
    957 	if ((rctl_test(service->ipcs_proj_rctl, pp->p_task->tk_proj->kpj_rctls,
    958 	    pp, 1, RCA_SAFE) & RCT_DENY) ||
    959 	    (rctl_test(service->ipcs_zone_rctl, pp->p_zone->zone_rctls,
    960 	    pp, 1, RCA_SAFE) & RCT_DENY)) {
    961 		mutex_exit(&pp->p_lock);
    962 		return (ENOSPC);
    963 	}
    964 
    965 	if (service->ipcs_count == service->ipcs_tabsz) {
    966 		int error;
    967 
    968 		mutex_exit(&pp->p_lock);
    969 		if (error = ipc_grow(service))
    970 			return (error);
    971 		goto retry;
    972 	}
    973 
    974 	return (0);
    975 }
    976 
    977 /*
    978  * Given a key, search for or create the associated identifier.
    979  *
    980  * If IPC_CREAT is specified and the key isn't found, or if the key is
    981  * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly
    982  * allocated object structure in permp.  A pointer to the held service
    983  * lock is placed in lockp.  ipc_mode's IPC_ALLOC bit is clear.
    984  *
    985  * If the key is found and no error conditions arise, we return 0 and
    986  * place a pointer to the existing object structure in permp.  A
    987  * pointer to the held ID lock is placed in lockp.  ipc_mode's
    988  * IPC_ALLOC bit is set.
    989  *
    990  * Otherwise, a non-zero errno value is returned.
    991  */
    992 int
    993 ipc_get(ipc_service_t *service, key_t key, int flag, kipc_perm_t **permp,
    994     kmutex_t **lockp)
    995 {
    996 	kipc_perm_t	*perm = NULL;
    997 	proc_t		*pp = curproc;
    998 	int		error, index;
    999 	cred_t		*cr = CRED();
   1000 
   1001 	if (key != IPC_PRIVATE) {
   1002 
   1003 		mutex_enter(&service->ipcs_lock);
   1004 		error = ipc_keylookup(service, key, flag, &perm);
   1005 		if (perm != NULL)
   1006 			index = ipc_lock_internal(service, perm->ipc_id);
   1007 		mutex_exit(&service->ipcs_lock);
   1008 
   1009 		if (error) {
   1010 			ASSERT(perm == NULL);
   1011 			return (error);
   1012 		}
   1013 
   1014 		if (perm) {
   1015 			ASSERT(!IPC_FREE(perm));
   1016 			*permp = perm;
   1017 			*lockp = &service->ipcs_table[index].ipct_lock;
   1018 			return (0);
   1019 		}
   1020 
   1021 		/* Key not found; fall through */
   1022 	}
   1023 
   1024 	perm = kmem_zalloc(service->ipcs_ssize, KM_SLEEP);
   1025 
   1026 	mutex_enter(&service->ipcs_lock);
   1027 	if (error = ipc_alloc_test(service, pp)) {
   1028 		mutex_exit(&service->ipcs_lock);
   1029 		kmem_free(perm, service->ipcs_ssize);
   1030 		return (error);
   1031 	}
   1032 
   1033 	perm->ipc_cuid = perm->ipc_uid = crgetuid(cr);
   1034 	perm->ipc_cgid = perm->ipc_gid = crgetgid(cr);
   1035 	perm->ipc_zoneid = getzoneid();
   1036 	perm->ipc_mode = flag & 0777;
   1037 	perm->ipc_key = key;
   1038 	perm->ipc_ref = 1;
   1039 	perm->ipc_id = IPC_ID_INVAL;
   1040 	*permp = perm;
   1041 	*lockp = &service->ipcs_lock;
   1042 
   1043 	return (0);
   1044 }
   1045 
   1046 /*
   1047  * Attempts to add the a newly created ID to the global namespace.  If
   1048  * creating it would cause an error, we return the error.  If there is
   1049  * the possibility that we could obtain the existing ID and return it
   1050  * to the user, we return EAGAIN.  Otherwise, we return 0 with p_lock
   1051  * and the service lock held.
   1052  *
   1053  * Since this should be only called after all initialization has been
   1054  * completed, on failure we automatically invoke the destructor for the
   1055  * object and deallocate the memory associated with it.
   1056  */
   1057 int
   1058 ipc_commit_begin(ipc_service_t *service, key_t key, int flag,
   1059     kipc_perm_t *newperm)
   1060 {
   1061 	kipc_perm_t *perm;
   1062 	int error;
   1063 	proc_t *pp = curproc;
   1064 
   1065 	ASSERT(newperm->ipc_ref == 1);
   1066 	ASSERT(IPC_FREE(newperm));
   1067 
   1068 	/*
   1069 	 * Set ipc_proj and ipc_zone so that future calls to ipc_cleanup()
   1070 	 * clean up the necessary state.  This must be done before the
   1071 	 * potential call to ipcs_dtor() below.
   1072 	 */
   1073 	newperm->ipc_proj = pp->p_task->tk_proj;
   1074 	newperm->ipc_zone = pp->p_zone;
   1075 
   1076 	mutex_enter(&service->ipcs_lock);
   1077 	/*
   1078 	 * Ensure that no-one has raced with us and created the key.
   1079 	 */
   1080 	if ((key != IPC_PRIVATE) &&
   1081 	    (((error = ipc_keylookup(service, key, flag, &perm)) != 0) ||
   1082 	    (perm != NULL))) {
   1083 		error = error ? error : EAGAIN;
   1084 		goto errout;
   1085 	}
   1086 
   1087 	/*
   1088 	 * Ensure that no-one has raced with us and used the last of
   1089 	 * the permissible ids, or the last of the free spaces in the
   1090 	 * id table.
   1091 	 */
   1092 	if (error = ipc_alloc_test(service, pp))
   1093 		goto errout;
   1094 
   1095 	ASSERT(MUTEX_HELD(&service->ipcs_lock));
   1096 	ASSERT(MUTEX_HELD(&pp->p_lock));
   1097 
   1098 	return (0);
   1099 errout:
   1100 	mutex_exit(&service->ipcs_lock);
   1101 	service->ipcs_dtor(newperm);
   1102 	kmem_free(newperm, service->ipcs_ssize);
   1103 	return (error);
   1104 }
   1105 
/*
 * Commit the ID allocation transaction.  Called with p_lock and the
 * service lock held, both of which are dropped.  Returns the held ID
 * lock so the caller can extract the ID and perform ipcget auditing.
 *
 * The project and zone holds taken here are the ones ipc_rele()
 * releases when the last reference to the object goes away.
 */
kmutex_t *
ipc_commit_end(ipc_service_t *service, kipc_perm_t *perm)
{
	ipc_slot_t *slot;
	avl_index_t where;
	int index;
	void *loc;

	ASSERT(MUTEX_HELD(&service->ipcs_lock));
	ASSERT(MUTEX_HELD(&curproc->p_lock));

	/* Take the holds while p_lock is still held, then drop p_lock. */
	(void) project_hold(perm->ipc_proj);
	(void) zone_hold(perm->ipc_zone);
	mutex_exit(&curproc->p_lock);

	/*
	 * Pick out our slot.
	 */
	service->ipcs_count++;
	index = id_alloc(service->ipcs_ids);
	ASSERT(index < service->ipcs_tabsz);
	slot = &service->ipcs_table[index];
	mutex_enter(&slot->ipct_lock);
	ASSERT(slot->ipct_data == NULL);

	/*
	 * Update the perm structure.  The ID combines the slot's current
	 * sequence number with the slot index.
	 */
	perm->ipc_mode |= IPC_ALLOC;
	perm->ipc_id = (slot->ipct_seq << IPC_SEQ_SHIFT) | index;

	/*
	 * Push into global visibility.
	 */
	slot->ipct_data = perm;
	if (perm->ipc_key != IPC_PRIVATE) {
		/*
		 * The lookup is needed even though the key must be
		 * absent: it yields the insertion point for avl_insert().
		 */
		loc = avl_find(&service->ipcs_keys, perm, &where);
		ASSERT(loc == NULL);
		avl_insert(&service->ipcs_keys, perm, where);
	}
	list_insert_head(&service->ipcs_usedids, perm);

	/*
	 * Update resource consumption.
	 */
	IPC_PROJ_USAGE(perm, service) += 1;
	IPC_ZONE_USAGE(perm, service) += 1;

	/* The ID lock stays held and is handed to the caller. */
	mutex_exit(&service->ipcs_lock);
	return (&slot->ipct_lock);
}
   1162 
   1163 /*
   1164  * Clean up function, in case the allocation fails.  If called between
   1165  * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we
   1166  * merely free the perm structure.  If called after ipc_commit_begin,
   1167  * we also drop locks and call the ID's destructor.
   1168  */
   1169 void
   1170 ipc_cleanup(ipc_service_t *service, kipc_perm_t *perm)
   1171 {
   1172 	ASSERT(IPC_FREE(perm));
   1173 	if (perm->ipc_proj) {
   1174 		mutex_exit(&curproc->p_lock);
   1175 		mutex_exit(&service->ipcs_lock);
   1176 		service->ipcs_dtor(perm);
   1177 	}
   1178 	kmem_free(perm, service->ipcs_ssize);
   1179 }
   1180 
   1181 
/*
 * Common code to remove an IPC object.  This should be called after
 * all permissions checks have been performed, and with the service
 * and ID locked.  The object is taken out of its table slot, out of
 * the key tree (unless its key is IPC_PRIVATE) and off the
 * ipcs_usedids list, and its IPC_ALLOC bit is cleared.  Neither lock
 * is dropped and the object's reference count is not touched; the
 * caller remains responsible for both.
 */
static void
ipc_remove(ipc_service_t *service, kipc_perm_t *perm)
{
	int id = perm->ipc_id;
	int index;

	ASSERT(MUTEX_HELD(&service->ipcs_lock));
	ASSERT(IPC_LOCKED(service, perm));

	index = IPC_INDEX(id);

	/* From here on, lookups of this ID find an empty slot. */
	service->ipcs_table[index].ipct_data = NULL;

	if (perm->ipc_key != IPC_PRIVATE)
		avl_remove(&service->ipcs_keys, perm);
	list_remove(&service->ipcs_usedids, perm);
	perm->ipc_mode &= ~IPC_ALLOC;

	id_free(service->ipcs_ids, index);

	/*
	 * Advance the slot's sequence number, wrapping to zero after
	 * IPC_SEQ_MASK, so the next ID issued for this slot differs
	 * from the one just removed.
	 */
	if (service->ipcs_table[index].ipct_seq++ == IPC_SEQ_MASK)
		service->ipcs_table[index].ipct_seq = 0;
	service->ipcs_count--;
	ASSERT(IPC_PROJ_USAGE(perm, service) > 0);
	ASSERT(IPC_ZONE_USAGE(perm, service) > 0);
	IPC_PROJ_USAGE(perm, service) -= 1;
	IPC_ZONE_USAGE(perm, service) -= 1;
	/* With no IDs left in the service, no usage may remain charged. */
	ASSERT(service->ipcs_count || ((IPC_PROJ_USAGE(perm, service) == 0) &&
	    (IPC_ZONE_USAGE(perm, service) == 0)));
}
   1219 
   1220 
   1221 /*
   1222  * Common code to perform an IPC_RMID.  Returns an errno value on
   1223  * failure, 0 on success.
   1224  */
   1225 int
   1226 ipc_rmid(ipc_service_t *service, int id, cred_t *cr)
   1227 {
   1228 	kipc_perm_t *perm;
   1229 	kmutex_t *lock;
   1230 
   1231 	mutex_enter(&service->ipcs_lock);
   1232 
   1233 	lock = ipc_lookup(service, id, &perm);
   1234 	if (lock == NULL) {
   1235 		mutex_exit(&service->ipcs_lock);
   1236 		return (EINVAL);
   1237 	}
   1238 
   1239 	ASSERT(service->ipcs_count > 0);
   1240 
   1241 	if (secpolicy_ipc_owner(cr, perm) != 0) {
   1242 		mutex_exit(lock);
   1243 		mutex_exit(&service->ipcs_lock);
   1244 		return (EPERM);
   1245 	}
   1246 
   1247 	/*
   1248 	 * Nothing can fail from this point on.
   1249 	 */
   1250 	ipc_remove(service, perm);
   1251 	mutex_exit(&service->ipcs_lock);
   1252 
   1253 	/* perform any per-service removal actions */
   1254 	service->ipcs_rmid(perm);
   1255 
   1256 	ipc_rele(service, perm);
   1257 
   1258 	return (0);
   1259 }
   1260 
/*
 * Implementation for shmids, semids, and msgids.  buf is the address
 * of the user buffer, nids is the size, and pnids is a pointer to
 * where we write the actual number of ids that [would] have been
 * copied out.
 *
 * Returns 0 on success, or EFAULT if writing the count or copying out
 * the ids fails.  The ids are only copied out when all of them fit in
 * the caller's buffer; the count is written in every case.
 */
int
ipc_ids(ipc_service_t *service, int *buf, uint_t nids, uint_t *pnids)
{
	kipc_perm_t *perm;
	size_t	idsize = 0;	/* non-zero only while ids is allocated */
	int	error = 0;
	int	idcount;
	int	*ids;
	int	numids = 0;
	zoneid_t zoneid = getzoneid();
	int	global = INGLOBALZONE(curproc);

	/* Without a buffer the caller can only be asking for the count. */
	if (buf == NULL)
		nids = 0;

	/*
	 * Get an accurate count of the total number of ids, and allocate a
	 * staging buffer.  Since ipcs_count is always sane, we don't have
	 * to take ipcs_lock for our first guess.  If there are no ids, or
	 * we're in the global zone and the number of ids is greater than
	 * the size of the specified buffer, we shunt to the end.  Otherwise,
	 * we go through the id list looking for (and counting) what is
	 * visible in the specified zone.
	 */
	idcount = service->ipcs_count;
	for (;;) {
		if ((global && idcount > nids) || idcount == 0) {
			/*
			 * nids is zeroed so that the copyout below is
			 * skipped; ids is not valid on this path.
			 */
			numids = idcount;
			nids = 0;
			goto out;
		}

		/* Sleeping allocation, done before taking ipcs_lock. */
		idsize = idcount * sizeof (int);
		ids = kmem_alloc(idsize, KM_SLEEP);

		/*
		 * Re-check the count under the lock.  If the buffer is
		 * still large enough, leave the loop with ipcs_lock held.
		 */
		mutex_enter(&service->ipcs_lock);
		if (idcount >= service->ipcs_count)
			break;
		/* More ids appeared; pick up the new count and start over. */
		idcount = service->ipcs_count;
		mutex_exit(&service->ipcs_lock);

		if (idsize != 0) {
			kmem_free(ids, idsize);
			idsize = 0;
		}
	}

	/*
	 * Collect the ids: all of them for a global zone caller, only
	 * those of the caller's zone otherwise.
	 */
	for (perm = list_head(&service->ipcs_usedids); perm != NULL;
	    perm = list_next(&service->ipcs_usedids, perm)) {
		ASSERT(!IPC_FREE(perm));
		if (global || perm->ipc_zoneid == zoneid)
			ids[numids++] = perm->ipc_id;
	}
	mutex_exit(&service->ipcs_lock);

	/*
	 * If there isn't enough space to hold all of the ids, just
	 * return the number of ids without copying out any of them.
	 */
	if (nids < numids)
		nids = 0;

out:
	/* nids == 0 here means "report the count only". */
	if (suword32(pnids, (uint32_t)numids) ||
	    (nids != 0 && copyout(ids, buf, numids * sizeof (int))))
		error = EFAULT;
	if (idsize != 0)
		kmem_free(ids, idsize);
	return (error);
}
   1337 
/*
 * Destroy IPC objects from the given service that are associated with
 * the given zone.
 *
 * We can't hold on to the service lock when freeing objects, so we
 * first search the service and move all the objects to a private
 * list, then walk through and free them after dropping the lock.
 */
void
ipc_remove_zone(ipc_service_t *service, zoneid_t zoneid)
{
	kipc_perm_t *perm, *next;
	list_t rmlist;
	kmutex_t *lock;

	/* The private list links objects through the same ipc_list field. */
	list_create(&rmlist, sizeof (kipc_perm_t),
	    offsetof(kipc_perm_t, ipc_list));

	mutex_enter(&service->ipcs_lock);
	for (perm = list_head(&service->ipcs_usedids); perm != NULL;
	    perm = next) {
		/* Fetch the successor first; perm may leave the list below. */
		next = list_next(&service->ipcs_usedids, perm);
		if (perm->ipc_zoneid != zoneid)
			continue;

		/*
		 * Remove the object from the service, then put it on
		 * the removal list so we can defer the call to
		 * ipc_rele (which will actually free the structure).
		 * We need to do this since the destructor may grab
		 * the service lock.
		 */
		ASSERT(!IPC_FREE(perm));
		lock = ipc_lock(service, perm->ipc_id);
		ipc_remove(service, perm);
		mutex_exit(lock);
		list_insert_tail(&rmlist, perm);
	}
	mutex_exit(&service->ipcs_lock);

	/*
	 * Now that we've dropped the service lock, loop through the
	 * private list freeing removed objects.
	 */
	for (perm = list_head(&rmlist); perm != NULL; perm = next) {
		next = list_next(&rmlist, perm);
		list_remove(&rmlist, perm);

		/* ipc_rele() below expects the ID lock held and drops it. */
		(void) ipc_lock(service, perm->ipc_id);

		/* perform any per-service removal actions */
		service->ipcs_rmid(perm);

		/* release reference */
		ipc_rele(service, perm);
	}

	list_destroy(&rmlist);
}
   1397