OpenGrok

Cross Reference: shm.c
xref: /onnv/onnv-gate/usr/src/uts/common/os/shm.c
Home | History | Annotate | Line # | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
     24  */
     25 
     26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
     27 /*	  All Rights Reserved	*/
     28 
     29 /*
     30  * University Copyright- Copyright (c) 1982, 1986, 1988
     31  * The Regents of the University of California
     32  * All Rights Reserved
     33  *
     34  * University Acknowledgment- Portions of this document are derived from
     35  * software developed by the University of California, Berkeley, and its
     36  * contributors.
     37  */
     38 
     39 /*
     40  * Inter-Process Communication Shared Memory Facility.
     41  *
     42  * See os/ipc.c for a description of common IPC functionality.
     43  *
     44  * Resource controls
     45  * -----------------
     46  *
     47  * Control:      zone.max-shm-ids (rc_zone_shmmni)
     48  * Description:  Maximum number of shared memory ids allowed a zone.
     49  *
     50  *   When shmget() is used to allocate a shared memory segment, one id
     51  *   is allocated.  If the id allocation doesn't succeed, shmget()
     52  *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
     53  *   IPC_RMID) the id is deallocated.
     54  *
     55  * Control:      project.max-shm-ids (rc_project_shmmni)
     56  * Description:  Maximum number of shared memory ids allowed a project.
     57  *
     58  *   When shmget() is used to allocate a shared memory segment, one id
     59  *   is allocated.  If the id allocation doesn't succeed, shmget()
     60  *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
     61  *   IPC_RMID) the id is deallocated.
     62  *
     63  * Control:      zone.max-shm-memory (rc_zone_shmmax)
     64  * Description:  Total amount of shared memory allowed a zone.
     65  *
     66  *   When shmget() is used to allocate a shared memory segment, the
     67  *   segment's size is allocated against this limit.  If the space
     68  *   allocation doesn't succeed, shmget() fails and errno is set to
     69  *   EINVAL.  The size will be deallocated once the last process has
     70  *   detached the segment and the segment has been successfully
     71  *   shmctl(, IPC_RMID)ed.
     72  *
     73  * Control:      project.max-shm-memory (rc_project_shmmax)
     74  * Description:  Total amount of shared memory allowed a project.
     75  *
     76  *   When shmget() is used to allocate a shared memory segment, the
     77  *   segment's size is allocated against this limit.  If the space
     78  *   allocation doesn't succeed, shmget() fails and errno is set to
     79  *   EINVAL.  The size will be deallocated once the last process has
     80  *   detached the segment and the segment has been successfully
     81  *   shmctl(, IPC_RMID)ed.
     82  */
     83 
     84 #include <sys/types.h>
     85 #include <sys/param.h>
     86 #include <sys/cred.h>
     87 #include <sys/errno.h>
     88 #include <sys/time.h>
     89 #include <sys/kmem.h>
     90 #include <sys/user.h>
     91 #include <sys/proc.h>
     92 #include <sys/systm.h>
     93 #include <sys/prsystm.h>
     94 #include <sys/sysmacros.h>
     95 #include <sys/tuneable.h>
     96 #include <sys/vm.h>
     97 #include <sys/mman.h>
     98 #include <sys/swap.h>
     99 #include <sys/cmn_err.h>
    100 #include <sys/debug.h>
    101 #include <sys/lwpchan_impl.h>
    102 #include <sys/avl.h>
    103 #include <sys/modctl.h>
    104 #include <sys/syscall.h>
    105 #include <sys/task.h>
    106 #include <sys/project.h>
    107 #include <sys/policy.h>
    108 #include <sys/zone.h>
    109 #include <sys/rctl.h>
    110 
    111 #include <sys/ipc.h>
    112 #include <sys/ipc_impl.h>
    113 #include <sys/shm.h>
    114 #include <sys/shm_impl.h>
    115 
    116 #include <vm/hat.h>
    117 #include <vm/seg.h>
    118 #include <vm/as.h>
    119 #include <vm/seg_vn.h>
    120 #include <vm/anon.h>
    121 #include <vm/page.h>
    122 #include <vm/vpage.h>
    123 #include <vm/seg_spt.h>
    124 
    125 #include <c2/audit.h>
    126 
/*
 * Forward declarations for file-local (static) helpers.
 *
 * shm_dtor() and shm_rmid() are passed to ipcs_create() in _init(), and
 * shm_remove_zone() is passed to zone_key_create() there, so they must be
 * declared before _init() is defined.
 */
static int shmem_lock(kshmid_t *sp, struct anon_map *amp);
static void shmem_unlock(kshmid_t *sp, struct anon_map *amp);
static void sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags,
	kshmid_t *id);
static void shm_rm_amp(kshmid_t *sp);
static void shm_dtor(kipc_perm_t *);
static void shm_rmid(kipc_perm_t *);
static void shm_remove_zone(zoneid_t, void *);
    135 
/*
 * Semantics for share_page_table and ism_off:
 *
 * These are hooks in /etc/system - only for internal testing purpose.
 *
 * Setting share_page_table automatically turns on the SHM_SHARE_MMU (ISM) flag
 * in a call to shmat(2). In other words, with share_page_table set, you always
 * get ISM, even if say, DISM is specified. It should really be called "ism_on".
 *
 * Setting ism_off turns off the SHM_SHARE_MMU flag from the flags passed to
 * shmat(2).
 *
 * If both share_page_table and ism_off are set, share_page_table prevails.
 *
 * Although these tunables should probably be removed, they do have some
 * external exposure; as long as they exist, they should at least work sensibly.
 */

int share_page_table;
int ism_off;

/*
 * The following tunables are obsolete.  Though for compatibility we
 * still read and interpret shminfo_shmmax and shminfo_shmmni (see
 * os/project.c), the preferred mechanism for administrating the IPC
 * Shared Memory facility is through the resource controls described at
 * the top of this file.
 */
size_t	shminfo_shmmax = 0x800000;	/* (obsolete) */
int	shminfo_shmmni = 100;		/* (obsolete) */
size_t	shminfo_shmmin = 1;		/* (obsolete) */
int	shminfo_shmseg = 6;		/* (obsolete) */

/*
 * Resource control handles.  They are only declared here (extern); the
 * shmmni handles are passed to ipcs_create() in _init() and the shmmax
 * handles are tested with rctl_test() in shmget().
 */
extern rctl_hndl_t rc_zone_shmmax;
extern rctl_hndl_t rc_zone_shmmni;
extern rctl_hndl_t rc_project_shmmax;
extern rctl_hndl_t rc_project_shmmni;
/* IPC service handle for shared memory ids; created in _init(). */
static ipc_service_t *shm_svc;
/* Zone key created in _init() with shm_remove_zone() as its callback. */
static zone_key_t shm_zone_key;
    175 
/*
 * Module linkage information for the kernel.
 *
 * All shared memory operations enter through the single shmsys() entry
 * point, which dispatches on its first argument.  When _SYSCALL32_IMPL is
 * defined a second sysent/modlsys pair is registered for 32-bit callers;
 * it differs from the native entry only in its return-value flag
 * (SE_32RVAL1 instead of SE_64RVAL).
 */
static uintptr_t shmsys(int, uintptr_t, uintptr_t, uintptr_t);

static struct sysent ipcshm_sysent = {
	4,	/* presumably the argument count; shmsys() takes four */
#ifdef	_SYSCALL32_IMPL
	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
#else	/* _SYSCALL32_IMPL */
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
#endif	/* _SYSCALL32_IMPL */
	(int (*)())shmsys
};

#ifdef	_SYSCALL32_IMPL
/* Entry used for 32-bit callers; same handler, 32-bit return value. */
static struct sysent ipcshm_sysent32 = {
	4,
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())shmsys
};
#endif	/* _SYSCALL32_IMPL */

static struct modlsys modlsys = {
	&mod_syscallops, "System V shared memory", &ipcshm_sysent
};

#ifdef	_SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V shared memory", &ipcshm_sysent32
};
#endif	/* _SYSCALL32_IMPL */

/* NULL-terminated list of linkage structures handed to mod_install(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef	_SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
    217 
    218 
    219 int
    220 _init(void)
    221 {
    222 	int result;
    223 
    224 	shm_svc = ipcs_create("shmids", rc_project_shmmni, rc_zone_shmmni,
    225 	    sizeof (kshmid_t), shm_dtor, shm_rmid, AT_IPC_SHM,
    226 	    offsetof(ipc_rqty_t, ipcq_shmmni));
    227 	zone_key_create(&shm_zone_key, NULL, shm_remove_zone, NULL);
    228 
    229 	if ((result = mod_install(&modlinkage)) == 0)
    230 		return (0);
    231 
    232 	(void) zone_key_delete(shm_zone_key);
    233 	ipcs_destroy(shm_svc);
    234 
    235 	return (result);
    236 }
    237 
    238 int
    239 _fini(void)
    240 {
    241 	return (EBUSY);
    242 }
    243 
    244 int
    245 _info(struct modinfo *modinfop)
    246 {
    247 	return (mod_info(&modlinkage, modinfop));
    248 }
    249 
/*
 * Shmat (attach shared segment) system call.
 *
 * Maps the shared memory segment identified by shmid into the address
 * space of the current process.
 *
 *   shmid   - id of the segment to attach
 *   uaddr   - requested attach address, or 0 to let the system choose
 *   uflags  - shmat flags; only bits in SHMAT_VALID_FLAGS_MASK are kept
 *   rvp     - on success, receives the address the segment was mapped at
 *
 * Returns 0 on success.  On failure returns an errno value: EINVAL for an
 * unknown id, an invalid flag combination, or a bad/misaligned/occupied
 * user-supplied address; ENOMEM when no address could be found; EACCES
 * when attaching to an existing ISM segment whose protections exceed what
 * this attach is allowed; or whatever ipcperm_access(), sptcreate() or
 * as_map() report.
 *
 * The lock returned by ipc_lookup() is held for the whole operation and is
 * dropped at errret, which is also the success exit.  Every error path
 * taken after as_rangelock() must call as_rangeunlock() itself before
 * jumping to errret.
 */
static int
shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
{
	kshmid_t *sp;	/* shared memory header ptr */
	size_t	size;
	int	error = 0;
	proc_t *pp = curproc;
	struct as *as = pp->p_as;
	struct segvn_crargs	crargs;	/* segvn create arguments */
	kmutex_t	*lock;
	struct seg 	*segspt = NULL;
	caddr_t		addr = uaddr;
	int		flags = (uflags & SHMAT_VALID_FLAGS_MASK);
	int		useISM;
	uchar_t		prot = PROT_ALL;
	int result;

	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
		return (EINVAL);
	/* Read access is always required; write access unless SHM_RDONLY. */
	if (error = ipcperm_access(&sp->shm_perm, SHM_R, CRED()))
		goto errret;
	if ((flags & SHM_RDONLY) == 0 &&
	    (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (spt_invalid(flags)) {
		error = EINVAL;
		goto errret;
	}
	/*
	 * Apply the testing tunables.  share_page_table is applied last so
	 * that it prevails when both are set.
	 */
	if (ism_off)
		flags = flags & ~SHM_SHARE_MMU;
	if (share_page_table) {
		flags = flags & ~SHM_PAGEABLE;
		flags = flags | SHM_SHARE_MMU;
	}
	useISM = (spt_locked(flags) || spt_pageable(flags));
	/* An ISM/DISM attach requires write permission even with SHM_RDONLY. */
	if (useISM && (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (useISM && isspt(sp)) {
		uint_t newsptflags = flags | spt_flags(sp->shm_sptseg);
		/*
		 * If trying to change an existing {D}ISM segment from ISM
		 * to DISM or vice versa, return error. Note that this
		 * validation of flags needs to be done after the effect of
		 * tunables such as ism_off and share_page_table, for
		 * semantics that are consistent with the tunables' settings.
		 */
		if (spt_invalid(newsptflags)) {
			error = EINVAL;
			goto errret;
		}
	}
	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	size = sp->shm_amp->size;
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);

	/* somewhere to record spt info for final detach */
	if (sp->shm_sptinfo == NULL)
		sp->shm_sptinfo = kmem_zalloc(sizeof (sptinfo_t), KM_SLEEP);

	as_rangelock(as);

	if (useISM) {
		/*
		 * Handle ISM
		 */
		uint_t	share_szc;
		size_t	share_size;
		struct	shm_data ssd;
		uintptr_t align_hint;

		/*
		 * Pick a share pagesize to use, if (!isspt(sp)).
		 * Otherwise use the already chosen page size.
		 *
		 * For the initial shmat (!isspt(sp)), where sptcreate is
		 * called, map_pgsz is called to recommend a [D]ISM pagesize,
		 * important for systems which offer more than one potential
		 * [D]ISM pagesize.
		 * If the shmat is just to attach to an already created
		 * [D]ISM segment, then use the previously selected page size.
		 */
		if (!isspt(sp)) {
			share_size = map_pgsz(MAPPGSZ_ISM, pp, addr, size, 0);
			if (share_size == 0) {
				as_rangeunlock(as);
				error = EINVAL;
				goto errret;
			}
			share_szc = page_szc(share_size);
		} else {
			share_szc = sp->shm_sptseg->s_szc;
			share_size = page_get_pagesize(share_szc);
		}
		size = P2ROUNDUP(size, share_size);

		align_hint = share_size;
#if defined(__i386) || defined(__amd64)
		/*
		 * For x86, we want to share as much of the page table tree
		 * as possible. We use a large align_hint at first, but
		 * if that fails, then the code below retries with align_hint
		 * set to share_size.
		 *
		 * The explicit extern here is due to the difficulties
		 * of getting to platform dependent includes. When/if the
		 * platform dependent bits of this function are cleaned up,
		 * another way of doing this should be found.
		 */
		{
			extern uint_t ptes_per_table;

			while (size >= ptes_per_table * (uint64_t)align_hint)
				align_hint *= ptes_per_table;
		}
#endif /* __i386 || __amd64 */

#if defined(__sparcv9)
		if (addr == 0 &&
		    pp->p_model == DATAMODEL_LP64 && AS_TYPE_64BIT(as)) {
			/*
			 * If no address has been passed in, and this is a
			 * 64-bit process, we'll try to find an address
			 * in the predict-ISM zone.
			 */
			caddr_t predbase = (caddr_t)PREDISM_1T_BASE;
			size_t len = PREDISM_BOUND - PREDISM_1T_BASE;

			as_purge(as);
			if (as_gap(as, size + share_size, &predbase, &len,
			    AH_LO, (caddr_t)NULL) != -1) {
				/*
				 * We found an address which looks like a
				 * candidate.  We want to round it up, and
				 * then check that it's a valid user range.
				 * This assures that we won't fail below.
				 */
				addr = (caddr_t)P2ROUNDUP((uintptr_t)predbase,
				    share_size);

				if (valid_usr_range(addr, size, prot,
				    as, as->a_userlimit) != RANGE_OKAY) {
					addr = 0;
				}
			}
		}
#endif /* __sparcv9 */

		if (addr == 0) {
			/*
			 * Let the system pick the address.  Try the (possibly
			 * enlarged) align_hint first; if that yields nothing,
			 * retry once with align_hint reset to share_size.
			 */
			for (;;) {
				addr = (caddr_t)align_hint;
				map_addr(&addr, size, 0ll, 1, MAP_ALIGN);
				if (addr != NULL || align_hint == share_size)
					break;
				align_hint = share_size;
			}
			if (addr == NULL) {
				as_rangeunlock(as);
				error = ENOMEM;
				goto errret;
			}
			ASSERT(((uintptr_t)addr & (align_hint - 1)) == 0);
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;

			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use spt aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & (share_size - 1)) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				/*
				 * We try to accommodate processors which
				 * may not support execute permissions on
				 * all ISM segments by trying the check
				 * again but without PROT_EXEC.
				 */
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			as_purge(as);
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len, AH_LO,
			    (caddr_t)NULL) != 0) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
		}

		if (!isspt(sp)) {
			/* First ISM attach: create the spt segment. */
			error = sptcreate(size, &segspt, sp->shm_amp, prot,
			    flags, share_szc);
			if (error) {
				as_rangeunlock(as);
				goto errret;
			}
			sp->shm_sptinfo->sptas = segspt->s_as;
			sp->shm_sptseg = segspt;
			sp->shm_sptprot = prot;
		} else if ((prot & sp->shm_sptprot) != sp->shm_sptprot) {
			/*
			 * Ensure we're attaching to an ISM segment with
			 * fewer or equal permissions than what we're
			 * allowed.  Fail if the segment has more
			 * permissions than what we're allowed.
			 */
			error = EACCES;
			as_rangeunlock(as);
			goto errret;
		}

		ssd.shm_sptseg = sp->shm_sptseg;
		ssd.shm_sptas = sp->shm_sptinfo->sptas;
		ssd.shm_amp = sp->shm_amp;
		error = as_map(as, addr, size, segspt_shmattach, &ssd);
		if (error == 0)
			sp->shm_ismattch++; /* keep count of ISM attaches */
	} else {

		/*
		 * Normal case.
		 */
		if (flags & SHM_RDONLY)
			prot &= ~PROT_WRITE;

		if (addr == 0) {
			/* Let the system pick the attach address */
			map_addr(&addr, size, 0ll, 1, 0);
			if (addr == NULL) {
				as_rangeunlock(as);
				error = ENOMEM;
				goto errret;
			}
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;

			/* SHM_RND: round the address down to an SHMLBA multiple. */
			if (flags & SHM_RND)
				addr = (caddr_t)((uintptr_t)addr &
				    ~(SHMLBA - 1));
			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & PAGEOFFSET) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				/* Retry without PROT_EXEC, as in the ISM case. */
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			as_purge(as);
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len,
			    AH_LO, (caddr_t)NULL) != 0) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
		}

		/* Initialize the create arguments and map the segment */
		crargs = *(struct segvn_crargs *)zfod_argsp;
		crargs.offset = 0;
		crargs.type = MAP_SHARED;
		crargs.amp = sp->shm_amp;
		crargs.prot = prot;
		crargs.maxprot = crargs.prot;
		crargs.flags = 0;

		error = as_map(as, addr, size, segvn_create, &crargs);
	}

	as_rangeunlock(as);
	if (error)
		goto errret;

	/* record shmem range for the detach */
	sa_add(pp, addr, (size_t)size, useISM ? SHMSA_ISM : 0, sp);
	*rvp = (uintptr_t)addr;

	sp->shm_atime = gethrestime_sec();
	sp->shm_lpid = pp->p_pid;
	/* Hold the id for this attach; shm_detach() calls ipc_rele(). */
	ipc_hold(shm_svc, (kipc_perm_t *)sp);

	/*
	 * Tell machine specific code that lwp has mapped shared memory
	 */
	LWP_MMODEL_SHARED_AS(addr, size);

errret:
	mutex_exit(lock);
	return (error);
}
    572 
    573 static void
    574 shm_dtor(kipc_perm_t *perm)
    575 {
    576 	kshmid_t *sp = (kshmid_t *)perm;
    577 	uint_t cnt;
    578 	size_t rsize;
    579 
    580 	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
    581 	anonmap_purge(sp->shm_amp);
    582 	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
    583 
    584 	if (sp->shm_sptinfo) {
    585 		if (isspt(sp)) {
    586 			sptdestroy(sp->shm_sptinfo->sptas, sp->shm_amp);
    587 			sp->shm_lkcnt = 0;
    588 		}
    589 		kmem_free(sp->shm_sptinfo, sizeof (sptinfo_t));
    590 	}
    591 
    592 	if (sp->shm_lkcnt > 0) {
    593 		shmem_unlock(sp, sp->shm_amp);
    594 		sp->shm_lkcnt = 0;
    595 	}
    596 
    597 	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
    598 	cnt = --sp->shm_amp->refcnt;
    599 	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
    600 	ASSERT(cnt == 0);
    601 	shm_rm_amp(sp);
    602 
    603 	if (sp->shm_perm.ipc_id != IPC_ID_INVAL) {
    604 		rsize = ptob(btopr(sp->shm_segsz));
    605 		ipcs_lock(shm_svc);
    606 		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax -= rsize;
    607 		sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax -= rsize;
    608 		ipcs_unlock(shm_svc);
    609 	}
    610 }
    611 
    612 /* ARGSUSED */
    613 static void
    614 shm_rmid(kipc_perm_t *perm)
    615 {
    616 	/* nothing to do */
    617 }
    618 
    619 /*
    620  * Shmctl system call.
    621  */
    622 /* ARGSUSED */
    623 static int
    624 shmctl(int shmid, int cmd, void *arg)
    625 {
    626 	kshmid_t		*sp;	/* shared memory header ptr */
    627 	STRUCT_DECL(shmid_ds, ds);	/* for SVR4 IPC_SET */
    628 	int			error = 0;
    629 	struct cred 		*cr = CRED();
    630 	kmutex_t		*lock;
    631 	model_t			mdl = get_udatamodel();
    632 	struct shmid_ds64	ds64;
    633 	shmatt_t		nattch;
    634 
    635 	STRUCT_INIT(ds, mdl);
    636 
    637 	/*
    638 	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
    639 	 */
    640 	switch (cmd) {
    641 	case IPC_SET:
    642 		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
    643 			return (EFAULT);
    644 		break;
    645 
    646 	case IPC_SET64:
    647 		if (copyin(arg, &ds64, sizeof (struct shmid_ds64)))
    648 			return (EFAULT);
    649 		break;
    650 
    651 	case IPC_RMID:
    652 		return (ipc_rmid(shm_svc, shmid, cr));
    653 	}
    654 
    655 	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
    656 		return (EINVAL);
    657 
    658 	switch (cmd) {
    659 	/* Set ownership and permissions. */
    660 	case IPC_SET:
    661 		if (error = ipcperm_set(shm_svc, cr, &sp->shm_perm,
    662 		    &STRUCT_BUF(ds)->shm_perm, mdl))
    663 				break;
    664 		sp->shm_ctime = gethrestime_sec();
    665 		break;
    666 
    667 	case IPC_STAT:
    668 		if (error = ipcperm_access(&sp->shm_perm, SHM_R, cr))
    669 			break;
    670 
    671 		nattch = sp->shm_perm.ipc_ref - 1;
    672 
    673 		ipcperm_stat(&STRUCT_BUF(ds)->shm_perm, &sp->shm_perm, mdl);
    674 		STRUCT_FSET(ds, shm_segsz, sp->shm_segsz);
    675 		STRUCT_FSETP(ds, shm_amp, NULL);	/* kernel addr */
    676 		STRUCT_FSET(ds, shm_lkcnt, sp->shm_lkcnt);
    677 		STRUCT_FSET(ds, shm_lpid, sp->shm_lpid);
    678 		STRUCT_FSET(ds, shm_cpid, sp->shm_cpid);
    679 		STRUCT_FSET(ds, shm_nattch, nattch);
    680 		STRUCT_FSET(ds, shm_cnattch, sp->shm_ismattch);
    681 		STRUCT_FSET(ds, shm_atime, sp->shm_atime);
    682 		STRUCT_FSET(ds, shm_dtime, sp->shm_dtime);
    683 		STRUCT_FSET(ds, shm_ctime, sp->shm_ctime);
    684 
    685 		mutex_exit(lock);
    686 		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
    687 			return (EFAULT);
    688 
    689 		return (0);
    690 
    691 	case IPC_SET64:
    692 		if (error = ipcperm_set64(shm_svc, cr,
    693 		    &sp->shm_perm, &ds64.shmx_perm))
    694 			break;
    695 		sp->shm_ctime = gethrestime_sec();
    696 		break;
    697 
    698 	case IPC_STAT64:
    699 		nattch = sp->shm_perm.ipc_ref - 1;
    700 
    701 		ipcperm_stat64(&ds64.shmx_perm, &sp->shm_perm);
    702 		ds64.shmx_segsz = sp->shm_segsz;
    703 		ds64.shmx_lkcnt = sp->shm_lkcnt;
    704 		ds64.shmx_lpid = sp->shm_lpid;
    705 		ds64.shmx_cpid = sp->shm_cpid;
    706 		ds64.shmx_nattch = nattch;
    707 		ds64.shmx_cnattch = sp->shm_ismattch;
    708 		ds64.shmx_atime = sp->shm_atime;
    709 		ds64.shmx_dtime = sp->shm_dtime;
    710 		ds64.shmx_ctime = sp->shm_ctime;
    711 
    712 		mutex_exit(lock);
    713 		if (copyout(&ds64, arg, sizeof (struct shmid_ds64)))
    714 			return (EFAULT);
    715 
    716 		return (0);
    717 
    718 	/* Lock segment in memory */
    719 	case SHM_LOCK:
    720 		if ((error = secpolicy_lock_memory(cr)) != 0)
    721 			break;
    722 
    723 		/* protect against overflow */
    724 		if (sp->shm_lkcnt >= USHRT_MAX) {
    725 			error = ENOMEM;
    726 			break;
    727 		}
    728 		if (!isspt(sp) && (sp->shm_lkcnt++ == 0)) {
    729 			if (error = shmem_lock(sp, sp->shm_amp)) {
    730 				ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock,
    731 				    RW_WRITER);
    732 				cmn_err(CE_NOTE, "shmctl - couldn't lock %ld"
    733 				    " pages into memory", sp->shm_amp->size);
    734 				ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
    735 				error = ENOMEM;
    736 				sp->shm_lkcnt--;
    737 			}
    738 		}
    739 		break;
    740 
    741 	/* Unlock segment */
    742 	case SHM_UNLOCK:
    743 		if ((error = secpolicy_lock_memory(cr)) != 0)
    744 			break;
    745 
    746 		if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) {
    747 			shmem_unlock(sp, sp->shm_amp);
    748 		}
    749 		break;
    750 
    751 	default:
    752 		error = EINVAL;
    753 		break;
    754 	}
    755 	mutex_exit(lock);
    756 	return (error);
    757 }
    758 
    759 static void
    760 shm_detach(proc_t *pp, segacct_t *sap)
    761 {
    762 	kshmid_t	*sp = sap->sa_id;
    763 	size_t		len = sap->sa_len;
    764 	caddr_t		addr = sap->sa_addr;
    765 
    766 	/*
    767 	 * Discard lwpchan mappings.
    768 	 */
    769 	if (pp->p_lcp != NULL)
    770 		lwpchan_delete_mapping(pp, addr, addr + len);
    771 	(void) as_unmap(pp->p_as, addr, len);
    772 
    773 	/*
    774 	 * Perform some detach-time accounting.
    775 	 */
    776 	(void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
    777 	if (sap->sa_flags & SHMSA_ISM)
    778 		sp->shm_ismattch--;
    779 	sp->shm_dtime = gethrestime_sec();
    780 	sp->shm_lpid = pp->p_pid;
    781 	ipc_rele(shm_svc, (kipc_perm_t *)sp);	/* Drops lock */
    782 
    783 	kmem_free(sap, sizeof (segacct_t));
    784 }
    785 
    786 static int
    787 shmdt(caddr_t addr)
    788 {
    789 	proc_t *pp = curproc;
    790 	segacct_t *sap, template;
    791 
    792 	mutex_enter(&pp->p_lock);
    793 	prbarrier(pp);			/* block /proc.  See shmgetid(). */
    794 
    795 	template.sa_addr = addr;
    796 	template.sa_len = 0;
    797 	if ((pp->p_segacct == NULL) ||
    798 	    ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)) {
    799 		mutex_exit(&pp->p_lock);
    800 		return (EINVAL);
    801 	}
    802 	if (sap->sa_addr != addr) {
    803 		mutex_exit(&pp->p_lock);
    804 		return (EINVAL);
    805 	}
    806 	avl_remove(pp->p_segacct, sap);
    807 	mutex_exit(&pp->p_lock);
    808 
    809 	shm_detach(pp, sap);
    810 
    811 	return (0);
    812 }
    813 
    814 /*
    815  * Remove all shared memory segments associated with a given zone.
    816  * Called by zone_shutdown when the zone is halted.
    817  */
    818 /*ARGSUSED1*/
    819 static void
    820 shm_remove_zone(zoneid_t zoneid, void *arg)
    821 {
    822 	ipc_remove_zone(shm_svc, zoneid);
    823 }
    824 
/*
 * Shmget (create new shmem) system call.
 *
 *   key    - IPC key to look up or create
 *   size   - requested segment size in bytes
 *   shmflg - creation flags and permissions, passed through to ipc_get()
 *            and ipc_commit_begin()
 *   rvp    - on success, receives the shared memory id
 *
 * Returns 0 on success or an errno value: the error from ipc_get() or
 * ipc_commit_begin(); EINVAL if an existing segment is smaller than size,
 * if the page-rounded size is 0, or if a project/zone shared memory
 * resource control denies the request; ENOMEM if anon space cannot be
 * reserved.
 *
 * If ipc_commit_begin() reports EAGAIN the whole operation is retried
 * from the top.
 */
static int
shmget(key_t key, size_t size, int shmflg, uintptr_t *rvp)
{
	proc_t		*pp = curproc;
	kshmid_t	*sp;
	kmutex_t	*lock;
	int		error;

top:
	if (error = ipc_get(shm_svc, key, shmflg, (kipc_perm_t **)&sp, &lock))
		return (error);

	if (!IPC_FREE(&sp->shm_perm)) {
		/*
		 * A segment with the requested key exists.
		 */
		if (size > sp->shm_segsz) {
			mutex_exit(lock);
			return (EINVAL);
		}
	} else {
		/*
		 * A new segment should be created.
		 */
		size_t npages = btopr(size);
		size_t rsize = ptob(npages);

		/*
		 * Check rsize and the per-project and per-zone limit on
		 * shared memory.  Checking rsize handles both the size == 0
		 * case and the size < ULONG_MAX & PAGEMASK case (i.e.
		 * rounding up wraps a size_t).
		 *
		 * NOTE(review): pp->p_lock is released below without being
		 * acquired in this function; presumably ipc_get() returns
		 * with it held when it hands back a free entry - confirm
		 * against os/ipc.c.
		 */
		if (rsize == 0 ||
		    (rctl_test(rc_project_shmmax,
		    pp->p_task->tk_proj->kpj_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY) ||
		    (rctl_test(rc_zone_shmmax,
		    pp->p_zone->zone_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY)) {

			mutex_exit(&pp->p_lock);
			mutex_exit(lock);
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (EINVAL);
		}
		mutex_exit(&pp->p_lock);
		mutex_exit(lock);

		if (anon_resv(rsize) == 0) {
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (ENOMEM);
		}

		/*
		 * If any new failure points are introduced between the
		 * above anon_resv() and the below ipc_commit_begin(),
		 * these failure points will need to unreserve the anon
		 * reserved using anon_unresv().
		 *
		 * Once ipc_commit_begin() is called, the anon reserved
		 * above will be automatically unreserved by future calls to
		 * ipcs_cleanup() -> shm_dtor() -> shm_rm_amp().  If
		 * ipc_commit_begin() fails, it internally calls shm_dtor(),
		 * unreserving the above anon, and freeing the below amp.
		 */

		sp->shm_amp = anonmap_alloc(rsize, rsize, ANON_SLEEP);
		sp->shm_amp->a_sp = sp;
		/*
		 * Store the original user's requested size, in bytes,
		 * rather than the page-aligned size.  The former is
		 * used for IPC_STAT and shmget() lookups.  The latter
		 * is saved in the anon_map structure and is used for
		 * calls to the vm layer.
		 */
		sp->shm_segsz = size;
		sp->shm_atime = sp->shm_dtime = 0;
		sp->shm_ctime = gethrestime_sec();
		sp->shm_lpid = (pid_t)0;
		sp->shm_cpid = curproc->p_pid;
		sp->shm_ismattch = 0;
		sp->shm_sptinfo = NULL;
		/*
		 * Check limits one last time, push id into global
		 * visibility, and update resource usage counts.
		 */
		if (error = ipc_commit_begin(shm_svc, key, shmflg,
		    (kipc_perm_t *)sp)) {
			if (error == EAGAIN)
				goto top;
			return (error);
		}

		/*
		 * Re-test the limits, this time against the project and
		 * zone recorded in the id itself, before charging rsize to
		 * their usage counters.
		 */
		if ((rctl_test(rc_project_shmmax,
		    sp->shm_perm.ipc_proj->kpj_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY) ||
		    (rctl_test(rc_zone_shmmax,
		    sp->shm_perm.ipc_zone_ref.zref_zone->zone_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY)) {
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (EINVAL);
		}
		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax += rsize;
		sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax += rsize;

		/* ipc_commit_end() hands back the lock released below. */
		lock = ipc_commit_end(shm_svc, &sp->shm_perm);
	}

	if (AU_AUDITING())
		audit_ipcget(AT_IPC_SHM, (void *)sp);

	*rvp = (uintptr_t)(sp->shm_perm.ipc_id);

	mutex_exit(lock);
	return (0);
}
    945 
    946 /*
    947  * shmids system call.
    948  */
    949 static int
    950 shmids(int *buf, uint_t nids, uint_t *pnids)
    951 {
    952 	return (ipc_ids(shm_svc, buf, nids, pnids));
    953 }
    954 
    955 /*
    956  * System entry point for shmat, shmctl, shmdt, and shmget system calls.
    957  */
    958 static uintptr_t
    959 shmsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2)
    960 {
    961 	int	error;
    962 	uintptr_t r_val = 0;
    963 
    964 	switch (opcode) {
    965 	case SHMAT:
    966 		error = shmat((int)a0, (caddr_t)a1, (int)a2, &r_val);
    967 		break;
    968 	case SHMCTL:
    969 		error = shmctl((int)a0, (int)a1, (void *)a2);
    970 		break;
    971 	case SHMDT:
    972 		error = shmdt((caddr_t)a0);
    973 		break;
    974 	case SHMGET:
    975 		error = shmget((key_t)a0, (size_t)a1, (int)a2, &r_val);
    976 		break;
    977 	case SHMIDS:
    978 		error = shmids((int *)a0, (uint_t)a1, (uint_t *)a2);
    979 		break;
    980 	default:
    981 		error = EINVAL;
    982 		break;
    983 	}
    984 
    985 	if (error)
    986 		return ((uintptr_t)set_errno(error));
    987 
    988 	return (r_val);
    989 }
    990 
    991 /*
    992  * segacct_t comparator
    993  * This works as expected, with one minor change: the first of two real
    994  * segments with equal addresses is considered to be 'greater than' the
    995  * second.  We only return equal when searching using a template, in
    996  * which case we explicitly set the template segment's length to 0
    997  * (which is invalid for a real segment).
    998  */
    999 static int
   1000 shm_sacompar(const void *x, const void *y)
   1001 {
   1002 	segacct_t *sa1 = (segacct_t *)x;
   1003 	segacct_t *sa2 = (segacct_t *)y;
   1004 
   1005 	if (sa1->sa_addr < sa2->sa_addr) {
   1006 		return (-1);
   1007 	} else if (sa2->sa_len != 0) {
   1008 		if (sa1->sa_addr >= sa2->sa_addr + sa2->sa_len) {
   1009 			return (1);
   1010 		} else if (sa1->sa_len != 0) {
   1011 			return (1);
   1012 		} else {
   1013 			return (0);
   1014 		}
   1015 	} else if (sa1->sa_addr > sa2->sa_addr) {
   1016 		return (1);
   1017 	} else {
   1018 		return (0);
   1019 	}
   1020 }
   1021 
   1022 /*
   1023  * add this record to the segacct list.
   1024  */
   1025 static void
   1026 sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags, kshmid_t *id)
   1027 {
   1028 	segacct_t *nsap;
   1029 	avl_tree_t *tree = NULL;
   1030 	avl_index_t where;
   1031 
   1032 	nsap = kmem_alloc(sizeof (segacct_t), KM_SLEEP);
   1033 	nsap->sa_addr = addr;
   1034 	nsap->sa_len  = len;
   1035 	nsap->sa_flags = flags;
   1036 	nsap->sa_id = id;
   1037 
   1038 	if (pp->p_segacct == NULL)
   1039 		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
   1040 
   1041 	mutex_enter(&pp->p_lock);
   1042 	prbarrier(pp);			/* block /proc.  See shmgetid(). */
   1043 
   1044 	if (pp->p_segacct == NULL) {
   1045 		avl_create(tree, shm_sacompar, sizeof (segacct_t),
   1046 		    offsetof(segacct_t, sa_tree));
   1047 		pp->p_segacct = tree;
   1048 	} else if (tree) {
   1049 		kmem_free(tree, sizeof (avl_tree_t));
   1050 	}
   1051 
   1052 	/*
   1053 	 * We can ignore the result of avl_find, as the comparator will
   1054 	 * never return equal for segments with non-zero length.  This
   1055 	 * is a necessary hack to get around the fact that we do, in
   1056 	 * fact, have duplicate keys.
   1057 	 */
   1058 	(void) avl_find(pp->p_segacct, nsap, &where);
   1059 	avl_insert(pp->p_segacct, nsap, where);
   1060 
   1061 	mutex_exit(&pp->p_lock);
   1062 }
   1063 
   1064 /*
   1065  * Duplicate parent's segacct records in child.
   1066  */
   1067 void
   1068 shmfork(struct proc *ppp, struct proc *cpp)
   1069 {
   1070 	segacct_t *sap;
   1071 	kshmid_t *sp;
   1072 	kmutex_t *mp;
   1073 
   1074 	ASSERT(ppp->p_segacct != NULL);
   1075 
   1076 	/*
   1077 	 * We are the only lwp running in the parent so nobody can
   1078 	 * mess with our p_segacct list.  Thus it is safe to traverse
   1079 	 * the list without holding p_lock.  This is essential because
   1080 	 * we can't hold p_lock during a KM_SLEEP allocation.
   1081 	 */
   1082 	for (sap = (segacct_t *)avl_first(ppp->p_segacct); sap != NULL;
   1083 	    sap = (segacct_t *)AVL_NEXT(ppp->p_segacct, sap)) {
   1084 		sa_add(cpp, sap->sa_addr, sap->sa_len, sap->sa_flags,
   1085 		    sap->sa_id);
   1086 		sp = sap->sa_id;
   1087 		mp = ipc_lock(shm_svc, sp->shm_perm.ipc_id);
   1088 		if (sap->sa_flags & SHMSA_ISM)
   1089 			sp->shm_ismattch++;
   1090 		ipc_hold(shm_svc, (kipc_perm_t *)sp);
   1091 		mutex_exit(mp);
   1092 	}
   1093 }
   1094 
   1095 /*
   1096  * Detach shared memory segments from exiting process.
   1097  */
   1098 void
   1099 shmexit(struct proc *pp)
   1100 {
   1101 	segacct_t *sap;
   1102 	avl_tree_t *tree;
   1103 	void *cookie = NULL;
   1104 
   1105 	ASSERT(pp->p_segacct != NULL);
   1106 
   1107 	mutex_enter(&pp->p_lock);
   1108 	prbarrier(pp);
   1109 	tree = pp->p_segacct;
   1110 	pp->p_segacct = NULL;
   1111 	mutex_exit(&pp->p_lock);
   1112 
   1113 	while ((sap = avl_destroy_nodes(tree, &cookie)) != NULL)
   1114 		(void) shm_detach(pp, sap);
   1115 
   1116 	avl_destroy(tree);
   1117 	kmem_free(tree, sizeof (avl_tree_t));
   1118 }
   1119 
   1120 /*
   1121  * At this time pages should be in memory, so just lock them.
   1122  */
   1123 static void
   1124 lock_again(size_t npages, kshmid_t *sp, struct anon_map *amp)
   1125 {
   1126 	struct anon *ap;
   1127 	struct page *pp;
   1128 	struct vnode *vp;
   1129 	u_offset_t off;
   1130 	ulong_t anon_idx;
   1131 	anon_sync_obj_t cookie;
   1132 
   1133 	mutex_enter(&sp->shm_mlock);
   1134 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
   1135 	for (anon_idx = 0; npages != 0; anon_idx++, npages--) {
   1136 
   1137 		anon_array_enter(amp, anon_idx, &cookie);
   1138 		ap = anon_get_ptr(amp->ahp, anon_idx);
   1139 		ASSERT(ap != NULL);
   1140 		swap_xlate(ap, &vp, &off);
   1141 		anon_array_exit(&cookie);
   1142 
   1143 		pp = page_lookup(vp, off, SE_SHARED);
   1144 		if (pp == NULL) {
   1145 			panic("lock_again: page not in the system");
   1146 			/*NOTREACHED*/
   1147 		}
   1148 		/* page should already be locked by caller */
   1149 		ASSERT(pp->p_lckcnt > 0);
   1150 		(void) page_pp_lock(pp, 0, 0);
   1151 		page_unlock(pp);
   1152 	}
   1153 	ANON_LOCK_EXIT(&amp->a_rwlock);
   1154 	mutex_exit(&sp->shm_mlock);
   1155 }
   1156 
   1157 /*
   1158  * Attach the shared memory segment to the process
   1159  * address space and lock the pages.
   1160  */
   1161 static int
   1162 shmem_lock(kshmid_t *sp, struct anon_map *amp)
   1163 {
   1164 	size_t npages = btopr(amp->size);
   1165 	struct as *as;
   1166 	struct segvn_crargs crargs;
   1167 	uint_t error;
   1168 
   1169 	/*
   1170 	 * A later ISM/DISM attach may increase the size of the amp, so
   1171 	 * cache the number of pages locked for the future shmem_unlock()
   1172 	 */
   1173 	sp->shm_lkpages = npages;
   1174 
   1175 	as = as_alloc();
   1176 	/* Initialize the create arguments and map the segment */
   1177 	crargs = *(struct segvn_crargs *)zfod_argsp;	/* structure copy */
   1178 	crargs.offset = (u_offset_t)0;
   1179 	crargs.type = MAP_SHARED;
   1180 	crargs.amp = amp;
   1181 	crargs.prot = PROT_ALL;
   1182 	crargs.maxprot = crargs.prot;
   1183 	crargs.flags = 0;
   1184 	error = as_map(as, 0x0, amp->size, segvn_create, &crargs);
   1185 	if (!error) {
   1186 		if ((error = as_ctl(as, 0x0, amp->size, MC_LOCK, 0, 0,
   1187 		    NULL, 0)) == 0) {
   1188 			lock_again(npages, sp, amp);
   1189 		}
   1190 		(void) as_unmap(as, 0x0, amp->size);
   1191 	}
   1192 	as_free(as);
   1193 	return (error);
   1194 }
   1195 
   1196 
   1197 /*
   1198  * Unlock shared memory
   1199  */
   1200 static void
   1201 shmem_unlock(kshmid_t *sp, struct anon_map *amp)
   1202 {
   1203 	struct anon *ap;
   1204 	pgcnt_t npages = sp->shm_lkpages;
   1205 	struct vnode *vp;
   1206 	struct page *pp;
   1207 	u_offset_t off;
   1208 	ulong_t anon_idx;
   1209 	size_t unlocked_bytes = 0;
   1210 	kproject_t	*proj;
   1211 	anon_sync_obj_t cookie;
   1212 
   1213 	proj = sp->shm_perm.ipc_proj;
   1214 	mutex_enter(&sp->shm_mlock);
   1215 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
   1216 	for (anon_idx = 0; anon_idx < npages; anon_idx++) {
   1217 
   1218 		anon_array_enter(amp, anon_idx, &cookie);
   1219 		if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
   1220 			panic("shmem_unlock: null app");
   1221 			/*NOTREACHED*/
   1222 		}
   1223 		swap_xlate(ap, &vp, &off);
   1224 		anon_array_exit(&cookie);
   1225 		pp = page_lookup(vp, off, SE_SHARED);
   1226 		if (pp == NULL) {
   1227 			panic("shmem_unlock: page not in the system");
   1228 			/*NOTREACHED*/
   1229 		}
   1230 		/*
   1231 		 * Page should at least have once lock from previous
   1232 		 * shmem_lock
   1233 		 */
   1234 		ASSERT(pp->p_lckcnt > 0);
   1235 		page_pp_unlock(pp, 0, 0);
   1236 		if (pp->p_lckcnt == 0)
   1237 			unlocked_bytes += PAGESIZE;
   1238 
   1239 		page_unlock(pp);
   1240 	}
   1241 
   1242 	if (unlocked_bytes > 0) {
   1243 		rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
   1244 	}
   1245 
   1246 	ANON_LOCK_EXIT(&amp->a_rwlock);
   1247 	mutex_exit(&sp->shm_mlock);
   1248 }
   1249 
   1250 /*
   1251  * We call this routine when we have removed all references to this
   1252  * amp.  This means all shmdt()s and the IPC_RMID have been done.
   1253  */
   1254 static void
   1255 shm_rm_amp(kshmid_t *sp)
   1256 {
   1257 	struct anon_map *amp = sp->shm_amp;
   1258 	zone_t *zone;
   1259 
   1260 	zone = sp->shm_perm.ipc_zone_ref.zref_zone;
   1261 	ASSERT(zone != NULL);
   1262 	/*
   1263 	 * Free up the anon_map.
   1264 	 */
   1265 	lgrp_shm_policy_fini(amp, NULL);
   1266 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
   1267 	if (amp->a_szc != 0) {
   1268 		anon_shmap_free_pages(amp, 0, amp->size);
   1269 	} else {
   1270 		anon_free(amp->ahp, 0, amp->size);
   1271 	}
   1272 	ANON_LOCK_EXIT(&amp->a_rwlock);
   1273 	anon_unresv_zone(amp->swresv, zone);
   1274 	anonmap_free(amp);
   1275 }
   1276 
   1277 /*
   1278  * Return the shared memory id for the process's virtual address.
   1279  * Return SHMID_NONE if addr is not within a SysV shared memory segment.
   1280  * Return SHMID_FREE if addr's SysV shared memory segment's id has been freed.
   1281  *
   1282  * shmgetid() is called from code in /proc with the process locked but
   1283  * with pp->p_lock not held.  The address space lock is held, so we
   1284  * cannot grab pp->p_lock here due to lock-ordering constraints.
   1285  * Because of all this, modifications to the p_segacct list must only
   1286  * be made after calling prbarrier() to ensure the process is not locked.
   1287  * See shmdt() and sa_add(), above. shmgetid() may also be called on a
   1288  * thread's own process without the process locked.
   1289  */
   1290 int
   1291 shmgetid(proc_t *pp, caddr_t addr)
   1292 {
   1293 	segacct_t *sap, template;
   1294 
   1295 	ASSERT(MUTEX_NOT_HELD(&pp->p_lock));
   1296 	ASSERT((pp->p_proc_flag & P_PR_LOCK) || pp == curproc);
   1297 
   1298 	if (pp->p_segacct == NULL)
   1299 		return (SHMID_NONE);
   1300 
   1301 	template.sa_addr = addr;
   1302 	template.sa_len = 0;
   1303 	if ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)
   1304 		return (SHMID_NONE);
   1305 
   1306 	if (IPC_FREE(&sap->sa_id->shm_perm))
   1307 		return (SHMID_FREE);
   1308 
   1309 	return (sap->sa_id->shm_perm.ipc_id);
   1310 }
   1311