Home | History | Annotate | Download | only in syscall
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     27 /*	  All Rights Reserved  	*/
     28 
     29 
     30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     31 
     32 #include <sys/types.h>
     33 #include <sys/bitmap.h>
     34 #include <sys/sysmacros.h>
     35 #include <sys/kmem.h>
     36 #include <sys/param.h>
     37 #include <sys/systm.h>
     38 #include <sys/user.h>
     39 #include <sys/unistd.h>
     40 #include <sys/errno.h>
     41 #include <sys/proc.h>
     42 #include <sys/mman.h>
     43 #include <sys/tuneable.h>
     44 #include <sys/cmn_err.h>
     45 #include <sys/cred.h>
     46 #include <sys/vmsystm.h>
     47 #include <sys/debug.h>
     48 #include <sys/policy.h>
     49 
     50 #include <vm/as.h>
     51 #include <vm/seg.h>
     52 
     53 static uint_t mem_getpgszc(size_t);	/* page size in bytes -> page size code; defined at end of file */
     54 
     55 /*
     56  * Memory control operations
         *
         * Applies the operation selected by `cmd' to the address space of the
         * calling process (the p_as of curthread's proc).
         *
         *	addr, len	Range the operation applies to.  Both must be zero
         *			for MC_LOCKAS and MC_UNLOCKAS.  For every other
         *			command except MC_HAT_ADVISE, addr must be page
         *			aligned, len must be non-zero and the range must
         *			pass valid_usr_range().  MC_HAT_ADVISE validates
         *			the range itself, depending on its sub-command.
         *	cmd		One of MC_SYNC, MC_LOCK, MC_LOCKAS, MC_UNLOCK,
         *			MC_UNLOCKAS, MC_ADVISE or MC_HAT_ADVISE.  Any other
         *			value fails with EINVAL.
         *	arg		Command specific: MS_* flags for MC_SYNC, MCL_*
         *			flags for MC_LOCKAS, an MADV_* value for MC_ADVISE,
         *			and a user address from which a memcntl_mha
         *			structure is copied in for MC_HAT_ADVISE.
         *	attr		Filter attributes.  Must be zero for MC_HAT_ADVISE;
         *			otherwise must be a subset of VALID_ATTR and must
         *			not contain both SHARED and PRIVATE.
         *	mask		Must be zero.
         *
         * Returns 0 on success.  On failure the error code is recorded with
         * set_errno(); depending on the path the function returns either the
         * value set_errno() returned or the error code itself.
     57  */
     58 int
     59 memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
     60 {
     61 	struct as *as = ttoproc(curthread)->p_as;
     62 	struct proc *p = ttoproc(curthread);
     63 	size_t pgsz;
     64 	uint_t szc, oszc, pgcmd;
     65 	int error = 0;
     66 	faultcode_t fc;
     67 	uintptr_t iarg;
     68 	STRUCT_DECL(memcntl_mha, mha);
     69 
        	/* mask is not used by any command; only zero is accepted. */
     70 	if (mask)
     71 		return (set_errno(EINVAL));
        	/*
        	 * Range validation.  The whole-address-space commands take no
        	 * range at all, MC_HAT_ADVISE checks its range later (its rules
        	 * depend on the sub-command), and everything else needs a page
        	 * aligned, non-empty range inside the user address space.
        	 */
     72 	if ((cmd == MC_LOCKAS) || (cmd == MC_UNLOCKAS)) {
     73 		if ((addr != 0) || (len != 0)) {
     74 			return (set_errno(EINVAL));
     75 		}
     76 	} else if (cmd != MC_HAT_ADVISE) {
     77 		if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0) {
     78 			return (set_errno(EINVAL));
     79 		}
     80 		/*
     81 		 * We're only concerned with the address range
     82 		 * here, not the protections.  The protections
     83 		 * are only used as a "filter" in this code,
     84 		 * they aren't set or modified here.
     85 		 */
     86 		if (valid_usr_range(addr, len, 0, as,
     87 		    as->a_userlimit) != RANGE_OKAY) {
     88 			return (set_errno(ENOMEM));
     89 		}
     90 	}
     91 
        	/*
        	 * Attribute validation.  Note that mask is already known to be
        	 * zero here because of the check at the top of the function, so
        	 * the mask test below can never be the one that fails.
        	 */
     92 	if (cmd == MC_HAT_ADVISE) {
     93 		if (attr != 0 || mask != 0) {
     94 			return (set_errno(EINVAL));
     95 		}
     96 
     97 	} else {
     98 		if ((VALID_ATTR & attr) != attr) {
     99 			return (set_errno(EINVAL));
    100 		}
    101 		if ((attr & SHARED) && (attr & PRIVATE)) {
    102 			return (set_errno(EINVAL));
    103 		}
        		/*
        		 * All four lock/unlock commands are subject to the
        		 * secpolicy_lock_memory() check on the caller's
        		 * credentials; its error, if any, is returned as is.
        		 */
    104 		if (((cmd == MC_LOCKAS) || (cmd == MC_LOCK) ||
    105 		    (cmd == MC_UNLOCKAS) || (cmd == MC_UNLOCK)) &&
    106 		    (error = secpolicy_lock_memory(CRED())) != 0)
    107 			return (set_errno(error));
    108 	}
        	/*
        	 * A non-empty filter always has PROT_USER added before it is
        	 * handed to as_ctl(); an empty filter is passed through as zero.
        	 */
    109 	if (attr) {
    110 		attr |= PROT_USER;
    111 	}
    112 
        	/*
        	 * Per-command argument checks.  Cases that "break" out of this
        	 * switch are completed by the common as_ctl() call at the bottom
        	 * of the function; the other cases return on their own.
        	 */
    113 	switch (cmd) {
    114 	case MC_SYNC:
    115 		/*
    116 		 * MS_SYNC used to be defined to be zero but is now non-zero.
    117 		 * For binary compatibility we still accept zero
    118 		 * (the absence of MS_ASYNC) to mean the same thing.
    119 		 */
    120 		iarg = (uintptr_t)arg;
    121 		if ((iarg & ~MS_INVALIDATE) == 0)
    122 			iarg |= MS_SYNC;
    123 
        		/*
        		 * Reject unknown flag bits, and reject a request that
        		 * asks for both MS_SYNC and MS_ASYNC at once.
        		 */
    124 		if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
    125 			((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
    126 			error = set_errno(EINVAL);
    127 		} else {
    128 			error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
    129 			if (error) {
    130 				(void) set_errno(error);
    131 			}
    132 		}
    133 		return (error);
    134 	case MC_LOCKAS:
        		/*
        		 * arg must be a non-empty combination of MCL_FUTURE
        		 * and MCL_CURRENT.
        		 */
    135 		if ((uintptr_t)arg & ~(MCL_FUTURE|MCL_CURRENT) ||
    136 		    (uintptr_t)arg == 0) {
    137 			return (set_errno(EINVAL));
    138 		}
    139 		break;
    140 	case MC_LOCK:
    141 	case MC_UNLOCKAS:
    142 	case MC_UNLOCK:
        		/* arg is not validated here for these commands. */
    143 		break;
    144 	case MC_HAT_ADVISE:
    145 		/*
    146 		 * Set preferred page size.
    147 		 */
        		/*
        		 * arg is a user address; the structure is sized for the
        		 * caller's data model before being copied in.
        		 */
    148 		STRUCT_INIT(mha, get_udatamodel());
    149 		if (copyin(arg, STRUCT_BUF(mha), STRUCT_SIZE(mha))) {
    150 			return (set_errno(EFAULT));
    151 		}
    152 
    153 		pgcmd = STRUCT_FGET(mha, mha_cmd);
    154 
    155 		/*
    156 		 * Currently only MHA_MAPSIZE_VA, MHA_MAPSIZE_STACK
    157 		 * and MHA_MAPSIZE_BSSBRK are supported. Only one
    158 		 * command may be specified at a time.
        		 * mha_flags must be zero.
    159 		 */
    160 		if ((~(MHA_MAPSIZE_VA|MHA_MAPSIZE_STACK|MHA_MAPSIZE_BSSBRK) &
    161 		    pgcmd) || pgcmd == 0 || !ISP2(pgcmd) ||
    162 		    STRUCT_FGET(mha, mha_flags))
    163 			return (set_errno(EINVAL));
    164 
    165 		pgsz = STRUCT_FGET(mha, mha_pagesize);
    166 
    167 		/*
    168 		 * call platform specific map_pgsz() routine to get the
    169 		 * optimal pgsz if pgsz is 0.
    170 		 *
    171 		 * For stack and heap operations addr and len must be zero.
    172 		 */
    173 		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
    174 			if (addr != NULL || len != 0) {
    175 				return (set_errno(EINVAL));
    176 			}
    177 
    178 			/*
    179 			 * Disable autompss for this process unless pgsz == 0,
    180 			 * which means the system should pick.  In the
    181 			 * pgsz == 0 case, leave the SAUTOLPG setting alone, as
    182 			 * we don't want to enable it when someone has
    183 			 * disabled automatic large page selection for the
    184 			 * whole system.
    185 			 */
    186 			mutex_enter(&p->p_lock);
    187 			if (pgsz != 0) {
    188 				p->p_flag &= ~SAUTOLPG;
    189 			}
    190 			mutex_exit(&p->p_lock);
    191 
        			/*
        			 * The range lock is taken only on this stack/heap
        			 * path.  Every exit from here on that belongs to
        			 * this path must call as_rangeunlock(); the
        			 * MHA_MAPSIZE_VA path below never holds it.
        			 */
    192 			as_rangelock(as);
    193 
    194 			if (pgsz == 0) {
    195 				int	type;
    196 
    197 				if (pgcmd == MHA_MAPSIZE_BSSBRK)
    198 					type = MAPPGSZ_HEAP;
    199 				else
    200 					type = MAPPGSZ_STK;
    201 
    202 				pgsz = map_pgsz(type, p, 0, 0, 1);
    203 			}
    204 		} else {
    205 			/*
    206 			 * addr and len must be valid for range specified.
    207 			 */
    208 			if (valid_usr_range(addr, len, 0, as,
    209 			    as->a_userlimit) != RANGE_OKAY) {
    210 				return (set_errno(ENOMEM));
    211 			}
    212 			/*
    213 			 * Note that we don't disable automatic large page
    214 			 * selection for anon segments based on use of
    215 			 * memcntl().
        			 *
        			 * A zero page size on this path is handled
        			 * entirely by as_set_default_lpsize().
    216 			 */
    217 			if (pgsz == 0) {
    218 				error = as_set_default_lpsize(as, addr, len);
    219 				if (error) {
    220 					(void) set_errno(error);
    221 				}
    222 				return (error);
    223 			}
    224 
    225 			/*
    226 			 * addr and len must be preferred page size aligned
    227 			 */
    228 			if (!IS_P2ALIGNED(addr, pgsz) ||
    229 			    !IS_P2ALIGNED(len, pgsz)) {
    230 				return (set_errno(EINVAL));
    231 			}
    232 		}
    233 
        		/*
        		 * Convert the byte size into a page size code.  A result
        		 * of (uint_t)-1 means no matching page size was found;
        		 * drop the range lock first if this path took it.
        		 */
    234 		szc = mem_getpgszc(pgsz);
    235 		if (szc == (uint_t)-1) {
    236 			if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK))
    237 			    != 0) {
    238 				as_rangeunlock(as);
    239 			}
    240 			return (set_errno(EINVAL));
    241 		}
    242 
    243 		/*
    244 		 * For stack and heap operations we first need to pad
    245 		 * out existing range (create new mappings) to the new
    246 		 * preferred page size boundary. Also the start of the
    247 		 * .bss for the heap or user's stack base may not be on
    248 		 * the new preferred page size boundary. For these cases
    249 		 * we align the base of the request on the new preferred
    250 		 * page size.
    251 		 */
    252 		if (pgcmd & MHA_MAPSIZE_BSSBRK) {
        			/* Nothing to do if the heap already uses this size. */
    253 			if (szc == p->p_brkpageszc) {
    254 				as_rangeunlock(as);
    255 				return (0);
    256 			}
        			/*
        			 * brk_internal() is called only when moving to a
        			 * larger size code, passing the current end of
        			 * the heap.
        			 */
    257 			if (szc > p->p_brkpageszc) {
    258 				error = brk_internal(p->p_brkbase
    259 				    + p->p_brksize, szc);
    260 				if (error) {
    261 					as_rangeunlock(as);
    262 					return (set_errno(error));
    263 				}
    264 			}
    265 			/*
    266 			 * It is possible for brk_internal to silently fail to
    267 			 * promote the heap size, so don't panic or ASSERT.
    268 			 */
    269 			if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) {
    270 				as_rangeunlock(as);
    271 				return (set_errno(ENOMEM));
    272 			}
        			/*
        			 * Remember the old size code so it can be put back
        			 * if as_setpagesize() fails below.
        			 */
    273 			oszc = p->p_brkpageszc;
    274 			p->p_brkpageszc = szc;
    275 
        			/*
        			 * The range to convert runs from the .bss base,
        			 * rounded up to the new page size, to the end of
        			 * the heap.
        			 */
    276 			addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
    277 			    pgsz);
    278 			len = (p->p_brkbase + p->p_brksize) - addr;
    279 			ASSERT(IS_P2ALIGNED(len, pgsz));
    280 			/*
    281 			 * Perhaps no existing pages to promote.
    282 			 */
    283 			if (len == 0) {
    284 				as_rangeunlock(as);
    285 				return (0);
    286 			}
    287 		}
    288 		/*
    289 		 * The code below, as does grow.c, assumes stacks always grow
    290 		 * downward.
    291 		 */
    292 		if (pgcmd & MHA_MAPSIZE_STACK) {
        			/* Nothing to do if the stack already uses this size. */
    293 			if (szc == p->p_stkpageszc) {
    294 				as_rangeunlock(as);
    295 				return (0);
    296 			}
    297 
        			/*
        			 * grow_internal() is called only when moving to a
        			 * larger size code, passing the current lowest
        			 * stack address.
        			 */
    298 			if (szc > p->p_stkpageszc) {
    299 				error = grow_internal(p->p_usrstack -
    300 				    p->p_stksize, szc);
    301 				if (error) {
    302 					as_rangeunlock(as);
    303 					return (set_errno(error));
    304 				}
    305 			}
    306 			/*
    307 			 * It is possible for grow_internal to silently fail to
    308 			 * promote the stack size, so don't panic or ASSERT.
    309 			 */
    310 			if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) {
    311 				as_rangeunlock(as);
    312 				return (set_errno(ENOMEM));
    313 			}
        			/*
        			 * Remember the old size code so it can be put back
        			 * if as_setpagesize() fails below.
        			 */
    314 			oszc = p->p_stkpageszc;
    315 			p->p_stkpageszc = szc;
    316 
        			/*
        			 * The range to convert starts at the lowest stack
        			 * address and covers the stack size rounded down
        			 * with P2ALIGN to the new page size.
        			 */
    317 			addr = p->p_usrstack - p->p_stksize;
    318 			len = P2ALIGN(p->p_stksize, pgsz);
    319 
    320 			/*
    321 			 * Perhaps nothing to promote.
        			 * The last test also catches addr + len wrapping.
    322 			 */
    323 			if (len == 0 || addr >= p->p_usrstack ||
    324 			    (addr + len) < addr) {
    325 				as_rangeunlock(as);
    326 				return (0);
    327 			}
    328 		}
    329 		ASSERT(IS_P2ALIGNED(addr, pgsz));
    330 		ASSERT(IS_P2ALIGNED(len, pgsz));
    331 		error = as_setpagesize(as, addr, len, szc, B_TRUE);
    332 
    333 		/*
    334 		 * On stack or heap failures restore original
    335 		 * pg size code.
    336 		 */
    337 		if (error) {
    338 			if ((pgcmd & MHA_MAPSIZE_BSSBRK) != 0) {
    339 				p->p_brkpageszc = oszc;
    340 			}
    341 			if ((pgcmd & MHA_MAPSIZE_STACK) != 0) {
    342 				p->p_stkpageszc = oszc;
    343 			}
    344 			(void) set_errno(error);
    345 		}
        		/* Release the range lock taken on the stack/heap path. */
    346 		if ((pgcmd & (MHA_MAPSIZE_BSSBRK|MHA_MAPSIZE_STACK)) != 0) {
    347 			as_rangeunlock(as);
    348 		}
    349 		return (error);
    350 	case MC_ADVISE:
        		/*
        		 * For MADV_FREE the length is masked with PAGEMASK before
        		 * use (presumably truncating it to whole pages; PAGEMASK
        		 * is defined elsewhere -- confirm).
        		 */
    351 		if ((uintptr_t)arg == MADV_FREE) {
    352 			len &= PAGEMASK;
    353 		}
    354 		switch ((uintptr_t)arg) {
    355 		case MADV_WILLNEED:
        			/*
        			 * Start the fault-ahead.  A fault code is turned
        			 * into an errno: the object's own errno for
        			 * FC_OBJERR, ENOMEM for FC_NOMAP, EINVAL otherwise.
        			 * On success control leaves both switches and the
        			 * advice is also passed to the common as_ctl()
        			 * call at the bottom of the function.
        			 */
    356 			fc = as_faulta(as, addr, len);
    357 			if (fc) {
    358 				if (FC_CODE(fc) == FC_OBJERR)
    359 					error = set_errno(FC_ERRNO(fc));
    360 				else if (FC_CODE(fc) == FC_NOMAP)
    361 					error = set_errno(ENOMEM);
    362 				else
    363 					error = set_errno(EINVAL);
    364 				return (error);
    365 			}
    366 			break;
    367 
    368 		case MADV_DONTNEED:
    369 			/*
    370 			 * For now, don't need is turned into an as_ctl(MC_SYNC)
    371 			 * operation flagged for async invalidate.
    372 			 */
    373 			error = as_ctl(as, addr, len, MC_SYNC, attr,
    374 			    MS_ASYNC | MS_INVALIDATE, NULL, 0);
    375 			if (error)
    376 				(void) set_errno(error);
    377 			return (error);
    378 
    379 		default:
        			/* All other advice values go straight to as_ctl(). */
    380 			error = as_ctl(as, addr, len, cmd, attr,
    381 			    (uintptr_t)arg, NULL, 0);
    382 			if (error)
    383 				(void) set_errno(error);
    384 			return (error);
    385 		}
    386 		break;
    387 	default:
    388 		return (set_errno(EINVAL));
    389 	}
    390 
        	/*
        	 * Common tail for the commands that broke out of the switch:
        	 * MC_LOCK, MC_LOCKAS, MC_UNLOCK, MC_UNLOCKAS and
        	 * MC_ADVISE/MADV_WILLNEED.
        	 */
    391 	error = as_ctl(as, addr, len, cmd, attr, (uintptr_t)arg, NULL, 0);
    392 
    393 	if (error)
    394 		(void) set_errno(error);
    395 	return (error);
    396 }
    397 
    398 /*
    399  * Return page size code for page size passed in. If
    400  * matching page size not found or supported, return -1.
    401  */
    402 static uint_t
    403 mem_getpgszc(size_t pgsz) {
    404 	return ((uint_t)page_szc_user_filtered(pgsz));
    405 }
    406